Module: Senko::Format

Defined in:
lib/senko/format.rb

Constant Summary collapse

# One or more dot-separated runs of ASCII letters, digits and the listed
# punctuation. local_part? uses it for unquoted, non-IDN local parts.
DOT_ATOM =
%r{\A[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+)*\z}
# 1 to 63 ASCII letters, digits or hyphens, beginning and ending with a
# letter or digit.
HOST_LABEL =
/\A[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\z/
# Five hyphen-separated hexadecimal groups of 8, 4, 4, 4 and 12 digits
# (either letter case).
UUID =
/\A[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\z/
# Zero or more '/'-prefixed tokens in which '~' may only appear as '~0' or '~1'.
JSON_POINTER =
%r{\A(?:/(?:[^~/]|~0|~1)*)*\z}
# A non-negative integer without leading zeros, followed by either '#' or a
# token sequence shaped like JSON_POINTER.
RELATIVE_JSON_POINTER =
%r{\A(?:0|[1-9][0-9]*)(?:#|(?:/(?:[^~/]|~0|~1)*)*)\z}
# Digit layout YYYY-MM-DD; captures year, month and day. Value ranges are
# checked separately in rfc3339_date?.
RFC3339_DATE =
/\A([0-9]{4})-([0-9]{2})-([0-9]{2})\z/
# Date and time joined by 'T' or 't'. Capture 1 is the date text, capture 2
# the time text including its offset.
RFC3339_DATE_TIME =
/\A([0-9]{4}-[0-9]{2}-[0-9]{2})[Tt]([0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|[+-][0-9]{2}:[0-9]{2}))\z/
# hh:mm:ss with optional fractional seconds and a mandatory offset ('Z', 'z'
# or +hh:mm / -hh:mm). Captures: hour, minute, second, whole offset, offset
# hour, offset minute.
RFC3339_TIME =
/\A([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.[0-9]+)?([Zz]|[+-]([0-9]{2}):([0-9]{2}))\z/
# Four dot-separated decimal octets (0-255) with no leading zeros.
IPV4_DECIMAL =
/\A(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(?:\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}\z/
# Label separators used by hostname? when splitting IDN hostnames:
# '.', U+3002, U+FF0E and U+FF61.
IDN_SEPARATORS =
/[.\u{3002}\u{ff0e}\u{ff61}]/
# Code points that hostname_label? rejects anywhere in a decoded label.
DISALLOWED_IDNA =
/[\u{0640}\u{07fa}\u{302e}\u{302f}\u{3031}-\u{3035}\u{303b}]/

Class Method Summary collapse

Class Method Details

.absolute_uri?(value) ⇒ Boolean

Returns:

  • (Boolean)


71
72
73
74
75
76
# File 'lib/senko/format.rb', line 71

# Checks whether +value+ is an ASCII URI that carries its own scheme.
#
# The text is first screened by invalid_uri_ascii? and then handed to
# Addressable for parsing (a parse failure presumably raises; valid? rescues
# Addressable::URI::InvalidURIError).
#
# @param value [String] candidate URI text
# @return [Boolean, nil] falsy when the pre-check fails or the parsed URI has no scheme
def absolute_uri?(value)
  if invalid_uri_ascii?(value)
    false
  else
    scheme = Addressable::URI.parse(value).scheme
    scheme && value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
  end
end

.address_literal?(value) ⇒ Boolean

Returns:

  • (Boolean)


214
215
216
217
218
219
220
# File 'lib/senko/format.rb', line 214

# Validates the text found between the square brackets of an e-mail domain.
#
# A value tagged with the "IPv6:" prefix is checked by ipv6? after the tag is
# removed; anything else must be a dotted-decimal IPv4 address.
#
# @param value [String] bracket contents, without the brackets
# @return [Boolean]
def address_literal?(value)
  return ipv6?(value.delete_prefix('IPv6:')) if value.start_with?('IPv6:')

  value.match?(IPV4_DECIMAL) && IPAddr.new(value).ipv4?
end

.ascii_label?(label) ⇒ Boolean

Returns:

  • (Boolean)


237
238
239
# File 'lib/senko/format.rb', line 237

# Checks a single hostname label made only of ASCII characters.
#
# @param label [String] one label, without separators
# @return [Boolean] true when the label is ASCII, matches HOST_LABEL and has no underscore
def ascii_label?(label)
  return false unless label.ascii_only?
  return false unless label.match?(HOST_LABEL)

  !label.include?('_')
end

.bad_percent_encoding?(value) ⇒ Boolean

Returns:

  • (Boolean)


292
293
294
# File 'lib/senko/format.rb', line 292

# Detects a '%' that is not followed by two hexadecimal digits.
#
# @param value [String]
# @return [Boolean] true when at least one malformed percent escape is present
def bad_percent_encoding?(value)
  stray_percent = /%(?![0-9A-Fa-f]{2})/
  value.match?(stray_percent)
end

.canonical_idn_label?(label, unicode) ⇒ Boolean

Returns:

  • (Boolean)


241
242
243
244
245
246
# File 'lib/senko/format.rb', line 241

# Compares a label with the ASCII form SimpleIDN produces for its Unicode text.
#
# For a label that already starts with "xn--" (case-insensitively) the label
# must equal that ASCII form, ignoring case. For any other label only the
# length of the ASCII form is checked (at most 63 characters).
#
# @param label [String] the label as written in the hostname
# @param unicode [String] the Unicode text of the same label
# @return [Boolean]
def canonical_idn_label?(label, unicode)
  encoded = SimpleIDN.to_ascii(unicode).downcase
  lowered = label.downcase

  if lowered.start_with?('xn--')
    lowered == encoded
  else
    encoded.length <= 63
  end
end

.contextual_idn_label?(label) ⇒ Boolean

Returns:

  • (Boolean)


248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/senko/format.rb', line 248

# Applies per-character context rules to a Unicode hostname label.
#
# Rules enforced, by code point:
# * U+00B7 must sit between two 'l' characters.
# * U+0375 must be followed by a character accepted by greek?.
# * U+05F3 and U+05F4 must be preceded by a character accepted by hebrew?.
# * U+30FB requires at least one Hiragana, Katakana or Han character in the label.
# * U+200D must be preceded by a character accepted by virama?.
# In addition, digits U+0660-U+0669 and U+06F0-U+06F9 may not both appear.
#
# @param label [String] decoded (Unicode) label
# @return [Boolean]
def contextual_idn_label?(label)
  glyphs = label.chars
  glyphs.each_with_index do |glyph, pos|
    has_prev = pos.positive?
    prev = glyphs[pos - 1]
    following = glyphs[pos + 1]

    allowed =
      case glyph
      when "\u{00b7}" then has_prev && prev == 'l' && following == 'l'
      when "\u{0375}" then greek?(following)
      when "\u{05f3}", "\u{05f4}" then has_prev && hebrew?(prev)
      when "\u{30fb}" then label.match?(/[\p{Hiragana}\p{Katakana}\p{Han}]/)
      when "\u{200d}" then has_prev && virama?(prev)
      else true
      end
    return false unless allowed
  end

  mixes_digit_sets = label.match?(/[\u{0660}-\u{0669}]/) && label.match?(/[\u{06f0}-\u{06f9}]/)
  !mixes_digit_sets
end

.days_in_month(year, month) ⇒ Object



169
170
171
172
173
174
175
# File 'lib/senko/format.rb', line 169

# Number of days in the given month.
#
# @param year [Integer] only consulted for February, via leap_year?
# @param month [Integer] month number; values other than 2, 4, 6, 9 and 11 yield 31
# @return [Integer] 28, 29, 30 or 31
def days_in_month(year, month)
  case month
  when 2 then leap_year?(year) ? 29 : 28
  when 4, 6, 9, 11 then 30
  else 31
  end
end

.duration?(value) ⇒ Boolean

Returns:

  • (Boolean)


105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/senko/format.rb', line 105

# Checks the textual shape of a duration such as "P1Y2M3DT4H5M6S" or "P2W".
#
# The week form ("P<digits>W") is accepted on its own. Otherwise the value
# must contain at least one component, a 'T' must be followed by at least one
# time component, and a component may not be skipped between its neighbours
# (years+days without months, hours+seconds without minutes).
#
# @param value [String]
# @return [Boolean]
def duration?(value)
  return true if value.match?(/\AP[0-9]+W\z/)
  return false unless value.ascii_only?

  parsed = value.match(/\AP(?:(?<years>[0-9]+)Y)?(?:(?<months>[0-9]+)M)?(?:(?<days>[0-9]+)D)?(?:T(?:(?<hours>[0-9]+)H)?(?:(?<minutes>[0-9]+)M)?(?:(?<seconds>[0-9]+)S)?)?\z/)
  return false if parsed.nil?

  has_date = %w[years months days].any? { |unit| parsed[unit] }
  has_time = %w[hours minutes seconds].any? { |unit| parsed[unit] }
  return false unless has_date || has_time
  # A 'T' designator with nothing after it is not a duration.
  return false if value.include?('T') && !has_time

  gap_in_date = parsed['years'] && parsed['days'] && !parsed['months']
  gap_in_time = parsed['hours'] && parsed['seconds'] && !parsed['minutes']
  !(gap_in_date || gap_in_time)
end

.ecma_pattern_source(value) ⇒ Object



315
316
317
318
319
320
321
322
323
324
325
# File 'lib/senko/format.rb', line 315

# Rewrites ECMA-262 style regular-expression shorthand into explicit forms.
#
# Steps, applied in this order:
# 1. "\cX" control escapes become the (regexp-escaped) control character.
# 2. \d \D \w \W \s \S are replaced by explicit character classes.
# 3. Every '$' not preceded by a backslash becomes "\z".
#
# @param value [String] pattern source as written by the schema author
# @return [String] translated pattern source
def ecma_pattern_source(value)
  shorthand = [
    ['\\d', '[0-9]'],
    ['\\D', '[^0-9]'],
    ['\\w', '[A-Za-z0-9_]'],
    ['\\W', '[^A-Za-z0-9_]'],
    ['\\s', '[\\t\\n\\v\\f\\r \\u00a0\\ufeff\\u2028\\u2029\\p{Space_Separator}]'],
    ['\\S', '[^\\t\\n\\v\\f\\r \\u00a0\\ufeff\\u2028\\u2029\\p{Space_Separator}]']
  ]

  source = value.gsub(/\\c([A-Za-z])/) do
    letter = ::Regexp.last_match(1)
    # 'A' (65) maps to control code 1, 'B' to 2, and so on.
    Regexp.escape((letter.upcase.ord - 64).chr)
  end
  shorthand.each { |escape, expansion| source = source.gsub(escape, expansion) }
  source.gsub(/(?<!\\)\$/, '\\z')
end

.ecma_regex?(value) ⇒ Boolean

Returns:

  • (Boolean)


304
305
306
307
308
309
310
311
312
313
# File 'lib/senko/format.rb', line 304

# Checks that +value+ compiles as a regular expression once its ECMA-262
# shorthand has been translated by ecma_pattern_source.
#
# Warnings are silenced by setting $VERBOSE to nil while the pattern is
# compiled. The previous setting is captured before anything can leave the
# method, so the ensure clause always restores the caller's value. Capturing
# it after the early return used to leave $VERBOSE stuck at nil.
#
# @param value [String] pattern source
# @return [Boolean] false when the pattern contains an "\a" escape that is not
#   preceded by a backslash, true when it compiles
# @raise [RegexpError] when the translated pattern does not compile
def ecma_regex?(value)
  verbose = $VERBOSE
  return false if value.match?(/(?:^|[^\\])\\a/)

  $VERBOSE = nil
  Regexp.new(ecma_pattern_source(value))
  true
ensure
  $VERBOSE = verbose
end

.email?(value, idn:) ⇒ Boolean

Returns:

  • (Boolean)


181
182
183
184
185
186
187
188
189
190
191
# File 'lib/senko/format.rb', line 181

# Validates an e-mail address by checking its local part and its domain.
#
# The address is split by split_email. A domain wrapped in square brackets is
# treated as an address literal; any other domain must be a hostname (ASCII
# only unless +idn+ is set).
#
# @param value [String] full address
# @param idn [Boolean] whether internationalised local parts and hostnames are allowed
# @return [Boolean]
def email?(value, idn:)
  local, domain = split_email(value)
  return false unless local && domain && local_part?(local, idn: idn)

  bracketed = domain.start_with?('[') && domain.end_with?(']')
  bracketed ? address_literal?(domain[1...-1]) : hostname?(domain, ascii_only: !idn)
end

.greek?(char) ⇒ Boolean

Returns:

  • (Boolean)


263
264
265
# File 'lib/senko/format.rb', line 263

# Tells whether +char+ contains a character of the Greek script.
#
# @param char [String, nil]
# @return [Boolean, nil] nil when +char+ is nil
def greek?(char)
  return if char.nil?

  char.match?(/\p{Greek}/)
end

.hebrew?(char) ⇒ Boolean

Returns:

  • (Boolean)


267
268
269
# File 'lib/senko/format.rb', line 267

# Tells whether +char+ contains a character of the Hebrew script.
#
# @param char [String, nil]
# @return [Boolean, nil] nil when +char+ is nil
def hebrew?(char)
  return if char.nil?

  char.match?(/\p{Hebrew}/)
end

.hostname?(value, ascii_only: true) ⇒ Boolean

Returns:

  • (Boolean)


61
62
63
64
65
66
67
68
69
# File 'lib/senko/format.rb', line 61

# Validates a hostname label by label.
#
# The whole value must be 1 to 253 characters long. It is split on '.' (or on
# IDN_SEPARATORS when +ascii_only+ is false), keeping trailing empty pieces so
# that leading, trailing and doubled separators are rejected.
#
# @param value [String]
# @param ascii_only [Boolean] reject any non-ASCII character when true
# @return [Boolean]
def hostname?(value, ascii_only: true)
  return false if value.empty? || value.length > 253
  return false unless !ascii_only || value.ascii_only?

  separator = ascii_only ? '.' : IDN_SEPARATORS
  pieces = value.split(separator, -1)
  return false unless !pieces.empty? && pieces.none?(&:empty?)

  pieces.all? { |piece| hostname_label?(piece, ascii_only: ascii_only) }
end

.hostname_label?(label, ascii_only:) ⇒ Boolean

Returns:

  • (Boolean)


222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/senko/format.rb', line 222

# Validates one hostname label.
#
# Plain ASCII labels (in ASCII-only mode, without an "xn--" prefix) are
# delegated to ascii_label?. Every other label is checked in its Unicode
# form: an "xn--" label is first converted with SimpleIDN.to_unicode, other
# labels are used as given.
#
# @param label [String] one label, without separators
# @param ascii_only [Boolean]
# @return [Boolean]
def hostname_label?(label, ascii_only:)
  # The 63-character cap is applied here only to all-ASCII labels.
  return false if label.empty? || (label.length > 63 && label.ascii_only?)
  return false if label.start_with?('-') || label.end_with?('-')

  punycode = label.downcase.start_with?('xn--')
  return ascii_label?(label) if ascii_only && !punycode

  unicode = punycode ? SimpleIDN.to_unicode(label.downcase) : label
  # "--" in the third and fourth position is not allowed in the decoded form.
  return false if unicode[2, 2] == '--'
  return false unless canonical_idn_label?(label, unicode)
  return false if unicode.match?(DISALLOWED_IDNA) || unicode.match?(/\A\p{Mark}/)

  contextual_idn_label?(unicode)
end

.invalid_uri_ascii?(value, allow_relative: false) ⇒ Boolean

Returns:

  • (Boolean)


282
283
284
285
286
287
288
289
290
# File 'lib/senko/format.rb', line 282

# Cheap textual screening applied before a URI is parsed.
#
# A value is flagged when it contains non-ASCII characters, whitespace, any
# of < > \ { } | ^ ` ", a malformed percent escape, or, unless
# +allow_relative+ is set, when it does not start with a scheme and a colon.
#
# @param value [String]
# @param allow_relative [Boolean] skip the scheme requirement when true
# @return [Boolean] true when the value must be rejected
def invalid_uri_ascii?(value, allow_relative: false)
  !value.ascii_only? ||
    value.match?(/[\s<>\\]/) ||
    value.match?(/[{}|^`"]/) ||
    bad_percent_encoding?(value) ||
    (!allow_relative && !value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/))
end

.ipv6?(value) ⇒ Boolean

Returns:

  • (Boolean)


275
276
277
278
279
280
# File 'lib/senko/format.rb', line 275

# Checks that +value+ is a bare IPv6 address.
#
# IPAddr.new is more lenient than this format: it also accepts an address
# wrapped in square brackets, an address with a "/prefix" suffix and an
# address with a "%zone" suffix. Those forms are screened out first so only a
# plain address reaches the parser.
#
# @param value [String]
# @return [Boolean]
# @raise [IPAddr::InvalidAddressError] when the screened text is still not an address
def ipv6?(value)
  return false unless value.ascii_only? && value.include?(':')
  return false if value.match?(%r{[\s/%\[\]]})

  IPAddr.new(value).ipv6?
end

.iri?(value, absolute:) ⇒ Boolean

Returns:

  • (Boolean)


85
86
87
88
89
90
91
92
93
# File 'lib/senko/format.rb', line 85

# Validates an IRI or, when +absolute+ is false, an IRI reference.
#
# Whitespace, '<', '>', '\' and malformed percent escapes are always
# rejected. A reference needs nothing more. An absolute IRI must also have
# its IPv6 authority bracketed, be accepted by Addressable::URI.parse (whose
# return value is discarded) and start with a scheme.
#
# @param value [String]
# @param absolute [Boolean] require a scheme when true
# @return [Boolean]
def iri?(value, absolute:)
  malformed = value.match?(/[\s<>\\]/) || bad_percent_encoding?(value)

  if malformed
    false
  elsif !absolute
    true
  elsif unbracketed_ipv6_authority?(value)
    false
  else
    Addressable::URI.parse(value)
    value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
  end
end

.leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute) ⇒ Boolean

Returns:

  • (Boolean)


158
159
160
161
162
163
164
165
166
167
# File 'lib/senko/format.rb', line 158

# Tells whether a local hh:mm, shifted by its offset, falls on 23:59 UTC.
#
# @param hour [Integer] local hour
# @param minute [Integer] local minute
# @param offset [String] "Z", "z" or a signed offset whose first character is the sign
# @param offset_hour [String, nil] hour digits of the offset (ignored for Z/z)
# @param offset_minute [String, nil] minute digits of the offset (ignored for Z/z)
# @return [Boolean]
def leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute)
  shift = 0
  unless %w[Z z].include?(offset)
    magnitude = (offset_hour.to_i * 60) + offset_minute.to_i
    shift = offset.start_with?('+') ? magnitude : -magnitude
  end

  # Subtract the offset to get UTC, wrapping around midnight.
  utc_minute_of_day = ((hour * 60) + minute - shift) % 1440
  utc_minute_of_day == ((23 * 60) + 59)
end

.leap_year?(year) ⇒ Boolean

Returns:

  • (Boolean)


177
178
179
# File 'lib/senko/format.rb', line 177

# Gregorian leap-year rule: a year divisible by 100 must also be divisible by
# 400; any other year only needs to be divisible by 4.
#
# @param year [Integer]
# @return [Boolean]
def leap_year?(year)
  return (year % 400).zero? if (year % 100).zero?

  (year % 4).zero?
end

.local_part?(value, idn:) ⇒ Boolean

Returns:

  • (Boolean)


207
208
209
210
211
212
# File 'lib/senko/format.rb', line 207

# Validates the part of an e-mail address that precedes the '@'.
#
# A value starting with a double quote must be a complete quoted string with
# at least one character or backslash escape inside. Otherwise an IDN local
# part is any dot-separated sequence of non-empty runs free of whitespace,
# '@' and '.', while a non-IDN local part must match DOT_ATOM.
#
# @param value [String]
# @param idn [Boolean]
# @return [Boolean]
def local_part?(value, idn:)
  if value.start_with?('"')
    value.match?(/\A"(?:[^"\\]|\\.)+"\z/)
  elsif idn
    value.match?(/\A[^\s@.]+(?:\.[^\s@.]+)*\z/)
  else
    value.match?(DOT_ATOM)
  end
end

.rfc3339_date?(value) ⇒ Boolean

Returns:

  • (Boolean)


129
130
131
132
133
134
135
136
137
138
139
# File 'lib/senko/format.rb', line 129

# Validates a calendar date written as YYYY-MM-DD.
#
# Beyond the digit layout enforced by RFC3339_DATE, the month must be 1-12
# and the day must exist in that month of that year.
#
# @param value [String]
# @return [Boolean]
def rfc3339_date?(value)
  parts = value.match(RFC3339_DATE)
  return false if parts.nil?

  year, month, day = parts.captures.map(&:to_i)
  month.between?(1, 12) && day.between?(1, days_in_month(year, month))
end

.rfc3339_date_time?(value) ⇒ Boolean

Returns:

  • (Boolean)


122
123
124
125
126
127
# File 'lib/senko/format.rb', line 122

# Validates a combined date and time by checking each half separately.
#
# @param value [String]
# @return [Boolean] true when the layout matches RFC3339_DATE_TIME and both
#   the date text and the time text pass their own checks
def rfc3339_date_time?(value)
  parts = value.match(RFC3339_DATE_TIME)

  if parts
    date_text, time_text = parts.captures
    rfc3339_date?(date_text) && rfc3339_time?(time_text)
  else
    false
  end
end

.rfc3339_time?(value) ⇒ Boolean

Returns:

  • (Boolean)


141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/senko/format.rb', line 141

# Validates a time of day with a mandatory offset.
#
# Hours must be 0-23 and minutes 0-59. A numeric offset must itself be within
# 00:00-23:59. A seconds value of 60 is accepted only when
# leap_second_utc_minute? confirms the moment is 23:59 in UTC.
#
# @param value [String]
# @return [Boolean]
def rfc3339_time?(value)
  parts = value.match(RFC3339_TIME)
  return false if parts.nil?

  hour, minute, second = parts.values_at(1, 2, 3).map(&:to_i)
  offset, offset_hour, offset_minute = parts.values_at(4, 5, 6)

  clock_ok = hour.between?(0, 23) && minute.between?(0, 59) && second.between?(0, 60)
  return false unless clock_ok

  numeric_offset = offset != 'Z' && offset != 'z'
  if numeric_offset
    return false unless offset_hour.to_i.between?(0, 23) && offset_minute.to_i.between?(0, 59)
  end

  second < 60 || leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute)
end

.split_email(value) ⇒ Object



193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/senko/format.rb', line 193

# Splits an e-mail address into its local part and its domain.
#
# An unquoted address must contain exactly one '@'. An address that starts
# with a double quote is split at the LAST '"@' pair: a quoted local part may
# itself contain '@' and escaped quotes (for example "a\"@b"@example.com),
# so the first '"@' is not necessarily the end of the quoted string. A '"@'
# at index 0 does not count, because the opening quote cannot also be the
# closing one.
#
# @param value [String] full address
# @return [Array(String, String), nil] [local, domain], or nil when no split point exists
def split_email(value)
  unless value.start_with?('"')
    parts = value.split('@', -1)
    return parts.length == 2 ? parts : nil
  end

  boundary = value.rindex('"@')
  return nil unless boundary&.positive?

  [value[0..boundary], value[(boundary + 2)..]]
end

.unbracketed_ipv6_authority?(value) ⇒ Boolean

Returns:

  • (Boolean)


296
297
298
299
300
301
302
# File 'lib/senko/format.rb', line 296

# Detects an authority whose host looks like an IPv6 address written without
# the required square brackets (e.g. "http://2001:db8::1/").
#
# Only values of the form "scheme://authority..." are inspected. Anything up
# to the first '@' in the authority is treated as userinfo and skipped. A
# host that does not start with '[' is flagged only when it contains more
# than one colon: a single colon is the ordinary "host:port" separator,
# whereas every IPv6 address contains at least two.
#
# @param value [String]
# @return [Boolean]
def unbracketed_ipv6_authority?(value)
  match = value.match(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://([^/?#]*)})
  return false unless match

  # "".split returns [], so an empty authority ("file:///x") yields nil here.
  host = match[1].split('@', 2).last.to_s
  return false if host.start_with?('[')

  host.count(':') > 1
end

.uri_reference?(value) ⇒ Boolean

Returns:

  • (Boolean)


78
79
80
81
82
83
# File 'lib/senko/format.rb', line 78

# Validates a URI reference (absolute or relative).
#
# The value must pass the ASCII screening with relative references allowed
# and must then be accepted by Addressable::URI.parse, whose return value is
# discarded.
#
# @param value [String]
# @return [Boolean]
def uri_reference?(value)
  if invalid_uri_ascii?(value, allow_relative: true)
    false
  else
    Addressable::URI.parse(value)
    true
  end
end

.uri_template?(value) ⇒ Boolean

Returns:

  • (Boolean)


95
96
97
98
99
100
101
102
103
# File 'lib/senko/format.rb', line 95

# Checks that the braces in +value+ are balanced and never nested.
#
# Only '{' and '}' are examined; the text inside and outside the braces is
# not validated.
#
# @param value [String]
# @return [Boolean]
def uri_template?(value)
  open_expressions = 0

  value.each_char do |symbol|
    case symbol
    when '{' then open_expressions += 1
    when '}' then open_expressions -= 1
    end
    # Anything other than "outside" (0) or "inside one expression" (1) is malformed.
    return false unless open_expressions.between?(0, 1)
  end

  open_expressions.zero?
end

.valid?(name, value, custom_formats = {}) ⇒ Boolean

Returns:

  • (Boolean)


26
27
28
29
30
31
32
33
34
35
# File 'lib/senko/format.rb', line 26

# Runs the validator registered for the format +name+ against +value+.
#
# A validator from +custom_formats+ takes precedence over the built-in one.
# Unknown formats and non-String values are accepted. The listed parser and
# conversion errors raised by a validator are reported as an invalid value.
#
# @param name [String] format name
# @param value [Object] value to check; only Strings are validated
# @param custom_formats [Hash{String => #call}] caller-supplied validators
# @return [Boolean] result of the validator, true when nothing applies
def valid?(name, value, custom_formats = {})
  checker = custom_formats[name] || validators[name]
  return true unless checker && value.is_a?(String)

  checker.call(value)
rescue Addressable::URI::InvalidURIError, ArgumentError, IPAddr::InvalidAddressError,
       RegexpError, SimpleIDN::ConversionError, URI::InvalidURIError
  false
end

.validators ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/senko/format.rb', line 37

# Built-in format validators, keyed by format name.
#
# The table is built on first use and memoised in @validators. Each entry is
# a one-argument lambda that receives the String to check.
#
# @return [Hash{String => Proc}]
def validators
  return @validators if @validators

  @validators = {
    'date-time' => ->(v) { rfc3339_date_time?(v) },
    'date' => ->(v) { rfc3339_date?(v) },
    'time' => ->(v) { rfc3339_time?(v) },
    'duration' => ->(v) { duration?(v) },
    'email' => ->(v) { email?(v, idn: false) },
    'idn-email' => ->(v) { email?(v, idn: true) },
    'hostname' => ->(v) { hostname?(v) },
    'idn-hostname' => ->(v) { hostname?(v, ascii_only: false) },
    'ipv4' => ->(v) { v.match?(IPV4_DECIMAL) && IPAddr.new(v).ipv4? },
    'ipv6' => ->(v) { ipv6?(v) },
    'uri' => ->(v) { absolute_uri?(v) },
    'uri-reference' => ->(v) { uri_reference?(v) },
    'iri' => ->(v) { iri?(v, absolute: true) },
    'iri-reference' => ->(v) { iri?(v, absolute: false) },
    'uuid' => ->(v) { v.match?(UUID) },
    'uri-template' => ->(v) { uri_template?(v) },
    'json-pointer' => ->(v) { v.match?(JSON_POINTER) },
    'relative-json-pointer' => ->(v) { v.match?(RELATIVE_JSON_POINTER) },
    'regex' => ->(v) { ecma_regex?(v) }
  }
end

.virama?(char) ⇒ Boolean

Returns:

  • (Boolean)


271
272
273
# File 'lib/senko/format.rb', line 271

# Tells whether +char+ is U+094D, the only code point this check recognises.
#
# @param char [String, nil]
# @return [Boolean]
def virama?(char)
  recognised = "\u{094d}"
  char == recognised
end