Module: Senko::Format
- Defined in:
- lib/senko/format.rb
Constant Summary collapse
- DOT_ATOM = %r{\A[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+)*\z}
- HOST_LABEL = /\A[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\z/
- UUID = /\A[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\z/
- JSON_POINTER = %r{\A(?:/(?:[^~/]|~0|~1)*)*\z}
- RELATIVE_JSON_POINTER = %r{\A(?:0|[1-9][0-9]*)(?:#|(?:/(?:[^~/]|~0|~1)*)*)\z}
- RFC3339_DATE = /\A([0-9]{4})-([0-9]{2})-([0-9]{2})\z/
- RFC3339_DATE_TIME = /\A([0-9]{4}-[0-9]{2}-[0-9]{2})[Tt]([0-9]{2}:[0-9]{2}:[0-9]{2}(?:\.[0-9]+)?(?:[Zz]|[+-][0-9]{2}:[0-9]{2}))\z/
- RFC3339_TIME = /\A([0-9]{2}):([0-9]{2}):([0-9]{2})(?:\.[0-9]+)?([Zz]|[+-]([0-9]{2}):([0-9]{2}))\z/
- IPV4_DECIMAL = /\A(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])(?:\.(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9]?[0-9])){3}\z/
- IDN_SEPARATORS = /[.\u{3002}\u{ff0e}\u{ff61}]/
- DISALLOWED_IDNA = /[\u{0640}\u{07fa}\u{302e}\u{302f}\u{3031}-\u{3035}\u{303b}]/
Class Method Summary collapse
- .absolute_uri?(value) ⇒ Boolean
- .address_literal?(value) ⇒ Boolean
- .ascii_label?(label) ⇒ Boolean
- .bad_percent_encoding?(value) ⇒ Boolean
- .canonical_idn_label?(label, unicode) ⇒ Boolean
- .contextual_idn_label?(label) ⇒ Boolean
- .days_in_month(year, month) ⇒ Object
- .duration?(value) ⇒ Boolean
- .ecma_pattern_source(value) ⇒ Object
- .ecma_regex?(value) ⇒ Boolean
- .email?(value, idn:) ⇒ Boolean
- .greek?(char) ⇒ Boolean
- .hebrew?(char) ⇒ Boolean
- .hostname?(value, ascii_only: true) ⇒ Boolean
- .hostname_label?(label, ascii_only:) ⇒ Boolean
- .invalid_uri_ascii?(value, allow_relative: false) ⇒ Boolean
- .ipv6?(value) ⇒ Boolean
- .iri?(value, absolute:) ⇒ Boolean
- .leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute) ⇒ Boolean
- .leap_year?(year) ⇒ Boolean
- .local_part?(value, idn:) ⇒ Boolean
- .rfc3339_date?(value) ⇒ Boolean
- .rfc3339_date_time?(value) ⇒ Boolean
- .rfc3339_time?(value) ⇒ Boolean
- .split_email(value) ⇒ Object
- .unbracketed_ipv6_authority?(value) ⇒ Boolean
- .uri_reference?(value) ⇒ Boolean
- .uri_template?(value) ⇒ Boolean
- .valid?(name, value, custom_formats = {}) ⇒ Boolean
- .validators ⇒ Object
- .virama?(char) ⇒ Boolean
Class Method Details
.absolute_uri?(value) ⇒ Boolean
71 72 73 74 75 76 |
# File 'lib/senko/format.rb', line 71
# Checks whether +value+ is an absolute URI.
#
# The value is first screened by +invalid_uri_ascii?+, then parsed with
# Addressable; parse errors are not rescued here and propagate to the caller.
#
# @param value [String] candidate URI text
# @return [Boolean, nil] truthy when the parsed URI has a scheme and the text
#   starts with a scheme name followed by ":"
def absolute_uri?(value)
  if invalid_uri_ascii?(value)
    false
  else
    parsed = Addressable::URI.parse(value)
    parsed.scheme && value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
  end
end
.address_literal?(value) ⇒ Boolean
214 215 216 217 218 219 220 |
# File 'lib/senko/format.rb', line 214
# Validates the content of a bracketed e-mail domain (brackets already removed).
#
# A value starting with "IPv6:" is checked as IPv6 after the tag is removed;
# anything else must match IPV4_DECIMAL and be accepted by IPAddr as IPv4.
#
# @param value [String] address-literal text without the enclosing brackets
# @return [Boolean]
def address_literal?(value)
  return ipv6?(value.delete_prefix('IPv6:')) if value.start_with?('IPv6:')

  value.match?(IPV4_DECIMAL) && IPAddr.new(value).ipv4?
end
.ascii_label?(label) ⇒ Boolean
237 238 239 |
# File 'lib/senko/format.rb', line 237
# Checks a single ASCII host label.
#
# @param label [String] one dot-separated component of a host name
# @return [Boolean] true when the label is ASCII-only, matches HOST_LABEL and
#   contains no underscore
def ascii_label?(label)
  return false unless label.ascii_only?
  return false unless label.match?(HOST_LABEL)

  !label.include?('_')
end
.bad_percent_encoding?(value) ⇒ Boolean
292 293 294 |
# File 'lib/senko/format.rb', line 292
# Detects a "%" that is not followed by two hexadecimal digits.
#
# @param value [String] text to scan
# @return [Boolean] true when at least one malformed percent escape is present
def bad_percent_encoding?(value)
  stray_percent = /%(?![0-9A-Fa-f]{2})/
  value.match?(stray_percent)
end
.canonical_idn_label?(label, unicode) ⇒ Boolean
241 242 243 244 245 246 |
# File 'lib/senko/format.rb', line 241
# Compares a label with the ASCII form SimpleIDN produces for its Unicode text.
#
# @param label [String] the label as it appeared in the input
# @param unicode [String] the Unicode form of that label
# @return [Boolean] for an "xn--" label, whether it equals the re-encoded form
#   (case-insensitively); otherwise whether the encoded form is at most 63
#   characters long
def canonical_idn_label?(label, unicode)
  encoded = SimpleIDN.to_ascii(unicode).downcase
  lowered = label.downcase
  if lowered.start_with?('xn--')
    lowered == encoded
  else
    encoded.length <= 63
  end
end
.contextual_idn_label?(label) ⇒ Boolean
248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/senko/format.rb', line 248
# Applies per-character context rules to a Unicode host label.
#
# Rules enforced, as written in the code:
# * U+00B7 must sit between two "l" characters
# * U+0375 must be followed by a Greek character
# * U+05F3 and U+05F4 must be preceded by a Hebrew character
# * U+30FB requires a Hiragana, Katakana or Han character somewhere in the label
# * U+200D must be preceded by a character accepted by +virama?+
# * digits from U+0660..U+0669 and U+06F0..U+06F9 may not both appear
#
# @param label [String] Unicode label text
# @return [Boolean] true when no rule is violated
def contextual_idn_label?(label)
  chars = label.chars
  chars.each_with_index do |char, index|
    before = index.positive? ? chars[index - 1] : nil
    after = chars[index + 1]
    permitted =
      case char
      when "\u{00b7}" then before == 'l' && after == 'l'
      when "\u{0375}" then greek?(after)
      when "\u{05f3}", "\u{05f4}" then hebrew?(before)
      when "\u{30fb}" then label.match?(/[\p{Hiragana}\p{Katakana}\p{Han}]/)
      when "\u{200d}" then virama?(before)
      else true
      end
    return false unless permitted
  end

  !(label.match?(/[\u{0660}-\u{0669}]/) && label.match?(/[\u{06f0}-\u{06f9}]/))
end
.days_in_month(year, month) ⇒ Object
169 170 171 172 173 174 175 |
# File 'lib/senko/format.rb', line 169
# Returns the number of days in a calendar month.
#
# @param year [Integer] year, consulted only for February
# @param month [Integer] month number; any value other than 2, 4, 6, 9 or 11
#   yields 31
# @return [Integer] 28, 29, 30 or 31
def days_in_month(year, month)
  case month
  when 2 then leap_year?(year) ? 29 : 28
  when 4, 6, 9, 11 then 30
  else 31
  end
end
.duration?(value) ⇒ Boolean
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/senko/format.rb', line 105
# Validates a duration string such as "P1Y2M3DT4H5M6S" or "P2W".
#
# A weeks-only form is accepted directly. Otherwise at least one unit must be
# present, a "T" must be followed by a time unit, and units may not skip a
# level (years+days without months, hours+seconds without minutes).
#
# @param value [String] candidate duration
# @return [Boolean]
def duration?(value)
  return true if value.match?(/\AP[0-9]+W\z/)
  return false unless value.ascii_only?

  parsed = value.match(/\AP(?:(?<years>[0-9]+)Y)?(?:(?<months>[0-9]+)M)?(?:(?<days>[0-9]+)D)?(?:T(?:(?<hours>[0-9]+)H)?(?:(?<minutes>[0-9]+)M)?(?:(?<seconds>[0-9]+)S)?)?\z/)
  return false unless parsed

  has_date = %w[years months days].any? { |unit| parsed[unit] }
  has_time = %w[hours minutes seconds].any? { |unit| parsed[unit] }
  return false unless has_date || has_time
  # A bare "T" designator with nothing after it is not a duration.
  return false if value.include?('T') && !has_time
  return false if parsed['years'] && parsed['days'] && !parsed['months']
  return false if parsed['hours'] && parsed['seconds'] && !parsed['minutes']

  true
end
.ecma_pattern_source(value) ⇒ Object
315 316 317 318 319 320 321 322 323 324 325 |
# File 'lib/senko/format.rb', line 315
# Rewrites ECMAScript-flavoured regex source into source for Ruby's Regexp.
#
# Steps, applied in order: "\cX" control escapes become the escaped control
# character; "\d", "\D", "\w", "\W", "\s" and "\S" become explicit character
# classes; every "$" not preceded by a backslash becomes "\z".
#
# @param value [String] ECMAScript pattern source
# @return [String] translated pattern source
def ecma_pattern_source(value)
  class_rewrites = [
    ['\\d', '[0-9]'],
    ['\\D', '[^0-9]'],
    ['\\w', '[A-Za-z0-9_]'],
    ['\\W', '[^A-Za-z0-9_]'],
    ['\\s', '[\\t\\n\\v\\f\\r \\u00a0\\ufeff\\u2028\\u2029\\p{Space_Separator}]'],
    ['\\S', '[^\\t\\n\\v\\f\\r \\u00a0\\ufeff\\u2028\\u2029\\p{Space_Separator}]']
  ]

  # The last character of the "\cX" match is the control letter.
  translated = value.gsub(/\\c([A-Za-z])/) do |escape|
    Regexp.escape((escape[-1].upcase.ord - 64).chr)
  end
  translated = class_rewrites.reduce(translated) { |text, (from, to)| text.gsub(from, to) }
  translated.gsub(/(?<!\\)\$/, '\\z')
end
.ecma_regex?(value) ⇒ Boolean
304 305 306 307 308 309 310 311 312 313 |
# File 'lib/senko/format.rb', line 304
# Checks whether +value+ compiles as a regular expression after translation by
# +ecma_pattern_source+.
#
# A "\a" escape is rejected up front. Compilation runs with $VERBOSE silenced
# so Ruby's regexp warnings are not printed; the previous $VERBOSE value is
# always restored. A RegexpError from compilation is not rescued here and
# propagates to the caller.
#
# @param value [String] ECMAScript pattern source
# @return [Boolean] false for a "\a" escape, true when compilation succeeds
def ecma_regex?(value)
  # Capture the warning level before ANY return: the method-level `ensure`
  # also runs on the early return below, and restoring from a not-yet-assigned
  # local would overwrite $VERBOSE with nil.
  saved_verbose = $VERBOSE
  return false if value.match?(/(?:^|[^\\])\\a/)

  $VERBOSE = nil
  Regexp.new(ecma_pattern_source(value))
  true
ensure
  $VERBOSE = saved_verbose
end
.email?(value, idn:) ⇒ Boolean
181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/senko/format.rb', line 181
# Validates an e-mail address by checking its local part and its domain.
#
# A domain wrapped in "[" and "]" is checked as an address literal; any other
# domain is checked as a host name, ASCII-only unless +idn+ is set.
#
# @param value [String] candidate address
# @param idn [Boolean] whether internationalized local parts and host names
#   are allowed
# @return [Boolean]
def email?(value, idn:)
  local_part, host_part = split_email(value)
  return false unless local_part && host_part
  return false unless local_part?(local_part, idn: idn)

  bracketed = host_part.start_with?('[') && host_part.end_with?(']')
  return address_literal?(host_part[1...-1]) if bracketed

  hostname?(host_part, ascii_only: !idn)
end
.greek?(char) ⇒ Boolean
263 264 265 |
# File 'lib/senko/format.rb', line 263
# Tests whether +char+ contains a character of the Greek script.
#
# @param char [String, nil] character to test
# @return [Boolean, nil] nil when +char+ is nil
def greek?(char)
  return if char.nil?

  char.match?(/\p{Greek}/)
end
.hebrew?(char) ⇒ Boolean
267 268 269 |
# File 'lib/senko/format.rb', line 267
# Tests whether +char+ contains a character of the Hebrew script.
#
# @param char [String, nil] character to test
# @return [Boolean, nil] nil when +char+ is nil
def hebrew?(char)
  return if char.nil?

  char.match?(/\p{Hebrew}/)
end
.hostname?(value, ascii_only: true) ⇒ Boolean
61 62 63 64 65 66 67 68 69 |
# File 'lib/senko/format.rb', line 61
# Validates a host name label by label.
#
# @param value [String] candidate host name
# @param ascii_only [Boolean] when true, non-ASCII input is rejected and only
#   "." separates labels; when false, labels are split on IDN_SEPARATORS
# @return [Boolean] false for empty input, input longer than 253 characters,
#   any empty label, or any label rejected by +hostname_label?+
def hostname?(value, ascii_only: true)
  return false if value.empty? || value.length > 253
  return false if ascii_only && !value.ascii_only?

  separator = ascii_only ? '.' : IDN_SEPARATORS
  # Limit -1 keeps trailing empty fields so a trailing separator is caught.
  labels = value.split(separator, -1)
  return false if labels.empty? || labels.any?(&:empty?)

  labels.all? { |part| hostname_label?(part, ascii_only: ascii_only) }
end
.hostname_label?(label, ascii_only:) ⇒ Boolean
222 223 224 225 226 227 228 229 230 231 232 233 234 235 |
# File 'lib/senko/format.rb', line 222
# Validates one host-name label, in ASCII or internationalized form.
#
# Empty labels, ASCII labels longer than 63 characters and labels starting or
# ending with "-" are rejected first. In ASCII mode a label without the "xn--"
# prefix is delegated to +ascii_label?+. Everything else is checked in Unicode
# form (decoded with SimpleIDN when the label carries the "xn--" prefix).
#
# @param label [String] one label of a host name
# @param ascii_only [Boolean] whether only ASCII host names are acceptable
# @return [Boolean]
def hostname_label?(label, ascii_only:)
  return false if label.empty?
  return false if label.length > 63 && label.ascii_only?
  return false if label.start_with?('-') || label.end_with?('-')

  punycoded = label.downcase.start_with?('xn--')
  return ascii_label?(label) if ascii_only && !punycoded

  unicode = punycoded ? SimpleIDN.to_unicode(label.downcase) : label
  # "--" in the third and fourth positions of the Unicode form is rejected.
  return false if unicode[2, 2] == '--'
  return false unless canonical_idn_label?(label, unicode)
  return false if unicode.match?(DISALLOWED_IDNA) || unicode.match?(/\A\p{Mark}/)

  contextual_idn_label?(unicode)
end
.invalid_uri_ascii?(value, allow_relative: false) ⇒ Boolean
282 283 284 285 286 287 288 289 290 |
# File 'lib/senko/format.rb', line 282
# Screens URI text for characters and shapes that make it invalid.
#
# @param value [String] candidate URI or URI reference
# @param allow_relative [Boolean] when false, the text must start with a
#   scheme name followed by ":"
# @return [Boolean] true when the text is non-ASCII, contains whitespace or one
#   of < > \ { } | ^ ` ", has a malformed percent escape, or lacks a required
#   scheme
def invalid_uri_ascii?(value, allow_relative: false)
  return true unless value.ascii_only?
  return true if value.match?(/[\s<>\\]/) || value.match?(/[{}|^`"]/)
  return true if bad_percent_encoding?(value)

  !allow_relative && !value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
end
.ipv6?(value) ⇒ Boolean
275 276 277 278 279 280 |
# File 'lib/senko/format.rb', line 275
# Checks whether +value+ is a textual IPv6 address.
#
# Whitespace, "/" (prefix length) and "%" (zone id) are rejected before
# parsing, as is any value without a ":". Square brackets are rejected too:
# IPAddr.new strips a surrounding "[...]" itself, so without this guard a
# bracketed literal such as "[::1]" would be reported as a valid address.
#
# IPAddr raises IPAddr::InvalidAddressError for text it cannot parse; that
# error is not rescued here and propagates to the caller.
#
# @param value [String] candidate address
# @return [Boolean]
def ipv6?(value)
  return false unless value.ascii_only?
  return false if value.match?(%r{[\s/%\[\]]})
  return false unless value.include?(':')

  IPAddr.new(value).ipv6?
end
.iri?(value, absolute:) ⇒ Boolean
85 86 87 88 89 90 91 92 93 |
# File 'lib/senko/format.rb', line 85
# Validates an IRI or, when +absolute+ is false, an IRI reference.
#
# Whitespace, "<", ">", "\" and malformed percent escapes are always rejected.
# For references nothing further is checked. Absolute IRIs must also not carry
# an unbracketed IPv6 authority, must be parseable by Addressable (parse
# errors propagate to the caller) and must start with a scheme followed by ":".
#
# @param value [String] candidate IRI
# @param absolute [Boolean] whether a scheme is required
# @return [Boolean]
def iri?(value, absolute:)
  return false if value.match?(/[\s<>\\]/)
  return false if bad_percent_encoding?(value)
  return true unless absolute
  # The helper name had been lost here, leaving `if (value)`, which is always
  # truthy for a String and made every absolute IRI invalid.
  return false if unbracketed_ipv6_authority?(value)

  Addressable::URI.parse(value)
  value.match?(/\A[A-Za-z][A-Za-z0-9+\-.]*:/)
end
.leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute) ⇒ Boolean
158 159 160 161 162 163 164 165 166 167 |
# File 'lib/senko/format.rb', line 158
# Tells whether a local hour and minute, shifted by the given offset, land on
# 23:59 UTC.
#
# @param hour [Integer] local hour
# @param minute [Integer] local minute
# @param offset [String] "Z"/"z", or a signed offset whose first character is
#   "+" or "-"
# @param offset_hour [String, nil] offset hours, read with +to_i+
# @param offset_minute [String, nil] offset minutes, read with +to_i+
# @return [Boolean]
def leap_second_utc_minute?(hour, minute, offset, offset_hour, offset_minute)
  shift =
    if %w[Z z].include?(offset)
      0
    else
      magnitude = (offset_hour.to_i * 60) + offset_minute.to_i
      offset.start_with?('+') ? magnitude : -magnitude
    end

  # Wrap around midnight so the comparison is on the UTC minute of the day.
  utc_minute_of_day = ((hour * 60) + minute - shift) % 1440
  utc_minute_of_day == (23 * 60) + 59
end
.leap_year?(year) ⇒ Boolean
177 178 179 |
# File 'lib/senko/format.rb', line 177
# Gregorian leap-year test.
#
# @param year [Integer]
# @return [Boolean] true for years divisible by 4, except century years that
#   are not divisible by 400
def leap_year?(year)
  # Century years follow the 400-year rule; all others the 4-year rule.
  return (year % 400).zero? if (year % 100).zero?

  (year % 4).zero?
end
.local_part?(value, idn:) ⇒ Boolean
207 208 209 210 211 212 |
# File 'lib/senko/format.rb', line 207
# Validates the local part (the text before "@") of an e-mail address.
#
# A value starting with a double quote must be a complete quoted string. With
# +idn+ set, any dot-separated runs free of whitespace, "@" and "." are
# accepted; otherwise the value must match DOT_ATOM.
#
# @param value [String] local part
# @param idn [Boolean] whether non-ASCII local parts are allowed
# @return [Boolean]
def local_part?(value, idn:)
  pattern =
    if value.start_with?('"')
      /\A"(?:[^"\\]|\\.)+"\z/
    elsif idn
      /\A[^\s@.]+(?:\.[^\s@.]+)*\z/
    else
      DOT_ATOM
    end
  value.match?(pattern)
end
.rfc3339_date?(value) ⇒ Boolean
129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/senko/format.rb', line 129
# Validates a "YYYY-MM-DD" full-date, including the day-of-month range.
#
# @param value [String] candidate date
# @return [Boolean] true when the text matches RFC3339_DATE, the month is
#   1..12 and the day exists in that month
def rfc3339_date?(value)
  parts = value.match(RFC3339_DATE)
  return false unless parts

  year, month, day = parts.captures.map(&:to_i)
  month.between?(1, 12) && day.between?(1, days_in_month(year, month))
end
.rfc3339_date_time?(value) ⇒ Boolean
122 123 124 125 126 127 |
# File 'lib/senko/format.rb', line 122
# Validates a date-time by splitting it into date and time halves and checking
# each with +rfc3339_date?+ and +rfc3339_time?+.
#
# @param value [String] candidate date-time
# @return [Boolean]
def rfc3339_date_time?(value)
  parts = value.match(RFC3339_DATE_TIME)
  if parts
    rfc3339_date?(parts[1]) && rfc3339_time?(parts[2])
  else
    false
  end
end
.rfc3339_time?(value) ⇒ Boolean
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/senko/format.rb', line 141
# Validates a time with a mandatory offset, such as "23:59:60Z" or
# "08:30:06+05:30".
#
# Hours must be 0..23, minutes 0..59 and seconds 0..60. A numeric offset must
# have hours 0..23 and minutes 0..59. A seconds value of 60 is accepted only
# when +leap_second_utc_minute?+ agrees for the given time and offset.
#
# @param value [String] candidate time
# @return [Boolean]
def rfc3339_time?(value)
  parts = value.match(RFC3339_TIME)
  return false unless parts

  hour, minute, second = parts.values_at(1, 2, 3).map(&:to_i)
  offset = parts[4]
  return false unless hour.between?(0, 23) && minute.between?(0, 59)
  return false unless second.between?(0, 60)

  numeric_offset = !%w[Z z].include?(offset)
  offset_in_range = parts[5].to_i.between?(0, 23) && parts[6].to_i.between?(0, 59)
  return false if numeric_offset && !offset_in_range

  second < 60 || leap_second_utc_minute?(hour, minute, offset, parts[5], parts[6])
end
.split_email(value) ⇒ Object
193 194 195 196 197 198 199 200 201 202 203 204 205 |
# File 'lib/senko/format.rb', line 193
# Splits an e-mail address into its local part and its domain.
#
# For an address starting with a double quote the split point is the LAST
# occurrence of `"@`. A domain never contains a double quote, so that is the
# closing quote of the local part. Splitting at the first `"@` would cut a
# quoted local part such as `"a\"@b"` in the middle of its escaped quote.
#
# @param value [String] candidate address
# @return [Array(String, String), nil] +[local, domain]+ (a quoted local part
#   keeps its quotes), or nil when the address cannot be split: no closing
#   `"@` for a quoted local part, or not exactly one "@" otherwise
def split_email(value)
  if value.start_with?('"')
    index = value.rindex('"@')
    # Index 0 is the opening quote itself, so there is no closing quote.
    return nil unless index&.positive?

    [value[0..index], value[(index + 2)..]]
  else
    parts = value.split('@', -1)
    parts.length == 2 ? parts : nil
  end
end
.unbracketed_ipv6_authority?(value) ⇒ Boolean
296 297 298 299 300 301 302 |
# File 'lib/senko/format.rb', line 296
# Detects a "scheme://" URI whose authority holds an IPv6 address that is not
# wrapped in square brackets, e.g. "http://2001:db8::1/".
#
# The host is taken as the text after the first "@" of the authority (or the
# whole authority when there is no "@"). An empty authority, as in
# "file:///path", yields an empty host rather than nil. Two or more colons are
# required, because a single colon is just a "host:port" separator.
#
# @param value [String] candidate URI/IRI
# @return [Boolean] false when there is no "scheme://" authority at all
def unbracketed_ipv6_authority?(value)
  match = value.match(%r{\A[A-Za-z][A-Za-z0-9+\-.]*://([^/?#]*)})
  return false unless match

  # "".split returns [], whose #last is nil; to_s keeps the checks below safe.
  host = match[1].split('@', 2).last.to_s
  host.count(':') > 1 && !host.start_with?('[')
end
.uri_reference?(value) ⇒ Boolean
78 79 80 81 82 83 |
# File 'lib/senko/format.rb', line 78
# Validates a URI reference (a scheme is not required).
#
# The value is screened by +invalid_uri_ascii?+ with relative references
# allowed, then handed to Addressable; a parse error is not rescued here and
# propagates to the caller.
#
# @param value [String] candidate URI reference
# @return [Boolean] false when the screen rejects the value, true when
#   parsing completes
def uri_reference?(value)
  if invalid_uri_ascii?(value, allow_relative: true)
    false
  else
    Addressable::URI.parse(value)
    true
  end
end
.uri_template?(value) ⇒ Boolean
95 96 97 98 99 100 101 102 103 |
# File 'lib/senko/format.rb', line 95
# Checks that the braces of a URI template are balanced and not nested.
#
# Only brace structure is examined; the content of each expression is not.
#
# @param value [String] candidate template
# @return [Boolean] false for a "}" without an open "{", a "{" inside an open
#   expression, or an expression left open at the end
def uri_template?(value)
  open_braces = 0
  value.each_char do |char|
    case char
    when '{' then open_braces += 1
    when '}' then open_braces -= 1
    end
    return false unless open_braces.between?(0, 1)
  end
  open_braces.zero?
end
.valid?(name, value, custom_formats = {}) ⇒ Boolean
26 27 28 29 30 31 32 33 34 35 |
# File 'lib/senko/format.rb', line 26
# Runs the validator registered for a format name against a value.
#
# Custom validators take precedence over the built-in ones. Unknown formats
# and non-String values are treated as valid. The parsing errors listed in the
# rescue clause are converted into a false result.
#
# @param name [String] format name
# @param value [Object] instance value; only Strings are validated
# @param custom_formats [Hash{String => #call}] caller-supplied validators
# @return [Boolean, Object] true when no validation applies, false on a
#   rescued error, otherwise whatever the validator returns
def valid?(name, value, custom_formats = {})
  checker = custom_formats[name] || validators[name]
  return true unless checker && value.is_a?(String)

  checker.call(value)
rescue Addressable::URI::InvalidURIError, ArgumentError, IPAddr::InvalidAddressError, RegexpError,
       SimpleIDN::ConversionError, URI::InvalidURIError
  false
end
.validators ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/senko/format.rb', line 37
# Built-in validators keyed by format name.
#
# The table is built on first use and the same Hash is returned afterwards.
#
# @return [Hash{String => Proc}] each value is a lambda taking the String to
#   validate
def validators
  return @validators if @validators

  @validators = {
    # Dates and times
    'date-time' => ->(value) { rfc3339_date_time?(value) },
    'date' => ->(value) { rfc3339_date?(value) },
    'time' => ->(value) { rfc3339_time?(value) },
    'duration' => ->(value) { duration?(value) },
    # E-mail addresses and host names
    'email' => ->(value) { email?(value, idn: false) },
    'idn-email' => ->(value) { email?(value, idn: true) },
    'hostname' => ->(value) { hostname?(value) },
    'idn-hostname' => ->(value) { hostname?(value, ascii_only: false) },
    # IP addresses
    'ipv4' => ->(value) { value.match?(IPV4_DECIMAL) && IPAddr.new(value).ipv4? },
    'ipv6' => ->(value) { ipv6?(value) },
    # Resource identifiers
    'uri' => ->(value) { absolute_uri?(value) },
    'uri-reference' => ->(value) { uri_reference?(value) },
    'iri' => ->(value) { iri?(value, absolute: true) },
    'iri-reference' => ->(value) { iri?(value, absolute: false) },
    'uuid' => ->(value) { value.match?(UUID) },
    'uri-template' => ->(value) { uri_template?(value) },
    # JSON pointers and regular expressions
    'json-pointer' => ->(value) { value.match?(JSON_POINTER) },
    'relative-json-pointer' => ->(value) { value.match?(RELATIVE_JSON_POINTER) },
    'regex' => ->(value) { ecma_regex?(value) }
  }
end
.virama?(char) ⇒ Boolean
271 272 273 |
# File 'lib/senko/format.rb', line 271
# Tests whether +char+ is U+094D, the only code point this check recognises.
#
# @param char [String, nil] character to test
# @return [Boolean]
def virama?(char)
  recognised = "\u{094d}"
  char == recognised
end