Module: Odin::Parsing::ValueParser
- Defined in:
- lib/odin/parsing/value_parser.rb
Constant Summary collapse
- RE_DATE =
/\A(\d{4})-(\d{2})-(\d{2})\z/.freeze
- RE_TIMESTAMP_DATE =
/\A(\d{4})-(\d{2})-(\d{2})T/.freeze
- RE_TIMESTAMP_FULL =
/\A(\d{4}-\d{2}-\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(Z|[+-]\d{2}:\d{2})?\z/.freeze
- RE_TIME_FULL =
/\AT(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?\z/.freeze
- RE_ISO_TS =
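Fast-path pattern for full ISO 8601 timestamps, e.g. 2024-01-15T10:30:00Z or 2024-01-15T10:30:00+05:30 (seconds are required; fractional seconds, the zone designator, and the colon inside the offset are optional).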
/\A(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.(\d+))?(Z|([+-]\d{2}):?(\d{2}))?\z/.freeze
Class Method Summary collapse
- .fast_parse_timestamp(raw) ⇒ Object
- .leap_year?(year) ⇒ Boolean
- .needs_raw?(raw, val) ⇒ Boolean
- .parse_binary(token) ⇒ Object
- .parse_boolean(token) ⇒ Object
- .parse_currency(token) ⇒ Object
- .parse_date(token) ⇒ Object
- .parse_duration(token) ⇒ Object
- .parse_integer(token) ⇒ Object
- .parse_number(token) ⇒ Object
- .parse_percent(token) ⇒ Object
- .parse_time(token) ⇒ Object
- .parse_timestamp(token) ⇒ Object
- .parse_value(token) ⇒ Object
- .parse_verb_name(token) ⇒ Object
- .raise_temporal_error(raw_str, token) ⇒ Object
- .validate_base64!(data, token) ⇒ Object
-
.validate_date!(date_str, token) ⇒ Object
— helpers — Validate a YYYY-MM-DD date string; raises when the day does not exist in the given month.
-
.validate_time!(time_str, token) ⇒ Object
Validate time-only value: THH:MM[:SS].
-
.validate_time_components!(hour_str, min_str, sec_str, raw_str, token) ⇒ Object
Validate hour/minute/second bounds.
-
.validate_timestamp!(ts_str, token) ⇒ Object
Validate timestamp date portion, time components, and timezone offset.
Class Method Details
.fast_parse_timestamp(raw) ⇒ Object
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 |
# File 'lib/odin/parsing/value_parser.rb', line 353
#
# Fast path for strict ISO 8601 timestamps (those matching RE_ISO_TS), built
# with DateTime.new instead of the slower DateTime.parse.
#
# @param raw [String] candidate timestamp text
# @return [DateTime, nil] the parsed value, or nil when raw does not match
#   RE_ISO_TS or its components do not form a valid DateTime
def fast_parse_timestamp(raw)
  m = RE_ISO_TS.match(raw)
  return nil unless m

  year, month, day = m[1].to_i, m[2].to_i, m[3].to_i
  hour, minute, whole_sec = m[4].to_i, m[5].to_i, m[6].to_i
  frac = m[7]

  # A missing zone designator is treated the same as "Z" (UTC).
  # m[9] is the signed offset hour, m[10] the offset minutes.
  offset = m[8].nil? || m[8] == "Z" ? "+00:00" : "#{m[9]}:#{m[10]}"

  # Keep fractional seconds exact by going through Rational, not Float.
  sec = frac ? Rational("#{whole_sec}.#{frac}") : whole_sec
  DateTime.new(year, month, day, hour, minute, sec, offset)
rescue ArgumentError
  # Date::Error is an ArgumentError subclass: invalid calendar/time components
  # mean "no fast-path result", and the caller falls back to DateTime.parse.
  nil
end
.leap_year?(year) ⇒ Boolean
274 275 276 |
# File 'lib/odin/parsing/value_parser.rb', line 274
#
# Gregorian leap-year rule: divisible by 400, or divisible by 4 but not by 100.
#
# @param year [Integer]
# @return [Boolean]
def leap_year?(year)
  return true if (year % 400).zero?
  return false if (year % 100).zero?

  (year % 4).zero?
end
.needs_raw?(raw, val) ⇒ Boolean
370 371 372 373 374 375 376 377 |
# File 'lib/odin/parsing/value_parser.rb', line 370
#
# Decide whether the raw text must be kept next to the parsed float because
# the float's own rendering would not reproduce it.
#
# @param raw [String] original numeric text
# @param val [Float] the value parsed from raw
# @return [Boolean] true when raw and the rendered value differ after
#   trailing zeros are stripped, or when the comparison itself fails
def needs_raw?(raw, val)
  strip_zeros = ->(text) { text.gsub(/\.?0+\z/, "") }

  # Render as an integer only when the value is whole and the raw text has
  # neither a decimal point nor an exponent.
  integer_like = val == val.to_i && [".", "e", "E"].none? { |mark| raw.include?(mark) }
  rendered = integer_like ? val.to_i.to_s : val.to_s

  strip_zeros.call(raw) != strip_zeros.call(rendered)
rescue StandardError
  # e.g. val.to_i on an infinite float raises; keep raw in that case.
  true
end
.parse_binary(token) ⇒ Object
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
# File 'lib/odin/parsing/value_parser.rb', line 213
#
# Build an OdinBinary from a binary token. The token value is either bare
# Base64 text or "algorithm:base64".
#
# The Base64 payload is validated by validate_base64! but is not decoded here:
# the decoded bytes were never used, so the decode step has been removed.
#
# @param token [#value, #line, #column] the binary token
# @return [Types::OdinBinary]
# @raise [Errors::ParseError] via validate_base64! when the payload contains
#   invalid characters or padding
def parse_binary(token)
  raw = token.value

  # Handle empty binary
  return Types::OdinBinary.new("", algorithm: nil) if raw.nil? || raw.empty?

  algorithm = nil
  base64_data = raw

  # Check for algorithm prefix (identifier:base64). Anchored with \z rather
  # than $, so text after an embedded newline is not silently dropped.
  prefixed = /\A([a-zA-Z][a-zA-Z0-9]*):(.*)\z/.match(raw)
  if prefixed
    algorithm = prefixed[1]
    base64_data = prefixed[2]
  end

  # Validate base64 characters
  validate_base64!(base64_data, token) unless base64_data.empty?

  Types::OdinBinary.new(base64_data, algorithm: algorithm)
end
.parse_boolean(token) ⇒ Object
145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/odin/parsing/value_parser.rb', line 145
#
# Map the literal "true"/"false" to the shared boolean value objects.
#
# @param token [#value, #line, #column]
# @return [Object] Types::TRUE_VAL or Types::FALSE_VAL
# @raise [Errors::ParseError] for any other token value
def parse_boolean(token)
  literal = token.value
  return Types::TRUE_VAL if literal == "true"
  return Types::FALSE_VAL if literal == "false"

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
    token.line, token.column,
    "Invalid boolean: #{token.value}"
  )
end
.parse_currency(token) ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/odin/parsing/value_parser.rb', line 87
#
# Parse a currency token of the form "amount" or "amount:code".
#
# The amount is parsed with BigDecimal. decimal_places is the number of
# fraction digits written in the mantissa, but never less than 2.
#
# @param token [#value, #line, #column]
# @return [Types::OdinCurrency]
# @raise [Errors::ParseError] when the value is empty or the amount is not a
#   valid decimal
def parse_currency(token)
  raw = token.value
  blank = raw.nil? || raw.empty?
  if blank
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
      token.line, token.column,
      "Invalid numeric format"
    )
  end

  # Split off an optional ":CODE" suffix; an empty suffix means no code.
  numeric_part, suffix = raw.include?(":") ? raw.split(":", 2) : [raw, nil]
  currency_code = suffix.upcase unless suffix.nil? || suffix.empty?

  amount = BigDecimal(numeric_part)

  # Count decimal places in the mantissa (the part before any exponent).
  dp = 2
  if numeric_part.include?(".")
    mantissa = numeric_part.split(/e/i, 2).first
    fraction = mantissa.split(".")[1].to_s
    dp = fraction.length if fraction.length > 2
  end

  Types::OdinCurrency.new(amount, currency_code: currency_code, decimal_places: dp, raw: numeric_part)
rescue ArgumentError
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
    token.line, token.column,
    "Invalid currency format: #{raw}"
  )
end
.parse_date(token) ⇒ Object
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/odin/parsing/value_parser.rb', line 161
#
# Parse a date token into an OdinDate.
#
# Text matching RE_DATE is built directly with Date.new; anything else is
# handed to Date.parse.
#
# @param token [#value, #line, #column]
# @return [Types::OdinDate]
# @raise [Errors::ParseError] when validation fails or the date cannot be built
def parse_date(token)
  raw = token.value
  validate_date!(raw, token)

  parts = RE_DATE.match(raw)
  date = parts ? Date.new(parts[1].to_i, parts[2].to_i, parts[3].to_i) : Date.parse(raw)
  Types::OdinDate.new(date, raw: raw)
rescue Date::Error, ArgumentError
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    token.line, token.column,
    "Invalid date: #{raw}"
  )
end
.parse_duration(token) ⇒ Object
200 201 202 203 204 205 206 207 208 209 210 211 |
# File 'lib/odin/parsing/value_parser.rb', line 200
#
# Wrap a duration token in an OdinDuration after a basic shape check.
#
# Only the leading "P" is checked here; the rest of the text is passed through
# unchanged.
#
# @param token [#value, #line, #column]
# @return [Types::OdinDuration]
# @raise [Errors::ParseError] when the value is nil or does not start with "P"
def parse_duration(token)
  raw = token.value

  # Basic validation: must start with P. Safe navigation makes a nil value a
  # ParseError (like the other parsers) instead of a NoMethodError.
  unless raw&.start_with?("P")
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::BARE_STRING_NOT_ALLOWED,
      token.line, token.column,
      "Invalid duration format: #{raw}"
    )
  end

  Types::OdinDuration.new(raw)
end
.parse_integer(token) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/odin/parsing/value_parser.rb', line 58
#
# Parse an integer token into an OdinInteger.
#
# Plain base-10 text is parsed exactly with Integer(), so values beyond the
# 2^53 float-safe range keep every digit. Other numeric spellings that Float()
# accepts (e.g. "5.0", "1e3") are still accepted when they denote a whole,
# finite number.
#
# @param token [#value, #line, #column]
# @return [Types::OdinInteger] raw is kept when the value is outside the
#   2^53-1 safe range or the text is longer than 15 characters
# @raise [Errors::ParseError] when the value is empty, not numeric, not
#   finite, or has a fractional part
def parse_integer(token)
  raw = token.value
  if raw.nil? || raw.empty?
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
      token.line, token.column,
      "Invalid numeric format"
    )
  end

  # Exact path first: going through Float would round large integers.
  val = Integer(raw, 10, exception: false)
  if val.nil?
    fval = Float(raw) # raises ArgumentError for non-numeric text

    # Overflowing text such as "1e999" becomes Infinity; reject it here
    # rather than letting to_i raise FloatDomainError.
    unless fval.finite?
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
        token.line, token.column,
        "Invalid integer format: #{raw}"
      )
    end

    unless fval == fval.to_i
      raise Errors::ParseError.new(
        Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
        token.line, token.column,
        "Integer (##) value cannot have a fractional part: #{raw}"
      )
    end
    val = fval.to_i
  end

  # Beyond the 2^53-1 safe integer range, store raw
  safe = val.abs <= 9_007_199_254_740_991
  Types::OdinInteger.new(val, raw: safe && raw.length <= 15 ? nil : raw)
rescue ArgumentError
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
    token.line, token.column,
    "Invalid integer format: #{raw}"
  )
end
.parse_number(token) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/odin/parsing/value_parser.rb', line 37
#
# Parse a number token into an OdinNumber backed by a Float.
#
# The raw text is kept when it is longer than 15 characters or when
# needs_raw? reports that the float would not render back to the same text.
#
# @param token [#value, #line, #column]
# @return [Types::OdinNumber]
# @raise [Errors::ParseError] when the value is empty or not a valid float
def parse_number(token)
  raw = token.value
  blank = raw.nil? || raw.empty?
  if blank
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
      token.line, token.column,
      "Invalid numeric format"
    )
  end

  number = Float(raw)
  # Store raw if high precision (> 15 significant digits)
  keep_raw = raw.length > 15 || needs_raw?(raw, number)
  Types::OdinNumber.new(number, raw: (raw if keep_raw))
rescue ArgumentError
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
    token.line, token.column,
    "Invalid numeric format: #{raw}"
  )
end
.parse_percent(token) ⇒ Object
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/odin/parsing/value_parser.rb', line 126
#
# Parse a percent token into an OdinPercent; the raw text is always kept.
#
# @param token [#value, #line, #column]
# @return [Types::OdinPercent]
# @raise [Errors::ParseError] when the value is empty or not a valid float
def parse_percent(token)
  raw = token.value
  blank = raw.nil? || raw.empty?
  if blank
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
      token.line, token.column,
      "Invalid numeric format"
    )
  end

  Types::OdinPercent.new(Float(raw), raw: raw)
rescue ArgumentError
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::INVALID_TYPE_PREFIX,
    token.line, token.column,
    "Invalid percent format: #{raw}"
  )
end
.parse_time(token) ⇒ Object
195 196 197 198 |
# File 'lib/odin/parsing/value_parser.rb', line 195
#
# Validate a time-only token and wrap its text in an OdinTime.
#
# @param token [#value, #line, #column]
# @return [Types::OdinTime]
# @raise [Errors::ParseError] via validate_time! when the text is not a valid time
def parse_time(token)
  text = token.value
  validate_time!(text, token)
  Types::OdinTime.new(text)
end
.parse_timestamp(token) ⇒ Object
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/odin/parsing/value_parser.rb', line 179
#
# Parse a timestamp token into an OdinTimestamp.
#
# @param token [#value, #line, #column]
# @return [Types::OdinTimestamp] wraps the parsed DateTime and keeps the raw text
# @raise [Errors::ParseError] when validation fails or the text cannot be
#   turned into a DateTime
def parse_timestamp(token)
  raw = token.value

  # Validate date portion, time components, and timezone offset
  validate_timestamp!(raw, token)

  # DateTime.new is much faster than DateTime.parse, so try the fast path for
  # ISO 8601 timestamps first and fall back to DateTime.parse.
  dt = fast_parse_timestamp(raw) || DateTime.parse(raw)
  Types::OdinTimestamp.new(dt, raw: raw)
rescue Date::Error, ArgumentError
  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    token.line, token.column,
    "Invalid timestamp: #{raw}"
  )
end
.parse_value(token) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/odin/parsing/value_parser.rb', line 12
#
# Convert a value token into the matching Odin value object by dispatching on
# token.type.
#
# @param token [#type, #value, #line, #column]
# @return [Object] the value built for the token's type
# @raise [Errors::ParseError] for an unrecognised token type, or from the
#   type-specific parser it delegates to
def parse_value(token)
  case token.type
  when TokenType::STRING then Types::OdinString.new(token.value)
  when TokenType::NUMBER then parse_number(token)
  when TokenType::INTEGER then parse_integer(token)
  when TokenType::CURRENCY then parse_currency(token)
  when TokenType::PERCENT then parse_percent(token)
  when TokenType::BOOLEAN then parse_boolean(token)
  when TokenType::NULL then Types::NULL
  when TokenType::DATE then parse_date(token)
  when TokenType::TIMESTAMP then parse_timestamp(token)
  when TokenType::TIME then parse_time(token)
  when TokenType::DURATION then parse_duration(token)
  when TokenType::REFERENCE then Types::OdinReference.new(token.value)
  when TokenType::BINARY then parse_binary(token)
  when TokenType::VERB then parse_verb_name(token)
  else
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      token.line, token.column,
      "Unexpected token type: #{token.type}"
    )
  end
end
.parse_verb_name(token) ⇒ Object
244 245 246 247 248 249 250 |
# File 'lib/odin/parsing/value_parser.rb', line 244
#
# Build an empty verb expression from a verb token. A leading "&" marks the
# verb as custom and is removed from the stored name.
#
# @param token [#value]
# @return [Types::OdinVerbExpression] with no args; the parser fills them in later
def parse_verb_name(token)
  name = token.value
  custom = name.start_with?("&")
  verb_name =
    if custom
      name[1..]
    else
      name
    end

  # Args will be filled in by the parser
  Types::OdinVerbExpression.new(verb_name, is_custom: custom, args: [])
end
.raise_temporal_error(raw_str, token) ⇒ Object
322 323 324 325 326 327 328 |
# File 'lib/odin/parsing/value_parser.rb', line 322
#
# Raise the shared ParseError used for invalid time and timestamp values.
#
# @param raw_str [String] the offending text, echoed in the message
# @param token [#line, #column] supplies the error position
# @raise [Errors::ParseError] always
def raise_temporal_error(raw_str, token)
  message = "Invalid temporal value: #{raw_str}"
  code = Errors::ParseErrorCode::UNEXPECTED_CHARACTER
  raise Errors::ParseError.new(code, token.line, token.column, message)
end
.validate_base64!(data, token) ⇒ Object
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 |
# File 'lib/odin/parsing/value_parser.rb', line 330
#
# Check that data uses only the Base64 alphabet, with "=" padding appearing
# only at the end and at most twice.
#
# @param data [String] Base64 text to check
# @param token [#line, #column] supplies the error position
# @return [nil] when data is acceptable
# @raise [Errors::ParseError] "Invalid Base64 character" or "Invalid Base64 padding"
def validate_base64!(data, token)
  reject = lambda do |message|
    raise Errors::ParseError.new(
      Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
      token.line, token.column,
      message
    )
  end

  # Check for invalid characters
  reject.call("Invalid Base64 character") unless data.match?(/\A[A-Za-z0-9+\/]*=*\z/)
  return unless data.include?("=")

  # Check padding position - padding only at end
  reject.call("Invalid Base64 padding") unless data.match?(/\A[A-Za-z0-9+\/]*={0,2}\z/)
end
.validate_date!(date_str, token) ⇒ Object
— helpers — Validate a YYYY-MM-DD date string; raises when the day does not exist in the given month.
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 |
# File 'lib/odin/parsing/value_parser.rb', line 254
#
# Validate the month and day of a YYYY-MM-DD date string.
#
# Text that does not have the YYYY-MM-DD shape is not judged here and returns
# without error. A month outside 1..12 is rejected the same way as a day that
# does not exist in its month.
#
# @param date_str [String] candidate date text
# @param token [#line, #column] supplies the error position
# @return [nil] when the date is acceptable or not in YYYY-MM-DD form
# @raise [Errors::ParseError] when the month is outside 1..12 or the day is
#   outside the month's length
def validate_date!(date_str, token)
  parts = /\A(\d{4})-(\d{2})-(\d{2})\z/.match(date_str)
  return unless parts

  year, month, day = parts.captures.map(&:to_i)

  # nil for a month outside 1..12, which is rejected below.
  max_days =
    case month
    when 1, 3, 5, 7, 8, 10, 12 then 31
    when 4, 6, 9, 11 then 30
    when 2 then leap_year?(year) ? 29 : 28
    end

  return if max_days && day.between?(1, max_days)

  raise Errors::ParseError.new(
    Errors::ParseErrorCode::UNEXPECTED_CHARACTER,
    token.line, token.column,
    "Invalid date: #{date_str}"
  )
end
.validate_time!(time_str, token) ⇒ Object
Validate time-only value: THH:MM[:SS].
298 299 300 301 302 303 304 |
# File 'lib/odin/parsing/value_parser.rb', line 298
#
# Validate a time-only value against RE_TIME_FULL, then bounds-check its
# hour, minute and optional second.
#
# @param time_str [String] candidate time text
# @param token [#line, #column] supplies the error position
# @raise [Errors::ParseError] via raise_temporal_error when the shape or a
#   component is invalid
def validate_time!(time_str, token)
  parts = RE_TIME_FULL.match(time_str)
  raise_temporal_error(time_str, token) if parts.nil?

  hour, minute, second = parts[1], parts[2], parts[3]
  validate_time_components!(hour, minute, second, time_str, token)
end
.validate_time_components!(hour_str, min_str, sec_str, raw_str, token) ⇒ Object
Validate hour/minute/second bounds. Hour 24 only as end-of-day midnight; second may be 60 (leap second).
308 309 310 311 312 313 314 315 316 317 318 319 320 |
# File 'lib/odin/parsing/value_parser.rb', line 308
#
# Bounds-check hour, minute and second. Hour 24 is allowed only with zero
# minutes and seconds; second may be 60.
#
# @param hour_str [String] two-digit hour
# @param min_str [String] two-digit minute
# @param sec_str [String, nil] two-digit second; nil counts as 0
# @param raw_str [String] the full original text, used in the error message
# @param token [#line, #column] supplies the error position
# @return [nil] when every component is in range
# @raise [Errors::ParseError] via raise_temporal_error when any bound is violated
def validate_time_components!(hour_str, min_str, sec_str, raw_str, token)
  hh = hour_str.to_i
  mm = min_str.to_i
  ss = sec_str.to_i # nil.to_i is 0, so a missing second counts as zero

  hour_invalid =
    if hh == 24
      mm != 0 || ss != 0
    else
      hh > 23
    end

  raise_temporal_error(raw_str, token) if hour_invalid || mm > 59 || ss > 60
end
.validate_timestamp!(ts_str, token) ⇒ Object
Validate timestamp date portion, time components, and timezone offset.
282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
# File 'lib/odin/parsing/value_parser.rb', line 282
#
# Validate a timestamp's date portion, time components, and timezone offset.
#
# @param ts_str [String] candidate timestamp text
# @param token [#line, #column] supplies the error position
# @return [nil] when the timestamp is acceptable
# @raise [Errors::ParseError] when the text does not match RE_TIMESTAMP_FULL,
#   or the date, time or offset is out of range
def validate_timestamp!(ts_str, token)
  m = RE_TIMESTAMP_FULL.match(ts_str)
  raise_temporal_error(ts_str, token) unless m

  validate_date!(m[1], token)
  validate_time_components!(m[2], m[3], m[4], ts_str, token)

  offset = m[5]
  return if offset.nil? || offset == "Z"

  # offset has the shape "+HH:MM" / "-HH:MM": hour at index 1, minute at 4.
  off_hour = offset[1, 2].to_i
  off_min = offset[4, 2].to_i
  raise_temporal_error(ts_str, token) if off_hour > 23 || off_min > 59
end