Module: Odin::Transform::SourceParsers
- Defined in:
- lib/odin/transform/source_parsers.rb
Defined Under Namespace
Classes: FormatError
Class Method Summary
-
.parse_csv(input, headers: true, delimiter: ",") ⇒ Object
Parse CSV string into DynValue (array of objects, or array of arrays when headers is false).
-
.parse_fixed_width(input, columns:) ⇒ Object
Parse fixed-width text into DynValue. Each entry of columns is a hash of the form {name:, pos:, len:, trim: true}.
-
.parse_flat_kvp(input) ⇒ Object
Parse flat key=value pairs into DynValue.
-
.parse_json(input) ⇒ Object
Parse JSON string into DynValue.
-
.parse_xml(input) ⇒ Object
Parse XML string into DynValue.
-
.parse_yaml(input) ⇒ Object
Parse YAML string into DynValue.
-
.qualified_name(element) ⇒ Object
Get the full qualified name of an element (prefix:localName or just localName).
Class Method Details
.parse_csv(input, headers: true, delimiter: ",") ⇒ Object
Parse CSV string into DynValue (array of objects, or array of arrays when headers is false)
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/odin/transform/source_parsers.rb', line 22
#
# Turn CSV text into a DynValue array.
#
# With +headers+ truthy the first row supplies the field names and every
# following row becomes an object; a row shorter than the header row is
# padded with "" for the missing cells. With +headers+ falsy every row
# becomes an array of cells. Each cell is passed through infer_type.
#
# @param input [String, nil] CSV text
# @param headers [Boolean] treat the first row as column names
# @param delimiter [String] forwarded unchanged to parse_csv_rows
# @return [Object] result of Types::DynValue.of_array; empty for nil/blank
#   input, for no parsed rows, and for a header row without data rows
def self.parse_csv(input, headers: true, delimiter: ",")
  return Types::DynValue.of_array([]) if input.nil? || input.strip.empty?

  # Drop a leading UTF-8 byte-order mark, then replace invalid/undefined sequences.
  text = input.sub(/\A\xEF\xBB\xBF/n, "")
  text = text.encode("UTF-8", "UTF-8", invalid: :replace, undef: :replace)

  table = parse_csv_rows(text, delimiter)
  return Types::DynValue.of_array([]) if table.empty?

  unless headers
    grid = table.map do |cells|
      Types::DynValue.of_array(cells.map { |cell| infer_type(cell) })
    end
    return Types::DynValue.of_array(grid)
  end

  # Header row only, no data rows.
  return Types::DynValue.of_array([]) unless table.size > 1

  column_names = table[0]
  objects = table.drop(1).map do |cells|
    fields = {}
    column_names.each_with_index do |column, index|
      cell = index < cells.size ? cells[index] : ""
      fields[column] = infer_type(cell)
    end
    Types::DynValue.of_object(fields)
  end
  Types::DynValue.of_array(objects)
end
.parse_fixed_width(input, columns:) ⇒ Object
Parse fixed-width text into DynValue. Each entry of columns is a hash of the form {name:, pos:, len:, trim: true}
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/odin/transform/source_parsers.rb', line 80
#
# Slice fixed-width text into DynValue objects, one per non-empty line.
#
# Each column spec is read with [:pos] (default 0), [:len] (default 0),
# [:name] and fetch(:trim, true). A column that starts at or beyond the end
# of the line yields ""; a column that runs past the end is cut at the end.
#
# @param input [String, nil] fixed-width text
# @param columns [Array<Hash>] column specs as described above
# @return [Object] the single object when exactly one line was parsed,
#   otherwise the result of Types::DynValue.of_array over all line objects
#   (an empty array for nil/blank input)
# @raise [ArgumentError] if columns is nil or empty and input is not blank
def self.parse_fixed_width(input, columns:)
  return Types::DynValue.of_array([]) if input.nil? || input.strip.empty?
  raise ArgumentError, "Columns specification required" if columns.nil? || columns.empty?

  records = input.each_line.map(&:chomp).reject(&:empty?).map do |text|
    cells = columns.each_with_object({}) do |spec, acc|
      offset = spec[:pos] || 0
      width = spec[:len] || 0
      label = spec[:name]
      strip_cell = spec.fetch(:trim, true)

      slice = ""
      if offset < text.length
        stop = [offset + width, text.length].min
        slice = text[offset...stop] || ""
      end
      slice = slice.strip if strip_cell

      acc[label] = Types::DynValue.of_string(slice)
    end
    Types::DynValue.of_object(cells)
  end

  records.size == 1 ? records[0] : Types::DynValue.of_array(records)
end
.parse_flat_kvp(input) ⇒ Object
Parse flat key=value pairs into DynValue
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/odin/transform/source_parsers.rb', line 109
#
# Read "key=value" lines into a DynValue object.
#
# Blank lines, lines whose first non-blank character is "#" or ";", and
# lines without "=" are skipped. The line is split at the first "="; both
# sides are stripped, the value is converted by parse_flat_value and stored
# through set_nested. Every top-level value is passed through wrap_nested
# before the final object is built.
#
# @param input [String, nil] key=value text
# @return [Object] result of Types::DynValue.of_object (empty for nil/blank input)
def self.parse_flat_kvp(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  collected = {}
  input.each_line do |raw_line|
    text = raw_line.chomp.sub(/\r$/, "")
    trimmed = text.strip
    next if trimmed.empty? || trimmed.start_with?("#", ";")

    # partition splits at the first "="; an empty separator means none was found.
    left, separator, right = text.partition("=")
    next if separator.empty?

    set_nested(collected, left.strip, parse_flat_value(right.strip))
  end

  wrapped = collected.transform_values { |entry| wrap_nested(entry) }
  Types::DynValue.of_object(wrapped)
end
.parse_json(input) ⇒ Object
Parse JSON string into DynValue
12 13 14 15 16 17 18 19 |
# File 'lib/odin/transform/source_parsers.rb', line 12 def self.parse_json(input) raise ArgumentError, "Input cannot be nil or empty" if input.nil? || input.strip.empty? parsed = JSON.parse(input) Types::DynValue.from_json_value(parsed) rescue JSON::ParserError => e raise FormatError, "Invalid JSON: #{e.}" end |
.parse_xml(input) ⇒ Object
Parse XML string into DynValue
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/odin/transform/source_parsers.rb', line 56
#
# Parse an XML document into a DynValue object with a single entry: the root
# element's qualified name mapped to the result of parse_xml_element.
#
# @param input [String, nil] XML text
# @return [Object] result of Types::DynValue.of_object
# @raise [ArgumentError] if input is nil or contains only whitespace
# @raise [FormatError] if the document has no root element, or if REXML
#   raises a ParseException (its message is appended after "Invalid XML: ")
def self.parse_xml(input)
  raise ArgumentError, "Input cannot be nil or empty" if input.nil? || input.strip.empty?

  # Pre-process: REXML doesn't distinguish <tag/> from <tag></tag>, so every
  # self-closing element gets a synthetic __odin_sc="1" attribute as a marker.
  marked = input.gsub(/<([a-zA-Z_][\w:.-]*)\s*(\s[^>]*)?\/>/) do
    tag_name = Regexp.last_match(1)
    attrs = Regexp.last_match(2) || ""
    "<#{tag_name}#{attrs} __odin_sc=\"1\"/>"
  end

  root = REXML::Document.new(marked).root
  raise FormatError, "No root element found" unless root

  # NOTE(review): the 0 passed to parse_xml_element looks like a starting
  # depth; confirm against that helper.
  Types::DynValue.of_object({ qualified_name(root) => parse_xml_element(root, 0) })
rescue REXML::ParseException => e
  raise FormatError, "Invalid XML: #{e.message}"
end
.parse_yaml(input) ⇒ Object
Parse YAML string into DynValue
132 133 134 135 136 137 138 139 |
# File 'lib/odin/transform/source_parsers.rb', line 132
#
# Parse a YAML document into a DynValue.
#
# The text is loaded with YAML.safe_load, permitting Date, Time and
# BigDecimal in addition to safe_load's default classes.
#
# @param input [String, nil] YAML text
# @return [Object] result of Types::DynValue.from_ruby for the loaded
#   document, or an empty Types::DynValue.of_object for nil/blank input
# @raise [FormatError] on Psych::SyntaxError; the parser's message is
#   appended after "Invalid YAML: ". Only Psych::SyntaxError is translated;
#   any other exception raised by YAML.safe_load propagates unchanged.
def self.parse_yaml(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  loaded = YAML.safe_load(input, permitted_classes: [Date, Time, BigDecimal])
  Types::DynValue.from_ruby(loaded)
rescue Psych::SyntaxError => e
  raise FormatError, "Invalid YAML: #{e.message}"
end
.qualified_name(element) ⇒ Object
Get the full qualified name of an element (prefix:localName or just localName)
365 366 367 368 369 370 371 |
# File 'lib/odin/transform/source_parsers.rb', line 365
#
# Build the name used for an element: "prefix:name" when the element has a
# non-empty prefix, otherwise just its name.
#
# @param element [#prefix, #name] any object responding to prefix and name
# @return [String] the prefixed or bare name
def self.qualified_name(element)
  prefix = element.prefix
  return element.name if !prefix || prefix.empty?

  "#{prefix}:#{element.name}"
end