Module: Odin::Transform::SourceParsers

Defined in:
lib/odin/transform/source_parsers.rb

Defined Under Namespace

Classes: FormatError

Class Method Summary collapse

Class Method Details

.parse_csv(input, headers: true, delimiter: ",") ⇒ Object

Parse CSV string into DynValue (array of objects when headers is true, array of arrays otherwise)



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/odin/transform/source_parsers.rb', line 22

# Parse a CSV string into a DynValue array.
#
# With +headers+ enabled the first row supplies the field names and every
# following row becomes an object; a cell missing from a short row is read
# as "". With +headers+ disabled every row becomes an array of cells.
# Each cell is passed through +infer_type+.
#
# The input bytes are interpreted as UTF-8. Invalid sequences are replaced
# before any other processing, and a leading byte-order mark is dropped.
#
# @param input [String, nil] raw CSV text
# @param headers [Boolean] treat the first row as column names
# @param delimiter [String] field separator handed to +parse_csv_rows+
# @return [Object] DynValue array; empty when the input is nil or blank,
#   yields no rows, or (with headers) contains only the header row
def self.parse_csv(input, headers: true, delimiter: ",")
  return Types::DynValue.of_array([]) if input.nil?

  # Scrub before anything else: regexp matching raises ArgumentError on a
  # UTF-8 string that contains invalid bytes, so replacing them afterwards
  # (as was done previously) never took effect for such input.
  text = input.dup.force_encoding(Encoding::UTF_8).scrub
  return Types::DynValue.of_array([]) if text.strip.empty?

  # Drop a leading BOM. A plain prefix comparison avoids the "historical
  # binary regexp match" warning an /n regexp emits on non-ASCII UTF-8 text.
  text = text.delete_prefix("\uFEFF")

  rows = parse_csv_rows(text, delimiter)
  return Types::DynValue.of_array([]) if rows.empty?

  unless headers
    items = rows.map do |row|
      Types::DynValue.of_array(row.map { |cell| infer_type(cell) })
    end
    return Types::DynValue.of_array(items)
  end

  # A header-only input leaves data_rows empty, which yields an empty array.
  header_row, *data_rows = rows
  items = data_rows.map do |row|
    fields = {}
    header_row.each_with_index do |col, i|
      # Short rows are padded with empty strings.
      fields[col] = infer_type(i < row.size ? row[i] : "")
    end
    Types::DynValue.of_object(fields)
  end
  Types::DynValue.of_array(items)
end

.parse_fixed_width(input, columns:) ⇒ Object

Parse fixed-width text into DynValue. columns: [{ name:, pos:, len:, trim: true }]

Raises:

  • (ArgumentError)


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/odin/transform/source_parsers.rb', line 80

# Parse fixed-width text into DynValue objects, one per non-empty line.
#
# Each entry of +columns+ is read with the keys +:name+, +:pos+ (defaults
# to 0), +:len+ (defaults to 0) and +:trim+ (defaults to true). A column
# that starts beyond the end of a line yields "", and a column that runs
# past the end is cut at the line end. Every field is stored as a string.
#
# @param input [String, nil] fixed-width text
# @param columns [Array<Hash>] column specifications
# @return [Object] an empty DynValue array for nil/blank input, the single
#   object when exactly one line was parsed, otherwise a DynValue array
# @raise [ArgumentError] when input is non-blank and +columns+ is nil or empty
def self.parse_fixed_width(input, columns:)
  return Types::DynValue.of_array([]) if input.nil? || input.strip.empty?
  raise ArgumentError, "Columns specification required" if columns.nil? || columns.empty?

  records = input.lines.map(&:chomp).reject(&:empty?).map do |text|
    cells = columns.each_with_object({}) do |spec, acc|
      offset = spec[:pos] || 0
      width = spec[:len] || 0
      label = spec[:name]
      strip_it = spec.fetch(:trim, true)

      piece = ""
      if offset < text.length
        # Clamp the slice so a short line does not read past its end.
        stop = [offset + width, text.length].min
        piece = text[offset...stop] || ""
      end
      piece = piece.strip if strip_it
      acc[label] = Types::DynValue.of_string(piece)
    end
    Types::DynValue.of_object(cells)
  end

  # A single parsed line is returned bare rather than wrapped in an array.
  records.size == 1 ? records[0] : Types::DynValue.of_array(records)
end

.parse_flat_kvp(input) ⇒ Object

Parse flat key=value pairs into DynValue



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/odin/transform/source_parsers.rb', line 109

# Parse flat key=value lines into a DynValue object.
#
# Blank lines, lines whose first non-blank character is "#" or ";", and
# lines without "=" are skipped. A line is split at its first "="; key and
# value are stripped, the value goes through +parse_flat_value+ and is
# stored with +set_nested+. Each top-level entry is finally passed through
# +wrap_nested+.
#
# @param input [String, nil] text with one key=value pair per line
# @return [Object] DynValue object; empty for nil or blank input
def self.parse_flat_kvp(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  tree = {}
  input.each_line do |raw|
    entry = raw.chomp.sub(/\r$/, "")
    trimmed = entry.strip
    next if trimmed.empty? || trimmed.start_with?("#", ";")

    # Split on the first "=" only, so the value itself may contain "=".
    name, sep, rest = entry.partition("=")
    next if sep.empty?

    set_nested(tree, name.strip, parse_flat_value(rest.strip))
  end

  wrapped = tree.transform_values { |node| wrap_nested(node) }
  Types::DynValue.of_object(wrapped)
end

.parse_json(input) ⇒ Object

Parse JSON string into DynValue



12
13
14
15
16
17
18
19
# File 'lib/odin/transform/source_parsers.rb', line 12

# Parse a JSON string into a DynValue.
#
# @param input [String, nil] JSON text
# @return [Object] result of Types::DynValue.from_json_value for the parsed data
# @raise [ArgumentError] when input is nil or contains only whitespace
# @raise [FormatError] when the JSON parser rejects the input
def self.parse_json(input)
  blank = input.nil? || input.strip.empty?
  raise ArgumentError, "Input cannot be nil or empty" if blank

  begin
    Types::DynValue.from_json_value(JSON.parse(input))
  rescue JSON::ParserError => e
    # Surface parser failures under the module's own error type.
    raise FormatError, "Invalid JSON: #{e.message}"
  end
end

.parse_xml(input) ⇒ Object

Parse XML string into DynValue



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/odin/transform/source_parsers.rb', line 56

# Parse an XML string into a DynValue object keyed by the root element's
# qualified name.
#
# @param input [String, nil] XML text
# @return [Object] DynValue object of the form { root_name => content },
#   where content comes from +parse_xml_element+
# @raise [ArgumentError] when input is nil or contains only whitespace
# @raise [FormatError] when the document has no root element or REXML
#   reports a parse error
def self.parse_xml(input)
  raise ArgumentError, "Input cannot be nil or empty" if input.nil? || input.strip.empty?

  # REXML does not distinguish <tag/> from <tag></tag>, so every
  # self-closing element gets a synthetic marker attribute before parsing.
  # NOTE(review): the marker is presumably consumed by parse_xml_element,
  # which is not visible here.
  self_closing = /<([a-zA-Z_][\w:.-]*)\s*(\s[^>]*)?\/>/
  annotated = input.gsub(self_closing) do
    hit = Regexp.last_match
    element = hit[1]
    attributes = hit[2] || ""
    "<#{element}#{attributes} __odin_sc=\"1\"/>"
  end

  top = REXML::Document.new(annotated).root
  raise FormatError, "No root element found" unless top

  Types::DynValue.of_object({ qualified_name(top) => parse_xml_element(top, 0) })
rescue REXML::ParseException => e
  raise FormatError, "Invalid XML: #{e.message}"
end

.parse_yaml(input) ⇒ Object

Parse YAML string into DynValue



132
133
134
135
136
137
138
139
# File 'lib/odin/transform/source_parsers.rb', line 132

# Parse a YAML string into a DynValue.
#
# Loading uses YAML.safe_load with Date, Time and BigDecimal permitted in
# addition to the loader's default types.
#
# @param input [String, nil] YAML text
# @return [Object] empty DynValue object for nil or blank input, otherwise
#   the result of Types::DynValue.from_ruby for the loaded data
# @raise [FormatError] when the YAML is malformed, references a class that
#   is not permitted (for example a Symbol scalar), or uses aliases
def self.parse_yaml(input)
  return Types::DynValue.of_object({}) if input.nil? || input.strip.empty?

  parsed = YAML.safe_load(input, permitted_classes: [Date, Time, BigDecimal])
  Types::DynValue.from_ruby(parsed)
rescue Psych::SyntaxError, Psych::DisallowedClass, Psych::BadAlias => e
  # safe_load rejects disallowed classes and aliases with their own Psych
  # errors; report them as FormatError just like syntax errors so callers
  # see one error type for unusable input.
  raise FormatError, "Invalid YAML: #{e.message}"
end

.qualified_name(element) ⇒ Object

Get the full qualified name of an element (prefix:localName or just localName)



365
366
367
368
369
370
371
# File 'lib/odin/transform/source_parsers.rb', line 365

# Build the qualified name of an element: "prefix:name" when the element
# has a non-empty prefix, otherwise just its name.
#
# @param element [#prefix, #name] element to name
# @return [String] the qualified name
def self.qualified_name(element)
  ns = element.prefix
  return element.name if !ns || ns.empty?

  "#{ns}:#{element.name}"
end