Class: Udb::Yaml::Resolver

Inherits:
Object
  • Object
show all
Extended by:
T::Sig
Defined in:
lib/udb/yaml/yaml_resolver.rb

Overview

Ruby implementation of YAML resolver that preserves comments and order

Instance Method Summary collapse

Constructor Details

#initialize(quiet: false, compile_idl: false, schemas_path: nil) ⇒ Resolver

Returns a new instance of Resolver.



34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/udb/yaml/yaml_resolver.rb', line 34

# Creates a resolver with empty caches.
#
# @param quiet [Boolean] stored in @quiet; the merge/resolve entry points
#   skip their final log line when it is true
# @param compile_idl [Boolean] when true an Idl::Compiler is built immediately
#   and kept in @compiler; otherwise @compiler stays nil
# @param schemas_path [String, Pathname, nil] wrapped in a Pathname unless nil
def initialize(quiet: false, compile_idl: false, schemas_path: nil)
  @quiet = T.let(quiet, T::Boolean)
  @compile_idl = T.let(compile_idl, T::Boolean)
  # The compiler is only instantiated when IDL compilation was requested.
  @compiler = T.let(@compile_idl ? Idl::Compiler.new : nil, T.nilable(Idl::Compiler))

  # Cache keyed by relative file path; each entry holds :data and :comments.
  @resolved_objs = T.let({}, T::Hash[String, T::Hash[Symbol, T.untyped]])
  @current_comment_map = T.let(nil, T.nilable(CommentMap))

  explicit_schemas_dir = (Pathname.new(schemas_path) unless schemas_path.nil?)
  @schemas_path = T.let(explicit_schemas_dir, T.nilable(Pathname))
  # Filled lazily by #schema_version_map.
  @schema_version_map = T.let(nil, T.nilable(T::Hash[String, String]))
end

Instance Method Details

#add_parent_of_reference(parent_obj, child_ref) ⇒ Object



565
566
567
568
569
570
571
572
573
574
# File 'lib/udb/yaml/yaml_resolver.rb', line 565

# Records +child_ref+ under the "$parent_of" key of +parent_obj+.
#
# A single reference is stored as a bare value and several references as an
# array; a reference that is already present is not added twice.
#
# @param parent_obj [Hash] object that receives the "$parent_of" entry (mutated)
# @param child_ref [Object] reference to the child
# @return [Object] the value now stored under "$parent_of"
def add_parent_of_reference(parent_obj, child_ref)
  return parent_obj["$parent_of"] = child_ref unless parent_obj.key?("$parent_of")

  current = parent_obj["$parent_of"]
  refs = current.is_a?(Array) ? current : [current]
  refs.push(child_ref) unless refs.include?(child_ref)

  # Collapse a one-element list back to a scalar.
  parent_obj["$parent_of"] = refs.length == 1 ? refs[0] : refs
end

#build_line_file_offsets(idl_string, first_line_file_offset, file_contents) ⇒ Object



939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
# File 'lib/udb/yaml/yaml_resolver.rb', line 939

# Computes, for every line of +idl_string+, the byte offset in +file_contents+
# at which that line's content begins.
#
# All arithmetic is done on a binary copy of the file so that multi-byte UTF-8
# characters cannot skew the offsets.
#
# @param idl_string [String] the block text; one offset is produced per line
# @param first_line_file_offset [Integer] byte offset of the start of the first
#   content line in the file
# @param file_contents [String] complete file text
# @return [Array<Integer>] one byte offset per line of +idl_string+
def build_line_file_offsets(idl_string, first_line_file_offset, file_contents)
  raw = file_contents.b
  total = raw.bytesize
  space_byte = 32    # ASCII space
  newline_byte = 10  # "\n"

  # Byte offset just past the next "\n" at or after +pos+, or EOF if none.
  following_line_start = lambda do |pos|
    hit = raw.index("\n".b, pos)
    hit ? hit + 1 : total
  end

  # The indentation width is the number of leading spaces on the first line
  # (at or after the starting offset) that holds anything besides a newline.
  indent_width = 0
  cursor = first_line_file_offset
  while cursor < total
    run = 0
    run += 1 while cursor + run < total && raw.getbyte(cursor + run) == space_byte
    after_run = cursor + run
    if after_run < total && raw.getbyte(after_run) != newline_byte
      indent_width = run
      break
    end
    cursor = following_line_start.call(cursor)
  end

  # Walk the file one line per IDL line, skipping at most indent_width spaces.
  # Blank file lines have no spaces to skip and keep their line-start offset.
  line_start = first_line_file_offset
  collected = idl_string.each_line.map do |_idl_line|
    limit = line_start + indent_width
    content_start = line_start
    while content_start < limit && content_start < total && raw.getbyte(content_start) == space_byte
      content_start += 1
    end
    line_start = following_line_start.call(line_start)
    content_start
  end

  T.let(collected, T::Array[Integer])
end

#calculate_content_offset(line, value_part, line_num, lines, cumulative_offsets, file_bytes) ⇒ Object



849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
# File 'lib/udb/yaml/yaml_resolver.rb', line 849

# Computes the offset at which the value of a "key: value" line starts.
#
# Returns 0 when +value_part+ is nil or +line+ contains no ":".
# Values whose stripped text starts with "|" take the literal-block branch;
# everything else is treated as an inline (or next-line) plain value.
#
# NOTE(review): +lines+ is never read in this body.
# NOTE(review): @current_file_path only appears in the error messages below;
#   the constructor shown for this class does not assign it — confirm where it is set.
# NOTE(review): colon_pos/value_start come from String#index / String#length on
#   +line+ but are added to entries of +cumulative_offsets+, which the caller in
#   this class builds from byte sizes. If +line+ is a UTF-8 string with
#   multi-byte characters before the value, these disagree — confirm what
#   callers pass.
#
# @param line [String] the text of the key line
# @param value_part [String, nil] text of the value as seen by the caller
# @param line_num [Integer] index used with +cumulative_offsets+ and in error messages
# @param lines [Array<String>] unused here
# @param cumulative_offsets [Array<Integer>] start offset of each line
# @param file_bytes [String] file content indexed by the computed offsets
# @return [Integer] the computed offset (0 when there is nothing to locate)
# @raise [StandardError] when a literal block has no content lines
def calculate_content_offset(line, value_part, line_num, lines, cumulative_offsets, file_bytes)
  return 0 if value_part.nil?

  colon_pos = line.index(":")
  return 0 if colon_pos.nil?

  value_stripped = value_part.strip

  if value_stripped.start_with?("|")
    # For literal block scalars, YAML strips the minimum common indentation from all lines.
    # We need to calculate what that indentation is and point to the content after it's stripped.

    # Leading-whitespace length of every line of the stripped value (index 0 is the "|" line).
    line_lens = value_stripped.lines.map { |l| T.must(l[/^\s*/]).length }
    min_indent =
      if value_stripped[1] == "\n"
        # implicit indent, need to find min # of starting spaces
        # NOTE(review): lines with zero leading whitespace take part in this
        # minimum, so it can evaluate to 0 — confirm that is intended.
        T.must(line_lens[1..]).min
      else
        # explicit indent (e.g., "key: |2")
        value_stripped[1..].to_i
      end

    # also find the line that the content actually starts on (skipping blank lines at the beginning)

    if line_lens.size <= 1
      # Block scalar has no content lines (only the indicator line)
      raise StandardError, "Block scalar at #{@current_file_path}:#{line_num} has no content lines"
    end

    # Find the first line with actual content (non-zero indentation)
    first_content_line_num = 1
    while first_content_line_num < line_lens.size && T.must(line_lens[first_content_line_num]).zero?
      first_content_line_num += 1
    end

    if first_content_line_num >= line_lens.size
      # No content found - empty literal block
      raise StandardError, "Block scalar at #{@current_file_path}:#{line_num} has no content lines"
    end

    # NOTE(review): first_content_line_num is an index into the lines of
    # value_stripped, yet it is used here to index cumulative_offsets directly
    # (line_num is not added) — verify against the caller.
    return cumulative_offsets.fetch(first_content_line_num) + T.must(min_indent)
  end

  # For inline plain scalar values (value on the same line as the key)
  value_start = colon_pos + 1
  value_start += 1 while value_start < line.length && line[value_start] == " "

  # Calculate the initial byte offset
  initial_offset = cumulative_offsets.fetch(line_num) + value_start

  # Check if there's actual content on this line
  if value_start < line.length && !T.must(line[value_start..]).strip.empty?
    # Value is on the same line - skip only spaces/tabs on this line, not newlines
    offset = initial_offset
    while offset < file_bytes.bytesize && [" ", "\t"].include?(file_bytes[offset])
      offset += 1
    end

    return offset
  else
    # Value is on the next line(s) - skip whitespace including newlines
    offset = initial_offset
    while offset < file_bytes.bytesize && [" ", "\t", "\n", "\r"].include?(file_bytes[offset])
      offset += 1
    end

    # If we've skipped past the end, return the initial offset
    return offset >= file_bytes.bytesize ? initial_offset : offset
  end
end

#calculate_value_column(line, value_part, line_num, lines) ⇒ Object



811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
# File 'lib/udb/yaml/yaml_resolver.rb', line 811

def calculate_value_column(line, value_part, line_num, lines)
  return 0 if value_part.nil?

  colon_pos = line.index(":")
  return 0 if colon_pos.nil?

  value_stripped = value_part.strip

  if value_stripped.start_with?("|", ">")
    next_line_idx = line_num + 1
    while next_line_idx < lines.length
      next_line = lines.fetch(next_line_idx)
      if !next_line.strip.empty?
        return T.must(next_line[/^\s*/]).length
      end
      next_line_idx += 1
    end
    return colon_pos + 2
  end

  value_start = colon_pos + 1
  while value_start < line.length && line[value_start] == " "
    value_start += 1
  end

  value_start
end

#deep_copy(obj) ⇒ Object



651
652
653
654
655
656
657
658
659
660
661
662
663
664
# File 'lib/udb/yaml/yaml_resolver.rb', line 651

# Produces a recursive copy of +obj+.
#
# Hash values and Array elements are copied recursively (hash keys are reused
# as-is); any other object is duplicated with #dup, falling back to the object
# itself if #dup raises TypeError.
#
# @param obj [Object] value to copy
# @return [Object] the copy
def deep_copy(obj)
  return obj.transform_values { |nested| deep_copy(nested) } if obj.is_a?(Hash)
  return obj.map { |element| deep_copy(element) } if obj.is_a?(Array)

  begin
    obj.dup
  rescue TypeError
    # Objects that refuse to be duplicated are shared rather than copied.
    obj
  end
end

#deep_merge(base, other) ⇒ Object



645
646
647
648
# File 'lib/udb/yaml/yaml_resolver.rb', line 645

# Returns a new hash containing +base+ recursively merged with +other+,
# leaving both arguments untouched.
#
# The base is copied with deep_copy before merging. A shallow #dup is not
# enough here: deep_merge! descends into nested hashes and would otherwise
# modify nested hashes that are still shared with the caller's +base+.
#
# @param base [Hash] hash providing the initial content (not modified)
# @param other [Hash] hash whose entries take precedence (not modified)
# @return [Hash] the merged copy
def deep_merge(base, other)
  deep_merge!(deep_copy(base), other)
end

#deep_merge!(base, other) ⇒ Object



628
629
630
631
632
633
634
635
636
637
# File 'lib/udb/yaml/yaml_resolver.rb', line 628

# Recursively merges +other+ into +base+ in place.
#
# When both sides hold a Hash under the same key the merge descends into it;
# in every other case the entry in +base+ is replaced by a deep copy of the
# incoming value.
#
# @param base [Hash] hash that is modified
# @param other [Hash] hash supplying the overriding entries
# @return [Hash] +base+
def deep_merge!(base, other)
  other.each_with_object(base) do |(name, incoming), target|
    existing = target[name]
    if existing.is_a?(Hash) && incoming.is_a?(Hash)
      deep_merge!(existing, incoming)
    else
      target[name] = deep_copy(incoming)
    end
  end
end

#get_resolved_object(rel_path, arch_root, no_checks) ⇒ Object



583
584
585
586
587
588
589
590
591
592
593
594
595
# File 'lib/udb/yaml/yaml_resolver.rb', line 583

# Returns the resolved data for the file at +rel_path+, resolving it on first use.
#
# A cached result in @resolved_objs is returned directly. Otherwise the file
# under +arch_root+ is parsed with CommentParser, run through resolve_object,
# and stored in the cache together with its comment data.
#
# @param rel_path [String] file path relative to +arch_root+; also the cache key
# @param arch_root [Pathname] root directory joined with +rel_path+
# @param no_checks [Boolean] forwarded to resolve_object
# @return [Object] the resolved document
def get_resolved_object(rel_path, arch_root, no_checks)
  if @resolved_objs.key?(rel_path)
    return @resolved_objs.fetch(rel_path).fetch(:data)
  end

  source_path = arch_root / rel_path
  parsed = CommentParser.new.parse_file(source_path)
  document = parsed[:data]

  resolved = resolve_object(document, [], rel_path, document, arch_root, no_checks)
  @resolved_objs[rel_path] = { data: resolved, comments: parsed[:comments] }
  resolved
end

#json_merge_patch(base, patch) ⇒ Object



603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
# File 'lib/udb/yaml/yaml_resolver.rb', line 603

# Applies +patch+ to +base+ following JSON Merge Patch semantics.
#
# * A patch that is not a Hash replaces the target and is returned as-is.
# * A target that is not a Hash is treated as an empty object, so the patch is
#   still applied member by member. Previously the patch was returned verbatim
#   in that case, which kept nil members (at any nesting depth) instead of
#   dropping them.
# * A nil member in the patch removes that key from the result.
# * Hash members are merged recursively; every other value (arrays included)
#   replaces the target's value with a deep copy.
#
# Neither argument is modified.
#
# @param base [Object] the target document
# @param patch [Object] the patch document
# @return [Object] the patched result
def json_merge_patch(base, patch)
  return patch unless patch.is_a?(Hash)

  # Shallow copy is sufficient: nested hashes are rebuilt by the recursion
  # below before anything is written into them.
  result = base.is_a?(Hash) ? base.dup : {}

  patch.each do |key, value|
    if value.nil?
      result.delete(key)
    elsif value.is_a?(Hash)
      # When result[key] is not a Hash the recursive call starts from {}.
      result[key] = json_merge_patch(result[key], value)
    else
      result[key] = deep_copy(value)
    end
  end

  result
end

#merge_file(rel_path, base_dir, overlay_dir, output_dir) ⇒ Object



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/udb/yaml/yaml_resolver.rb', line 144

# Produces the merged output for one relative path.
#
# Depending on which inputs exist:
# * neither base nor overlay: an existing output file is removed;
# * only one of them: that file is copied to the output when the output is
#   missing or older than the source;
# * both: when the output is missing or older than either source, both files
#   are parsed, their IDL scalar styles validated, the overlay is applied to
#   the base with json_merge_patch, and the result is emitted.
#
# The output file's parent directory is created in every case.
#
# @param rel_path [String] path relative to each of the three directories
# @param base_dir [Pathname] directory holding the base file
# @param overlay_dir [Pathname, nil] directory holding the overlay file, if any
# @param output_dir [Pathname] directory receiving the merged file
def merge_file(rel_path, base_dir, overlay_dir, output_dir)
  base_path = base_dir / rel_path
  overlay_path = overlay_dir / rel_path unless overlay_dir.nil?
  output_path = output_dir / rel_path

  FileUtils.mkdir_p(output_path.dirname)

  have_base = base_path.exist?
  have_overlay = !overlay_path.nil? && overlay_path.exist?

  # True when the output is missing or older than any of the given sources.
  out_of_date = lambda do |*sources|
    !output_path.exist? || sources.any? { |src| src.mtime > output_path.mtime }
  end

  case [have_base, have_overlay]
  when [false, false]
    FileUtils.rm_f(output_path) if output_path.exist?
  when [true, false]
    FileUtils.cp(base_path, output_path) if out_of_date.call(base_path)
  when [false, true]
    FileUtils.cp(overlay_path, output_path) if out_of_date.call(overlay_path)
  else
    if out_of_date.call(base_path, overlay_path)
      parser = CommentParser.new
      base_result = parser.parse_file(base_path)
      overlay_result = parser.parse_file(overlay_path)

      # Check IDL scalar styles on the two source files (not on the merged
      # output) so that an error names the file the user actually has to edit.
      [base_path, overlay_path].each do |src_path|
        tree = Psych.parse(File.read(src_path, encoding: "utf-8"), filename: src_path.to_s)
        validate_idl_scalars(tree, [], src_path)
      end

      merged_data = json_merge_patch(base_result[:data], overlay_result[:data])

      # Keys present only in the base file take their styles from the base
      # file's comment data.
      overlay_comments = overlay_result[:comments]
      overlay_comments.merge_styles_from(base_result[:comments])

      PreservingEmitter.new(overlay_comments).emit_file(merged_data, output_path)
    end
  end
end

#merge_files(base_dir, overlay_dir, output_dir) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/udb/yaml/yaml_resolver.rb', line 109

# Merges every YAML file found under the base, overlay and output directories.
#
# The relative paths of all "*.yaml" files in the three trees are collected
# (existing output files are included so stale ones get processed too) and
# each distinct path is passed to merge_file while a progress bar advances.
#
# @param base_dir [String, Pathname] directory with the base files
# @param overlay_dir [String, Pathname, nil] directory with overlay files, if any
# @param output_dir [String, Pathname] directory receiving the merged files
def merge_files(base_dir, overlay_dir, output_dir)
  base_dir = Pathname.new(base_dir)
  overlay_dir = Pathname.new(overlay_dir) unless overlay_dir.nil?
  output_dir = Pathname.new(output_dir)

  # Relative paths (as strings) of all YAML files below +root+.
  yaml_under = lambda do |root|
    Dir.glob((root / "**" / "*.yaml").to_s).map do |found|
      Pathname.new(found).relative_path_from(root).to_s
    end
  end

  from_base = yaml_under.call(base_dir)
  from_overlay = overlay_dir.nil? ? [] : yaml_under.call(overlay_dir)
  already_emitted = yaml_under.call(output_dir)

  all_files = (from_base + from_overlay + already_emitted).uniq

  pb = Udb.create_progressbar(
    "Merging spec files [:bar] :current/:total",
    total: all_files.size,
    clear: true
  )
  all_files.each do |rel_path|
    pb.advance
    merge_file(rel_path, base_dir, overlay_dir, output_dir)
  end

  Udb.logger.info "Merged architecture files written to #{output_dir}" unless @quiet
end

#resolve_file(rel_path, input_dir, output_dir, no_checks) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/udb/yaml/yaml_resolver.rb', line 248

# Resolves a single input file and stores the result in @resolved_objs.
#
# Does nothing when the input file does not exist. Otherwise the file is
# parsed, its IDL scalar styles are validated, source locations are recorded
# in the file's comment data, the document is resolved with resolve_object,
# and "$parent_of" links are filled in from "$child_of" entries.
# +output_dir+ is not used by this method.
#
# @param rel_path [String] path relative to +input_dir+; also the cache key
# @param input_dir [Pathname] root of the input tree
# @param output_dir [Pathname] unused here
# @param no_checks [Boolean] when true the name-versus-filename check is skipped
# @raise [RuntimeError] when the "name" key differs from the file's base name
def resolve_file(rel_path, input_dir, output_dir, no_checks)
  source_path = input_dir / rel_path
  return unless source_path.exist?

  parsed = CommentParser.new.parse_file(source_path)
  document = parsed[:data]
  comment_map = parsed[:comments]

  # The raw YAML tree is needed to spot multi-line plain scalars, which the
  # parsed data no longer distinguishes from other strings.
  tree = Psych.parse(File.read(source_path, encoding: "utf-8"), filename: source_path.to_s)
  validate_idl_scalars(tree, [], source_path)

  track_source_locations(source_path, comment_map)
  @current_comment_map = comment_map

  unless no_checks
    if document.key?("name")
      expected_name = Pathname.new(rel_path).basename(".yaml").to_s
      unless expected_name == document["name"]
        raise "ERROR: 'name' key (#{document["name"]}) must match filename (#{expected_name}) in #{rel_path}"
      end
    end
  end

  resolved = resolve_object(document, [], rel_path, document, input_dir, no_checks)

  # "$parent_of" is assigned in a separate pass over the finished document
  # because a child may be resolved after its parent has already been placed
  # in the resolved data.
  set_parent_of_relationships(resolved, rel_path)

  @resolved_objs[rel_path] = { data: resolved, comments: comment_map }
end

#resolve_files(input_dir, output_dir, options = {}) ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/udb/yaml/yaml_resolver.rb', line 198

# Resolves every YAML file below +input_dir+ and writes the results to +output_dir+.
#
# All files are resolved first and written in a second pass. Afterwards, YAML
# files in the output tree that have no matching input are deleted, and
# index.yaml / index.json listing the resolved relative paths are written.
#
# @param input_dir [String, Pathname] root of the input tree
# @param output_dir [String, Pathname] root of the output tree
# @param options [Hash] only :no_checks is read; it defaults to false
def resolve_files(input_dir, output_dir, options = {})
  input_dir = Pathname.new(input_dir)
  output_dir = Pathname.new(output_dir)
  no_checks = options[:no_checks] || false

  # Relative paths (as strings) of all YAML files below +root+.
  yaml_under = lambda do |root|
    Dir.glob((root / "**" / "*.yaml").to_s).map do |found|
      Pathname.new(found).relative_path_from(root).to_s
    end
  end

  yaml_files = yaml_under.call(input_dir)

  pb = Udb.create_progressbar(
    "Resolving spec files [:bar] :current/:total",
    total: yaml_files.size,
    clear: true
  )
  yaml_files.each do |rel_path|
    pb.advance
    resolve_file(rel_path, input_dir, output_dir, no_checks)
  end

  yaml_files.each { |rel_path| write_resolved_file(rel_path, input_dir, output_dir, no_checks) }

  # Delete resolved files whose input no longer exists; the index files are
  # never treated as stale.
  index_names = ["index.yaml", "index.json"]
  leftovers = yaml_under.call(output_dir).reject { |rel| index_names.include?(rel) } - yaml_files
  leftovers.each do |rel_path|
    obsolete = output_dir / rel_path
    FileUtils.rm_f(obsolete) if obsolete.exist?
  end

  FileUtils.mkdir_p(output_dir)
  File.write(output_dir / "index.yaml", Psych.dump(yaml_files))
  File.write(output_dir / "index.json", JSON.pretty_generate(yaml_files))

  Udb.logger.info "Resolved architecture files written to #{output_dir}" unless @quiet
end

#resolve_inherits(obj, obj_path, obj_file_path, doc_obj, arch_root, no_checks) ⇒ Object



418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/udb/yaml/yaml_resolver.rb', line 418

# Resolves an object that carries an "$inherits" key.
#
# The inherited targets (one reference or an array of them) are looked up and
# folded, in order, into a single parent object; the child's own keys are then
# layered on top. In the result "$inherits" is replaced by "$child_of" holding
# the original reference value, and keys listed under "$remove" are deleted.
#
# A reference has the form "<file>#<path>". Without a "#" the whole string is
# treated as a path inside the current document.
#
# @param obj [Hash] the object containing "$inherits" (not modified)
# @param obj_path [Array] key path of +obj+ inside its document
# @param obj_file_path [String] file +obj+ comes from (used in messages and passed on)
# @param doc_obj [Hash] the unresolved document +obj+ belongs to
# @param arch_root [Pathname] root used to locate referenced files
# @param no_checks [Boolean] passed through to nested resolution
# @return [Hash] the resolved object
# @raise [RuntimeError] when a referenced file or object cannot be found
def resolve_inherits(obj, obj_path, obj_file_path, doc_obj, arch_root, no_checks)
  inherits_value = obj["$inherits"]
  inherits_targets = inherits_value.is_a?(Array) ? inherits_value : [inherits_value]

  # Build a new hash instead of mutating obj in-place.
  # Mutating obj would corrupt doc_obj (the original parsed data), causing subsequent
  # resolutions of the same key to see the already-mutated version without $inherits.
  obj = obj.reject { |k, _| k == "$inherits" }.merge("$child_of" => inherits_value)

  # Accumulates the content of all inherited targets; later targets override earlier ones.
  parent_obj = T.let({}, T::Hash[String, T.untyped])

  inherits_targets.each do |inherits_target|
    # Split the reference into a file part (empty for same-document) and an object path.
    if inherits_target.include?("#")
      ref_file_path, ref_obj_path_str = inherits_target.split("#", 2)
    else
      ref_file_path = ""
      ref_obj_path_str = inherits_target.start_with?("/") ? inherits_target : "/#{inherits_target}"
    end

    # "/a/b" -> ["a", "b"]; the element before the leading "/" is dropped.
    ref_obj_path = ref_obj_path_str.split("/").drop(1)

    ref_obj = T.let(nil, T.nilable(T::Hash[String, T.untyped]))
    if ref_file_path.empty?
      # Same-document reference: look the target up in the unresolved document
      # and resolve it here.
      ref_obj = if ref_obj_path.empty?
                  doc_obj
      else
        T.unsafe(doc_obj).dig(*ref_obj_path)
      end
      raise "#{ref_obj_path.join("/")} cannot be found in #{obj_file_path}" if ref_obj.nil?
      ref_obj = resolve_object(ref_obj, ref_obj_path, obj_file_path, doc_obj, arch_root, no_checks)
    else
      # Cross-file reference: the other document is obtained already resolved.
      ref_full_path = arch_root / ref_file_path
      raise "#{ref_file_path} does not exist in #{arch_root}/" unless ref_full_path.exist?

      ref_doc_obj = get_resolved_object(ref_file_path, arch_root, no_checks)
      ref_obj = if ref_obj_path.empty?
                  ref_doc_obj
      else
        T.unsafe(ref_doc_obj).dig(*ref_obj_path)
      end
      raise "#{ref_obj_path.join("/")} cannot be found in #{ref_file_path}" if ref_obj.nil?
    end

    # Copy the target's keys into the accumulated parent. The relationship
    # markers are not inherited.
    ref_obj.each do |key, value|
      next if key == "$parent_of" || key == "$child_of"

      if parent_obj.key?(key) && parent_obj[key].is_a?(Hash) && value.is_a?(Hash)
        deep_merge!(parent_obj[key], value)
      else
        parent_obj[key] = deep_copy(value)
      end
    end

  end

  # Combine parent and child: parent keys come first, followed by keys only the child has.
  final_obj = T.let({}, T::Hash[String, T.untyped])
  all_keys = (parent_obj.keys + obj.keys).uniq

  all_keys.each do |key|
    if !obj.key?(key)
      # Only the parent defines this key.
      final_obj[key] = parent_obj[key]
    elsif !parent_obj.key?(key)
      # Only the child defines this key.
      final_obj[key] = resolve_object(obj[key], obj_path + [key], obj_file_path, doc_obj, arch_root, no_checks)
    else
      # Both define it: hashes are merged with the child taking precedence,
      # anything else is replaced by the child's resolved value.
      if parent_obj[key].is_a?(Hash) && obj[key].is_a?(Hash)
        final_obj[key] = deep_merge(parent_obj[key], resolve_object(obj[key], obj_path + [key], obj_file_path, doc_obj, arch_root, no_checks))
      else
        final_obj[key] = resolve_object(obj[key], obj_path + [key], obj_file_path, doc_obj, arch_root, no_checks)
      end
    end
  end

  # "$remove" names one key or a list of keys to drop from the result; the
  # directive itself is dropped as well.
  if final_obj.key?("$remove")
    remove_keys = final_obj["$remove"]
    remove_keys = [remove_keys] unless remove_keys.is_a?(Array)
    remove_keys.each { |key| final_obj.delete(key) }
    final_obj.delete("$remove")
  end

  final_obj
end

#resolve_object(obj, obj_path, obj_file_path, doc_obj, arch_root, no_checks) ⇒ Object



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
# File 'lib/udb/yaml/yaml_resolver.rb', line 332

# Recursively resolves +obj+.
#
# * Values that are neither Hash nor Array are returned unchanged.
# * Arrays are resolved element by element.
# * A Hash containing "$inherits" is handed to resolve_inherits.
# * Any other Hash has each value resolved, then the keys named by "$remove"
#   (and "$remove" itself) deleted.
# * When IDL compilation is enabled, every String key ending in ")" whose value
#   is a non-blank String is parsed and its AST stored under
#   "<name before the first '('>_ast".
#
# Only String keys are considered IDL keys. Keys of other types (for example
# integer keys coming from YAML) are skipped instead of raising NoMethodError,
# matching the String check done by the other tree walkers in this class.
#
# @param obj [Object] value to resolve
# @param obj_path [Array] key path of +obj+ inside its document
# @param obj_file_path [String] file +obj+ comes from
# @param doc_obj [Object] the document +obj+ belongs to
# @param arch_root [Pathname] root used to locate referenced files
# @param no_checks [Boolean] passed through to nested resolution
# @return [Object] the resolved value
# @raise [TypeError] when an IDL key holds a non-String, non-nil value
# @raise [SyntaxError] when the IDL source cannot be parsed
# @raise [RuntimeError] when the parse result cannot be converted to an AST
def resolve_object(obj, obj_path, obj_file_path, doc_obj, arch_root, no_checks)
  return obj unless obj.is_a?(Hash) || obj.is_a?(Array)

  if obj.is_a?(Array)
    return obj.map.with_index do |item, idx|
      resolve_object(item, obj_path + [idx], obj_file_path, doc_obj, arch_root, no_checks)
    end
  end

  if obj.key?("$inherits")
    return resolve_inherits(obj, obj_path, obj_file_path, doc_obj, arch_root, no_checks)
  end

  resolved = T.let({}, T::Hash[String, T.untyped])
  obj.each do |key, value|
    resolved[key] = resolve_object(value, obj_path + [key], obj_file_path, doc_obj, arch_root, no_checks)
  end

  # "$remove" names one key or a list of keys to drop; the directive itself
  # is dropped as well.
  if resolved.key?("$remove")
    remove_keys = resolved["$remove"]
    remove_keys = [remove_keys] unless remove_keys.is_a?(Array)
    remove_keys.each { |key| resolved.delete(key) }
    resolved.delete("$remove")
  end

  if @compile_idl
    # Guard with is_a?(String): non-String keys do not respond to end_with?.
    idl_keys = obj.keys.select { |k| k.is_a?(String) && k.end_with?(")") }
    idl_keys.each do |key|
      idl_source = obj[key]

      # Skip compilation for nil or blank IDL blocks, matching previous resolver behavior.
      next if idl_source.nil?
      if idl_source.respond_to?(:strip) && idl_source.strip.empty?
        next
      end

      unless idl_source.is_a?(String)
        raise TypeError, "Expected IDL body for #{(obj_path + [key]).join('.')} to be a String, got #{idl_source.class}"
      end

      key_minus_args = key.split("(")[0] + "_ast"
      source_loc = @current_comment_map&.get_source_location(obj_path + [key])
      # :line is 1-based; set_input_file expects 0-based, so subtract 1
      starting_line = source_loc ? source_loc[:line] - 1 : 0
      starting_offset = source_loc ? (source_loc[:offset] || 0) : 0
      line_file_offsets = source_loc ? source_loc[:line_file_offsets] : nil
      # The grammar entry point depends on the key and on where the object sits.
      parse_root =
        if key == "operation()"
          :instruction_operation
        elsif obj_path.include?("requirements")
          :constraint_body
        else
          :function_body
        end
      compiler = T.must(@compiler)
      compiler.parser.set_input_file(obj_file_path.to_s, starting_line, starting_offset, line_file_offsets)
      m = compiler.parser.parse(idl_source, root: parse_root)
      if m.nil?
        raise SyntaxError, <<~MSG
          While parsing #{obj_file_path}:#{compiler.parser.failure_line}

          #{compiler.parser.failure_reason}
        MSG
      end
      ast = m.to_ast
      if ast.nil?
        raise "IDL compiler could not convert to ast"
      end
      ast.set_input_file_unless_already_set(obj_file_path, starting_line, starting_offset, line_file_offsets)
      resolved[key_minus_args] = ast.to_h
    end
  end

  resolved
end

#schema_version_map ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/udb/yaml/yaml_resolver.rb', line 60

# Returns the map from schema file name to schema version, building it on first use.
#
# Every "*.json" file directly inside schemas_path is read, except
# "json-schema-draft-07.json". A file contributes an entry when its "$id" is a
# String that does not start with "http". Files that cannot be read or parsed
# are ignored.
#
# @return [Hash{String => String}] schema base name => "$id" value
def schema_version_map
  return T.must(@schema_version_map) unless @schema_version_map.nil?

  discovered = T.let({}, T::Hash[String, String])
  if schemas_path.exist?
    schemas_path.glob("*.json").each do |candidate|
      file_name = candidate.basename.to_s
      next if file_name == "json-schema-draft-07.json"

      begin
        schema_id = JSON.parse(candidate.read)["$id"]
        discovered[file_name] = schema_id if schema_id.is_a?(String) && !schema_id.start_with?("http")
      rescue StandardError
        # Silently skip files that can't be parsed
      end
    end
  end

  @schema_version_map = discovered
  T.must(@schema_version_map)
end

#schemas_path ⇒ Object



50
51
52
53
54
55
# File 'lib/udb/yaml/yaml_resolver.rb', line 50

# Returns the directory that holds the schema files.
#
# Falls back to Udb.default_schemas_path, and remembers it, when no path has
# been set yet.
#
# @return [Pathname] the schema directory
def schemas_path
  @schemas_path = Udb.default_schemas_path if @schemas_path.nil?
  T.must(@schemas_path)
end

#set_parent_of_relationships(resolved_data, rel_path) ⇒ Object



506
507
508
# File 'lib/udb/yaml/yaml_resolver.rb', line 506

# Starts the "$parent_of" pass over a resolved document.
#
# Delegates to walk_for_parent_of, beginning at the document root with an
# empty key path and using the document itself as the lookup root.
#
# @param resolved_data [Object] the resolved document
# @param rel_path [String] relative path of the document's file
def set_parent_of_relationships(resolved_data, rel_path)
  root_path = []
  walk_for_parent_of(resolved_data, root_path, resolved_data, rel_path)
end

#track_source_locations(file_path, comment_map) ⇒ Object



785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
# File 'lib/udb/yaml/yaml_resolver.rb', line 785

# Records source locations for the scalars of a YAML file in +comment_map+.
#
# The file is read as UTF-8, the byte offset of the start of each line is
# computed, and the parsed YAML tree is handed to
# track_source_locations_helper together with a binary copy of the text.
#
# @param file_path [String, Pathname] file to read
# @param comment_map [Object] receives the locations (passed to the helper)
def track_source_locations(file_path, comment_map)
  yaml_text = File.read(file_path, encoding: "utf-8")

  # Start offset of every line, measured in bytes.
  consumed = 0
  line_starts = yaml_text.lines.map do |text_line|
    start = consumed
    consumed += text_line.bytesize
    start
  end
  cumulative_offsets = T.let(line_starts, T::Array[Integer])

  # Offsets are byte based, so the helper gets a binary copy of the text;
  # indexing a UTF-8 string would count characters instead.
  binary_text = yaml_text.b
  tree = Psych.parse(yaml_text, filename: file_path.to_s)
  track_source_locations_helper([], binary_text, file_path, cumulative_offsets, comment_map, tree)
end

#track_source_locations_helper(keys, contents, file, cumulative_offsets, offset_map, node) ⇒ Object



716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
# File 'lib/udb/yaml/yaml_resolver.rb', line 716

# Walks a Psych node tree and records a source location for every scalar
# value that has a non-empty key path.
#
# For scalars whose last key is a String ending in ")" (IDL functions) the
# location also carries the offset of the value inside +contents+ and, for
# non-empty literal block scalars, per-line offsets from
# build_line_file_offsets. Mappings and sequences are only traversed.
#
# The search for a plain scalar's text converts the needle to the encoding of
# +contents+ first. +contents+ is a binary string while node.value is UTF-8;
# searching with the UTF-8 string raises Encoding::CompatibilityError as soon
# as the value contains a non-ASCII character.
#
# @param keys [Array<String>] key path of +node+ (sequence indices as strings)
# @param contents [String] file content used for offset lookups
# @param file [String, Pathname] file name recorded with each location
# @param cumulative_offsets [Array<Integer>] start offset of each line
# @param offset_map [#set_source_location] receiver of the locations
# @param node [Psych::Nodes::Node] node to process
# @raise [RuntimeError] when an IDL function value is quoted or folded
def track_source_locations_helper(keys, contents, file, cumulative_offsets, offset_map, node)
  case node
  when Psych::Nodes::Document
    node.children.each do |child|
      track_source_locations_helper([], contents, file, cumulative_offsets, offset_map, child)
    end
  when Psych::Nodes::Mapping
    # Children alternate key, value, key, value, ...
    i = 0
    while i < node.children.size
      key_text = node.children.fetch(i).value
      track_source_locations_helper(keys + [key_text], contents, file, cumulative_offsets, offset_map, node.children.fetch(i + 1))
      i += 2
    end
    # Don't track source locations for mappings - only for IDL function scalar values
  when Psych::Nodes::Sequence
    node.children.each_with_index do |child, idx|
      track_source_locations_helper(keys + [idx.to_s], contents, file, cumulative_offsets, offset_map, child)
    end
    # Don't track source locations for sequences - only for IDL function scalar values
  when Psych::Nodes::Scalar
    return unless keys.any?

    is_idl_key = keys.last.is_a?(String) && T.must(keys.last).end_with?(")")
    marked_offset = cumulative_offsets.fetch(node.start_line) + node.start_column

    if is_idl_key
      actual_offset =
        if node.value.empty?
          marked_offset
        elsif node.style == Psych::Nodes::Scalar::LITERAL
          # The first content line always starts at the beginning of the line
          # immediately after the key line (node.start_line + 1).
          cumulative_offsets[node.start_line + 1]
        elsif node.style == Psych::Nodes::Scalar::PLAIN
          # Single-line plain scalar - find it directly. The needle is
          # re-tagged with the haystack's encoding so the search compares bytes
          # with bytes.
          needle = node.value.dup.force_encoding(contents.encoding)
          contents.index(needle, marked_offset)
        else
          style_name = case node.style
                       when Psych::Nodes::Scalar::SINGLE_QUOTED then "SINGLE_QUOTED"
                       when Psych::Nodes::Scalar::DOUBLE_QUOTED then "DOUBLE_QUOTED"
                       when Psych::Nodes::Scalar::FOLDED then "FOLDED"
                       else "UNKNOWN (#{node.style})"
                       end
          raise "ERROR: Unsupported YAML style for IDL function '#{keys.last}' in #{file}.\n" \
            "IDL functions must use either PLAIN (single-line) or LITERAL block scalar (|) style.\n" \
            "Examples:\n" \
            "  PLAIN (single-line):     #{keys.last}: x = 5\n" \
            "  LITERAL (multi-line):    #{keys.last}: |\n" \
            "                             x = 5\n" \
            "                             y = 10\n" \
            "Found style: #{style_name}"
        end
      # Per-line offsets are only built for non-empty literal blocks.
      line_file_offsets =
        if node.style == Psych::Nodes::Scalar::LITERAL && !node.value.empty? && actual_offset
          build_line_file_offsets(node.value, actual_offset, contents)
        end
      # Psych lines and columns are 0-based; locations are stored 1-based.
      offset_map.set_source_location(keys, file, node.start_line + 1, node.start_column + 1, actual_offset, line_file_offsets)
    else
      offset_map.set_source_location(keys, file, node.start_line + 1, node.start_column + 1)
    end
  end
end

#validate_against_schema(resolved_obj, rel_path) ⇒ Object



1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
# File 'lib/udb/yaml/yaml_resolver.rb', line 1000

# Validates +resolved_obj+ against the JSON schema named by its "$schema" key.
#
# The schema file is looked up by base name inside schemas_path; when it is
# missing a warning is logged (unless quiet) and nothing is validated. Schema
# references are likewise resolved by base name inside schemas_path. The object
# is round-tripped through JSON before validation and its "$schema" value is
# reduced to "<base name>#".
#
# @param resolved_obj [Hash] object to validate; must contain "$schema"
# @param rel_path [String] path used in the failure message
# @raise [RuntimeError] listing every validation error when the object is invalid
def validate_against_schema(resolved_obj, rel_path)
  schema_basename = File.basename(T.must(resolved_obj["$schema"].split("#").first))
  schema_path = schemas_path / schema_basename

  if !schema_path.exist?
    Udb.logger.warn "Schema file not found: #{schema_path}" unless @quiet
    return
  end

  # Referenced schemas are loaded from the local schema directory by file name.
  load_local_schema = proc do |uri|
    JSON.parse((schemas_path / File.basename(uri.to_s)).read)
  end

  schema = JSONSchemer.schema(
    JSON.parse(schema_path.read),
    regexp_resolver: "ecma",
    ref_resolver: load_local_schema,
    insert_property_defaults: false
  )

  # Convert through JSON to normalize YAML-specific types (e.g. integer keys)
  candidate = JSON.parse(JSON.generate(resolved_obj))

  # Normalize $schema to bare name so the schema enum matches bare refs
  if candidate.key?("$schema")
    candidate["$schema"] = "#{File.basename(T.must(candidate["$schema"].split("#").first))}#"
  end

  return if schema.valid?(candidate)

  details = schema.validate(candidate).to_a.map do |failure|
    "  - #{failure["data_pointer"]}: #{failure["type"]}"
  end
  raise "Schema validation failed for #{rel_path}:\n#{details.join("\n")}"
end

#validate_idl_scalars(node, keys, file_path) ⇒ Object



666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
# File 'lib/udb/yaml/yaml_resolver.rb', line 666

# Checks that no IDL function value is written as a multi-line plain scalar.
#
# The tree is walked recursively. For every mapping entry whose key is a
# String ending in ")", a scalar value in PLAIN style that ends on a later
# line than it starts is rejected.
#
# @param node [Psych::Nodes::Node] node to check
# @param keys [Array<String>] key path of +node+ (sequence indices as strings)
# @param file_path [String, Pathname] file name used in the error message
# @raise [RuntimeError] when a multi-line plain IDL scalar is found
def validate_idl_scalars(node, keys, file_path)
  case node
  when Psych::Nodes::Document
    node.children.each { |child| validate_idl_scalars(child, [], file_path) }
  when Psych::Nodes::Mapping
    # Children alternate key, value, key, value, ...
    (0...node.children.size).step(2) do |pos|
      key_node = node.children.fetch(pos)
      value_node = node.children.fetch(pos + 1)
      key_text = key_node.value

      multiline_plain_idl =
        key_text.is_a?(String) && key_text.end_with?(")") &&
        value_node.is_a?(Psych::Nodes::Scalar) &&
        value_node.style == Psych::Nodes::Scalar::PLAIN &&
        value_node.end_line > value_node.start_line

      if multiline_plain_idl
        raise "ERROR: Multiline IDL function '#{key_text}' in #{file_path} must use literal block scalar (|).\n" \
              "Found plain scalar spanning lines #{value_node.start_line + 1}-#{value_node.end_line + 1}.\n" \
              "Please change the YAML to use:\n" \
              "  #{key_text}: |\n" \
              "    <your IDL code here>"
      end

      # Nested structures may contain further IDL functions.
      validate_idl_scalars(value_node, keys + [key_text], file_path)
    end
    nil
  when Psych::Nodes::Sequence
    node.children.each_with_index do |child, idx|
      validate_idl_scalars(child, keys + [idx.to_s], file_path)
    end
  end
end

#versioned_schema_uri(uri) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/udb/yaml/yaml_resolver.rb', line 85

# Adds the schema's version prefix to a bare schema URI.
#
# The part before the first "#" is looked up in schema_version_map. A URI
# whose base already contains "/" or whose base is not in the map is returned
# unchanged; otherwise the result is "<version>/<base><fragment>".
#
# @param uri [String] schema URI such as "csr_schema.json#"
# @return [String] the versioned URI, or +uri+ itself
def versioned_schema_uri(uri)
  base, hash_mark, tail = uri.partition("#")

  # Already has a version prefix
  return uri if base.include?("/")

  version = schema_version_map[base]
  return uri unless version

  "#{version}/#{base}#{hash_mark}#{tail}"
end

#walk_for_parent_of(obj, path, doc_root, rel_path) ⇒ Object



518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
# File 'lib/udb/yaml/yaml_resolver.rb', line 518

# Walks a resolved document and registers every object that has "$child_of"
# with the parent(s) it names.
#
# Targets starting with "#" are looked up in +doc_root+; targets of the form
# "<file>#<path>" are looked up in the already-resolved documents in
# @resolved_objs. Targets that are not Strings, contain no "#", refer to a
# file that is not cached, or do not lead to a Hash are skipped. Only Hash
# values are descended into.
#
# @param obj [Object] current node; anything but a Hash ends the walk
# @param path [Array] key path of +obj+ inside the document
# @param doc_root [Hash] root of the document being walked
# @param rel_path [String] relative file path used to build the child reference
def walk_for_parent_of(obj, path, doc_root, rel_path)
  return unless obj.is_a?(Hash)

  if obj.key?("$child_of")
    declared = obj["$child_of"]
    child_ref = path.empty? ? "#{rel_path}#/" : "#{rel_path}#/#{path.join("/")}"

    (declared.is_a?(Array) ? declared : [declared]).each do |target|
      next unless target.is_a?(String)

      parent =
        if target.start_with?("#")
          # Same-document reference
          pointer = T.must(target.split("#", 2)).fetch(1)
          T.unsafe(doc_root).dig(*pointer.split("/").drop(1))
        elsif target.include?("#")
          # Cross-file reference
          file_part, pointer = target.split("#", 2)
          segments = T.must(pointer).split("/").drop(1)

          # Only documents that are already in the cache can be updated.
          cache_key = T.must(file_part)
          next unless @resolved_objs.key?(cache_key)

          other_doc = @resolved_objs.fetch(cache_key).fetch(:data)
          segments.empty? ? other_doc : T.unsafe(other_doc).dig(*segments)
        else
          next
        end

      add_parent_of_reference(parent, child_ref) if parent.is_a?(Hash)
    end
  end

  obj.each_pair do |key, value|
    walk_for_parent_of(value, path + [key], doc_root, rel_path)
  end
end

#write_resolved_file(rel_path, input_dir, output_dir, no_checks) ⇒ Object



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/udb/yaml/yaml_resolver.rb', line 291

# Writes the cached resolved object for +rel_path+ into the output tree.
#
# Nothing happens when the file has no entry in @resolved_objs. Otherwise the
# object's "$source" is set to the real path of the input file, the object is
# validated against its schema (unless +no_checks+), "$schema" is rewritten to
# its versioned form, and the object is emitted with its comment data. The
# written file's mode is set to 0666.
#
# @param rel_path [String] path relative to both directories; also the cache key
# @param input_dir [Pathname] root of the input tree
# @param output_dir [Pathname] root of the output tree
# @param no_checks [Boolean] when true schema validation is skipped
def write_resolved_file(rel_path, input_dir, output_dir, no_checks)
  output_path = output_dir / rel_path
  return unless @resolved_objs.key?(rel_path)

  cached = @resolved_objs.fetch(rel_path)
  resolved_obj = cached.fetch(:data)
  comments = cached.fetch(:comments)

  resolved_obj["$source"] = (input_dir / rel_path).realpath.to_s

  if resolved_obj.key?("$schema")
    # Validation has to see the bare (unversioned) name as written in the
    # source files (e.g. 'csr_schema.json#'), so it runs before the rewrite.
    validate_against_schema(resolved_obj, rel_path) unless no_checks

    # The emitted file records the schema version that was used
    # (e.g. 'v0.1/csr_schema.json#').
    resolved_obj["$schema"] = versioned_schema_uri(resolved_obj["$schema"])
  end

  FileUtils.mkdir_p(output_path.dirname)
  PreservingEmitter.new(comments).emit_file(resolved_obj, output_path)
  FileUtils.chmod(0o666, output_path)
end