Class: Canon::Comparison::DiffNodeBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/canon/comparison/diff_node_builder.rb

Overview

Single factory for DiffNode creation in the DOM comparison path.

Centralises reason building, metadata enrichment (path, serialization, attributes), and whitespace visualization — previously duplicated across MarkupComparator and XmlComparator.

Class Method Summary collapse

Class Method Details

.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts) ⇒ Object

Build an enriched DiffNode.

Raises:

  • (ArgumentError)


14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/canon/comparison/diff_node_builder.rb', line 14

# Build an enriched DiffNode describing one difference between two nodes.
#
# The reason string comes from build_reason and the extra keyword arguments
# (path, serialized forms, attributes) come from enrich_metadata; both are
# forwarded to Canon::Diff::DiffNode.new together with the nodes and dimension.
#
# @param node1 [Object, nil] node from the first document (may be absent)
# @param node2 [Object, nil] node from the second document (may be absent)
# @param diff1 [Object] comparison code for the first side, passed to build_reason
# @param diff2 [Object] comparison code for the second side, passed to build_reason
# @param dimension [Symbol] the comparison dimension; must not be nil
# @param _opts [Hash] extra keyword arguments, accepted and ignored
# @return [Canon::Diff::DiffNode]
# @raise [ArgumentError] when dimension is nil
def self.build(node1:, node2:, diff1:, diff2:, dimension:, **_opts)
  raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

  reason = build_reason(node1, node2, diff1, diff2, dimension)
  metadata = enrich_metadata(node1, node2)

  Canon::Diff::DiffNode.new(
    node1: node1,
    node2: node2,
    dimension: dimension,
    reason: reason,
    **metadata,
  )
end

.build_attribute_difference_reason(attrs1, attrs2) ⇒ Object

— Attribute reason builders —————————————–



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/canon/comparison/diff_node_builder.rb', line 96

# Describe how two attribute hashes differ in terms of attribute names and
# values.
#
# When either side is missing, only the attribute counts are reported.
# Otherwise the message lists names found only on one side and names whose
# values differ, each group sorted; if nothing differs it reports the sizes
# with a "(same names)" note.
#
# @param attrs1 [Hash, nil] attributes of the first node
# @param attrs2 [Hash, nil] attributes of the second node
# @return [String] human-readable reason
def self.build_attribute_difference_reason(attrs1, attrs2)
  if !attrs1 || !attrs2
    count1 = attrs1&.keys&.size || 0
    count2 = attrs2&.keys&.size || 0
    return "#{count1} vs #{count2} attributes"
  end

  names1 = attrs1.keys.to_set
  names2 = attrs2.keys.to_set
  changed = (names1 & names2).reject { |name| attrs1[name] == attrs2[name] }

  # Each entry pairs a label with the attribute names it applies to;
  # groups without names are dropped from the message.
  sections = [
    ["only in first", (names1 - names2).to_a],
    ["only in second", (names2 - names1).to_a],
    ["different values", changed],
  ].map { |label, names| "#{label}: #{names.sort.join(', ')}" if names.any? }.compact

  return "#{names1.size} vs #{names2.size} attributes (same names)" if sections.empty?

  sections.join("; ")
end

.build_attribute_order_reason(node1, node2) ⇒ Object



131
132
133
134
135
# File 'lib/canon/comparison/diff_node_builder.rb', line 131

# Describe a change in attribute ordering between two nodes.
#
# Attribute names are taken from extract_attributes in their returned order;
# a node without attributes contributes an empty list.
#
# @param node1 [Object, nil] node from the first document
# @param node2 [Object, nil] node from the second document
# @return [String] human-readable reason
def self.build_attribute_order_reason(node1, node2)
  before, after = [node1, node2].map do |node|
    extract_attributes(node)&.keys || []
  end

  "Attribute order changed: [#{before.join(', ')}] → [#{after.join(', ')}]"
end

.build_attribute_values_reason(node1, node2) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/canon/comparison/diff_node_builder.rb', line 116

# Describe which attribute values changed between two nodes.
#
# Attribute names from both sides are merged and sorted; each name is looked
# up by its string form, and names whose values match are skipped.
#
# @param node1 [Object, nil] node from the first document
# @param node2 [Object, nil] node from the second document
# @return [String] a list of "Changed: name=old → new" entries, or a generic
#   message when no individual change could be identified
def self.build_attribute_values_reason(node1, node2)
  before = extract_attributes(node1) || {}
  after = extract_attributes(node2) || {}

  changes = (before.keys | after.keys).sort.each_with_object([]) do |name, acc|
    key = name.to_s
    old_value = before[key]
    new_value = after[key]
    next if old_value == new_value

    acc << "Changed: #{name}=\"#{old_value}\" → \"#{new_value}\""
  end

  return "attributes differ" if changes.empty?

  "Attributes differ (#{changes.join('; ')})"
end

.build_comment_difference_reason(node1, node2) ⇒ Object

— Comment reason —————————————————-



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/canon/comparison/diff_node_builder.rb', line 176

# Describe a comment-related difference between two nodes.
#
# Comment text is shortened with truncate before being embedded.
#
# @param node1 [Object, nil] node from the EXPECTED side
# @param node2 [Object, nil] node from the ACTUAL side
# @return [String, nil] the reason, or nil when neither node is a comment
def self.build_comment_difference_reason(node1, node2)
  expected_is_comment = node1 && NodeInspector.comment_node?(node1)
  actual_is_comment = node2 && NodeInspector.comment_node?(node2)

  if expected_is_comment && actual_is_comment
    expected_text = truncate(comment_text(node1))
    actual_text = truncate(comment_text(node2))
    "Comment text differs: <!--#{expected_text}--> vs <!--#{actual_text}-->"
  elsif expected_is_comment
    "Comment present on EXPECTED only: <!--#{truncate(comment_text(node1))}-->"
  elsif actual_is_comment
    "Comment present on ACTUAL only: <!--#{truncate(comment_text(node2))}-->"
  end
end

.build_reason(node1, node2, diff1, diff2, dimension) ⇒ Object

— Reason building —————————————————



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/canon/comparison/diff_node_builder.rb', line 31

# Produce the human-readable reason for a difference in the given dimension.
#
# A one-sided :text_content difference on an XML element is reported with the
# element name, its namespace (when non-empty) and the code-pair label.
# Otherwise the dimension selects a dedicated builder, with fallback_reason
# used for unknown dimensions and for :comments when no comment is involved.
#
# @param node1 [Object, nil] node from the first document
# @param node2 [Object, nil] node from the second document
# @param diff1 [Object] comparison code for the first side
# @param diff2 [Object] comparison code for the second side
# @param dimension [Symbol] the comparison dimension
# @return [String] the reason text
def self.build_reason(node1, node2, diff1, diff2, dimension)
  one_sided_text = dimension == :text_content && (node1.nil? || node2.nil?)
  if one_sided_text
    present = node1 || node2
    if present.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(present)
      uri = Canon::XmlParsing.namespace_uri(present)
      suffix = uri.nil? || uri.empty? ? "" : " (namespace: #{uri})"
      pair_label = Canon::Comparison.code_pair_label(diff1, diff2)
      return "element '#{present.name}'#{suffix}: #{pair_label}"
    end
  end

  case dimension
  when :attribute_presence
    attrs1 = extract_attributes(node1)
    attrs2 = extract_attributes(node2)
    build_attribute_difference_reason(attrs1, attrs2)
  when :attribute_values
    build_attribute_values_reason(node1, node2)
  when :text_content
    text1 = extract_text_content(node1)
    text2 = extract_text_content(node2)
    build_text_difference_reason(text1, text2)
  when :attribute_order
    build_attribute_order_reason(node1, node2)
  when :comments
    # Neither node being a comment yields nil, so use the generic reason.
    build_comment_difference_reason(node1, node2) ||
      fallback_reason(diff1, diff2, dimension, node1, node2)
  when :whitespace_adjacency
    build_whitespace_adjacency_reason(node1, node2)
  else
    fallback_reason(diff1, diff2, dimension, node1, node2)
  end
end

.build_text_difference_reason(text1, text2) ⇒ Object

— Text diff reason ————————————————–



162
163
164
165
166
167
168
169
170
171
172
# File 'lib/canon/comparison/diff_node_builder.rb', line 162

# Describe the difference between two text values.
#
# Missing (nil) sides are reported explicitly. When both texts are
# whitespace-only the message summarises their whitespace composition;
# otherwise both texts are shown with whitespace made visible.
#
# @param text1 [String, nil] text from the first node
# @param text2 [String, nil] text from the second node
# @return [String] human-readable reason
def self.build_text_difference_reason(text1, text2)
  if text1.nil?
    return "missing vs '#{truncate(text2)}'" if text2
    return "both missing" if text2.nil?
  elsif text1 && text2.nil?
    return "'#{truncate(text1)}' vs missing"
  end

  both_blank = whitespace_only?(text1) && whitespace_only?(text2)
  if both_blank
    return "whitespace: #{describe_whitespace(text1)} vs #{describe_whitespace(text2)}"
  end

  "Text: \"#{visualize_whitespace(text1)}\" vs \"#{visualize_whitespace(text2)}\""
end

.build_whitespace_adjacency_reason(node1, node2) ⇒ Object

— Whitespace adjacency reason (#137) ——————————–



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/canon/comparison/diff_node_builder.rb', line 199

# Describe a whitespace-adjacency difference between two nodes.
#
# When exactly one node is whitespace-only text, the reason is built by
# build_adjacency_side with that node's side named first. In every other
# case the plain text-difference reason is used.
#
# @param node1 [Object, nil] node from the EXPECTED side
# @param node2 [Object, nil] node from the ACTUAL side
# @return [String] human-readable reason
def self.build_whitespace_adjacency_reason(node1, node2)
  text1 = extract_text_content(node1)
  text2 = extract_text_content(node2)

  blank1 = NodeInspector.whitespace_only_text?(node1)
  blank2 = NodeInspector.whitespace_only_text?(node2)

  if blank1 && !blank2
    build_adjacency_side(text1, text2, node1, "EXPECTED", "ACTUAL")
  elsif blank2 && !blank1
    build_adjacency_side(text2, text1, node2, "ACTUAL", "EXPECTED")
  else
    build_text_difference_reason(text1, text2)
  end
end

.comment_text(node) ⇒ Object



193
194
195
# File 'lib/canon/comparison/diff_node_builder.rb', line 193

# Return the text of a comment node as a String.
#
# @param node [Object] node passed to NodeInspector.text_content
# @return [String] the text content, coerced with to_s
def self.comment_text(node)
  raw = NodeInspector.text_content(node)
  raw.to_s
end

.describe_whitespace(text) ⇒ Object



228
229
230
231
232
233
234
235
236
237
238
# File 'lib/canon/comparison/diff_node_builder.rb', line 228

# Summarise the composition of a (typically whitespace-only) string.
#
# Reports the total length followed by per-kind counts for newlines, spaces
# and tabs. Characters outside those three kinds (for example carriage
# returns) are counted as "other", so the breakdown is never empty and always
# adds up to the total.
#
# @param text [String, nil] the text to describe
# @return [String] e.g. "4 chars (1 newlines, 2 spaces, 1 tabs)", or
#   "0 chars" for nil or empty input
def self.describe_whitespace(text)
  return "0 chars" if text.nil? || text.empty?

  char_count = text.length
  counts = {
    "newlines" => text.count("\n"),
    "spaces" => text.count(" "),
    "tabs" => text.count("\t"),
  }
  # Whatever is left (e.g. "\r", "\f") would otherwise go unreported.
  counts["other"] = char_count - counts.values.sum

  parts = counts.reject { |_label, count| count.zero? }
                .map { |label, count| "#{count} #{label}" }

  "#{char_count} chars (#{parts.join(', ')})"
end

.enrich_metadata(node1, node2) ⇒ Object

— Metadata enrichment ———————————————–



69
70
71
72
73
74
75
76
77
78
# File 'lib/canon/comparison/diff_node_builder.rb', line 69

# Collect the metadata keyword arguments attached to a DiffNode.
#
# The path is built from whichever node is present (node1 preferred);
# serialized forms and attributes are nil for a missing node.
#
# @param node1 [Object, nil] node from the first document
# @param node2 [Object, nil] node from the second document
# @return [Hash{Symbol => Object}] with keys :path, :serialized_before,
#   :serialized_after, :attributes_before and :attributes_after
def self.enrich_metadata(node1, node2)
  {
    path: Canon::Diff::PathBuilder.build(node1 || node2,
                                         format: :document),
    serialized_before: serialize(node1),
    serialized_after: serialize(node2),
    attributes_before: extract_attributes(node1),
    attributes_after: extract_attributes(node2),
  }
end

.extract_attributes(node) ⇒ Object



88
89
90
91
92
# File 'lib/canon/comparison/diff_node_builder.rb', line 88

# Extract a node's attributes via Canon::Diff::NodeSerializer.
#
# @param node [Object, nil] the node to inspect
# @return [Object, nil] the serializer's result, or nil when node is nil
def self.extract_attributes(node)
  Canon::Diff::NodeSerializer.extract_attributes(node) unless node.nil?
end

.extract_text_content(node) ⇒ Object

— Text content extraction ——————————————-



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/canon/comparison/diff_node_builder.rb', line 139

# Extract the text content of a node, whatever its backing implementation.
#
# Canon text nodes yield their value and other Canon nodes their
# text_content. Anything else is tried as a Nokogiri node (when that backend
# is active), then as a generic XML node, and finally converted with to_s.
# Extraction is best-effort: any StandardError results in nil.
#
# @param node [Object, nil] the node to read
# @return [String, nil] the text, or nil for a nil node or on failure
def self.extract_text_content(node)
  return nil if node.nil?

  case node
  when Canon::Xml::Nodes::TextNode then node.value
  when Canon::Xml::Node then node.text_content
  else
    nokogiri_node = Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
    if nokogiri_node
      node.content.to_s
    elsif Canon::XmlParsing.xml_node?(node)
      Canon::XmlParsing.text_content(node)
    else
      node.to_s
    end
  end
rescue StandardError
  nil
end

.serialize(node) ⇒ Object

— Node queries (delegate to NodeSerializer) ————————-



82
83
84
85
86
# File 'lib/canon/comparison/diff_node_builder.rb', line 82

# Serialize a node via Canon::Diff::NodeSerializer.
#
# @param node [Object, nil] the node to serialize
# @return [Object, nil] the serializer's result, or nil when node is nil
def self.serialize(node)
  Canon::Diff::NodeSerializer.serialize(node) unless node.nil?
end

.truncate(text, max_length = 40) ⇒ Object



246
247
248
249
250
251
252
253
# File 'lib/canon/comparison/diff_node_builder.rb', line 246

# Shorten text for display in a reason string.
#
# The value is converted with to_s; when it is longer than max_length, the
# first max_length characters are kept and "..." is appended.
#
# @param text [Object, nil] the value to shorten
# @param max_length [Integer] number of characters kept before the ellipsis
# @return [String] the possibly shortened text ("" for nil)
def self.truncate(text, max_length = 40)
  return "" if text.nil?

  str = text.to_s
  str.length > max_length ? "#{str[0...max_length]}..." : str
end

.visualize_whitespace(text) ⇒ Object

— Whitespace visualization ——————————————



221
222
223
224
225
226
# File 'lib/canon/comparison/diff_node_builder.rb', line 221

# Replace characters with their visible stand-ins.
#
# Each character is looked up in character_visualization_map; characters
# without a mapping are kept unchanged.
#
# @param text [String, nil] the text to render
# @return [String] the rendered text ("" for nil)
def self.visualize_whitespace(text)
  return "" if text.nil?

  symbols = character_visualization_map
  text.each_char.map { |ch| symbols[ch] || ch }.join
end

.whitespace_only?(text) ⇒ Boolean

Returns:

  • (Boolean)


240
241
242
243
244
# File 'lib/canon/comparison/diff_node_builder.rb', line 240

# Tell whether the text is empty once leading and trailing whitespace is
# stripped.
#
# @param text [Object, nil] the value to check (converted with to_s)
# @return [Boolean] false for nil, true when nothing remains after strip
def self.whitespace_only?(text)
  !text.nil? && text.to_s.strip.empty?
end