Module: Canon::DiffFormatter::DiffDetailFormatterHelpers::NodeUtils

Defined in:
lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb

Overview

Node utility methods

Provides helper methods for extracting information from nodes.

Constant Summary collapse

ASCII_WHITESPACE_BYTES =

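Strip only ASCII whitespace (space, tab, CR, LF) from both ends of a string while preserving Unicode whitespace such as the non-breaking space (U+00A0), which is meaningful content. Note that Ruby’s String#strip uses a different whitespace set: it additionally removes NUL, vertical tab and form feed, and it does not remove U+00A0.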

Returns:

  • (String)

    String with leading/trailing ASCII whitespace removed

[32, 9, 13, 10].freeze

Class Method Summary collapse

Class Method Details

.find_all_differing_attributes(node1, node2) ⇒ Array<String>

Find all differing attributes between two nodes

Parameters:

  • node1 (Object)

    First node

  • node2 (Object)

    Second node

Returns:

  • (Array<String>)

    Array of attribute names with different values



48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 48

# List the attribute names whose values are not equal on the two nodes.
#
# Attributes of both nodes are read through get_attributes_hash; the union of
# their keys is scanned and every key with equal values on both sides is
# dropped. A key present on only one side compares against nil.
#
# @param node1 [Object] first node
# @param node2 [Object] second node
# @return [Array<String>] names of differing attributes ([] if either node
#   is nil/false)
def self.find_all_differing_attributes(node1, node2)
  return [] if !node1 || !node2

  left = get_attributes_hash(node1)
  right = get_attributes_hash(node2)

  candidate_names = left.keys | right.keys
  candidate_names.reject { |name| left[name.to_s] == right[name.to_s] }
end

.format_node_brief(node) ⇒ String

Format node briefly for display

Parameters:

  • node (Object)

    Node to format

Returns:

  • (String)

    Brief node description



251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 251

# Build a short one-line description of a node.
#
# The description is the display name from get_element_name_for_display,
# followed by the node text (from get_node_text) in parentheses and double
# quotes when that text is non-empty.
#
# @param node [Object] node to describe
# @return [String] brief description, or "" when node is nil/false
def self.format_node_brief(node)
  return "" unless node

  label = get_element_name_for_display(node)
  content = get_node_text(node)
  return label if !content || content.empty?

  "#{label}(\"#{content}\")"
end

.get_attribute_names(node) ⇒ Array<String>

Get attribute names from a node

Parameters:

  • node (Object)

    Node to extract attributes from

Returns:

  • (Array<String>)

    Array of attribute names



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 17

# Collect the attribute names exposed by a node.
#
# The attribute container is taken from the first of these the node
# responds to: +attribute_nodes+, +attributes+, or +keys+ (hash-like node).
# An Array container is mapped element by element (using +name+ when the
# element has it, otherwise +to_s+); a container that responds to +keys+
# contributes its stringified keys; anything else yields [].
#
# @param node [Object] node to extract attributes from
# @return [Array<String>] attribute names ([] when node is nil/false or no
#   attribute container is available)
def self.get_attribute_names(node)
  return [] unless node

  attrs = if node.respond_to?(:attribute_nodes)
            node.attribute_nodes
          elsif node.respond_to?(:attributes)
            node.attributes
          elsif node.respond_to?(:keys)
            # Hash-like node. Probe for the method that is actually called:
            # checking :[] and :each instead would also match Arrays, which
            # have no #keys.
            node.keys
          else
            []
          end

  return [] unless attrs

  # Handle different attribute formats
  if attrs.is_a?(Array)
    attrs.map { |attr| attr.respond_to?(:name) ? attr.name : attr.to_s }
  elsif attrs.respond_to?(:keys)
    attrs.keys.map(&:to_s)
  else
    []
  end
end

.get_attribute_names_in_order(node) ⇒ Array<String>

Get attribute names in order from a node

Parameters:

  • node (Object)

    Node to extract from

Returns:

  • (Array<String>)

    Ordered array of attribute names



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 65

# Collect attribute names in the order the node's container yields them.
#
# The container comes from +attribute_nodes+ when available, otherwise from
# +attributes+. An Array container is mapped element by element (+name+ if
# the element has it, otherwise +to_s+); a container responding to +keys+
# contributes its stringified keys. Any other container shape yields []
# instead of raising, matching get_attribute_names.
#
# @param node [Object] node to extract from
# @return [Array<String>] ordered attribute names ([] when node is
#   nil/false or exposes no usable container)
def self.get_attribute_names_in_order(node)
  return [] unless node

  attrs = if node.respond_to?(:attribute_nodes)
            node.attribute_nodes
          elsif node.respond_to?(:attributes)
            node.attributes
          end

  return [] unless attrs

  if attrs.is_a?(Array)
    attrs.map { |attr| attr.respond_to?(:name) ? attr.name : attr.to_s }
  elsif attrs.respond_to?(:keys)
    attrs.keys.map(&:to_s)
  else
    # Unknown container shape: nothing to report.
    []
  end
end

.get_attribute_value(node, attr_name) ⇒ String?

Get attribute value from a node

Parameters:

  • node (Object)

    Node to extract from

  • attr_name (String)

    Attribute name

Returns:

  • (String, nil)

    Attribute value or nil



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 131

# Look up a single attribute value on a node.
#
# Three node APIs are tried in order: +[]+, +get_attribute+, and a scan of
# +attribute_nodes+ by name. For the +[]+ lookup the result is unwrapped via
# +value+, then +content+, then +to_s+.
#
# NOTE(review): in the +[]+ branch a nil lookup result is stringified by the
# +to_s+ step, so a missing attribute comes back as "" there rather than nil
# — confirm callers expect that.
#
# @param node [Object] node to extract from
# @param attr_name [String] attribute name
# @return [String, nil] attribute value, or nil when node/attr_name is
#   nil/false or no supported API is available
def self.get_attribute_value(node, attr_name)
  return nil if !node || !attr_name

  if node.respond_to?(:[])
    raw = node[attr_name]
    return raw.value if raw.respond_to?(:value)
    return raw.content if raw.respond_to?(:content)
    return raw.to_s if raw.respond_to?(:to_s)

    raw
  elsif node.respond_to?(:get_attribute)
    found = node.get_attribute(attr_name)
    found.respond_to?(:value) ? found.value : found
  elsif node.respond_to?(:attribute_nodes)
    match = node.attribute_nodes.find { |candidate| candidate.name == attr_name.to_s }
    match&.value
  end
end

.get_attributes_hash(node) ⇒ Hash

Get attributes as a hash

Parameters:

  • node (Object)

    Node to extract from

Returns:

  • (Hash)

    Attributes hash



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 89

# Convert a node's attributes into a plain name => value Hash.
#
# The attribute container is read from +attribute_nodes+ when the node has
# it, otherwise from +attributes+. Array containers are read item by item
# (+name+/+value+ when available, +to_s+ otherwise). Other containers that
# respond to +each+ are iterated as key/value pairs: keys are stringified
# and values are unwrapped via +value+, then +content+, then +to_s+.
#
# @param node [Object] node to extract from
# @return [Hash] attribute names mapped to values ({} when node is
#   nil/false or has no attribute container)
def self.get_attributes_hash(node)
  return {} unless node

  source =
    if node.respond_to?(:attribute_nodes)
      node.attribute_nodes
    elsif node.respond_to?(:attributes)
      node.attributes
    else
      {}
    end
  return {} unless source

  collected = {}

  if source.is_a?(Array)
    source.each do |item|
      key = item.respond_to?(:name) ? item.name : item.to_s
      collected[key] = item.respond_to?(:value) ? item.value : item.to_s
    end
  elsif source.respond_to?(:each)
    source.each do |key, val|
      collected[key.to_s] =
        if val.respond_to?(:value)
          val.value
        elsif val.respond_to?(:content)
          val.content
        else
          val.to_s
        end
    end
  end

  collected
end

.get_element_name_for_display(node) ⇒ String

Get element name for display

Parameters:

  • node (Object)

    Node to get name from

Returns:

  • (String)

    Element name



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 210

# Pick the label used when showing a node to the user.
#
# Canon text and comment nodes get the fixed labels "text" and "comment";
# they are checked by class before the generic +name+ lookup. Other nodes
# use +name+ when they respond to it and fall back to their class name.
#
# @param node [Object] node to get the name from
# @return [String] display name, or "" when node is nil/false
def self.get_element_name_for_display(node)
  return "" unless node
  return "text" if node.is_a?(Canon::Xml::Nodes::TextNode)
  return "comment" if node.is_a?(Canon::Xml::Nodes::CommentNode)

  node.respond_to?(:name) ? node.name.to_s : node.class.name
end

.get_namespace_uri_for_display(node) ⇒ String

Get namespace URI for display

Parameters:

  • node (Object)

    Node to get namespace from

Returns:

  • (String)

    Namespace URI



234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 234

# Resolve the namespace URI to show for a node.
#
# Prefers the node's own +namespace_uri+; otherwise reads +href+ from the
# object returned by +namespace+. Every path returns a String.
#
# @param node [Object] node to get the namespace from
# @return [String] namespace URI, or "" when none can be determined
def self.get_namespace_uri_for_display(node)
  return "" unless node
  return node.namespace_uri.to_s if node.respond_to?(:namespace_uri)
  return "" unless node.respond_to?(:namespace)

  namespace = node.namespace
  namespace.respond_to?(:href) ? namespace.href.to_s : ""
end

.get_node_text(node) ⇒ String

Get text content from a node

Parameters:

  • node (Object)

    Node to extract from

Returns:

  • (String)

    Text content



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 160

# Extract a node's text for display.
#
# The first accessor the node responds to, in the order +text+, +content+,
# +inner_text+, +value+, +node_info+, +to_s+, supplies the raw value. That
# value is stringified and passed through strip_ascii_whitespace.
#
# @param node [Object] node to extract from
# @return [String] text content, or "" when node is nil/false
def self.get_node_text(node)
  return "" unless node

  # Order matters: earlier accessors win when a node offers several.
  accessor = %i[text content inner_text value node_info to_s].find do |candidate|
    node.respond_to?(candidate)
  end
  raw = accessor ? node.public_send(accessor) : ""

  strip_ascii_whitespace(raw.to_s)
end

.inside_preserve_element?(node) ⇒ Boolean

Check if node is inside a preserve-whitespace element

Parameters:

  • node (Object)

    Node to check

Returns:

  • (Boolean)

    true if inside preserve element



413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 413

# Tell whether a node is, or is nested inside, a whitespace-preserving
# element (pre, code, textarea, script or style; names compared
# case-insensitively).
#
# The node itself is tested first, then each ancestor reached through
# +parent+ (preferred) or +parent_node+. The climb stops at the first node
# that offers neither accessor or whose parent is nil/false.
#
# @param node [Object] node to check
# @return [Boolean] true if the node or one of its ancestors is a preserve
#   element
def self.inside_preserve_element?(node)
  return false unless node

  preserving = %w[pre code textarea script style]

  cursor = node
  while cursor
    return true if cursor.respond_to?(:name) && preserving.include?(cursor.name.to_s.downcase)

    # Step to the parent; nil (no parent API or no parent) ends the walk.
    cursor =
      if cursor.respond_to?(:parent)
        cursor.parent
      elsif cursor.respond_to?(:parent_node)
        cursor.parent_node
      end
  end

  false
end

.node_to_display(node, compact: false) ⇒ String

Return the best display string for a node.

When compact: true and the node is a Canon ElementNode, returns a compact XML serialization (e.g. <strong>Annex</strong>) instead of the node_info description string that get_node_text would produce. In all other cases, delegates to get_node_text.

Parameters:

  • node (Object)

    Node to display

  • compact (Boolean) (defaults to: false)

    Whether to use compact XML for element nodes

Returns:

  • (String)

    Display string



384
385
386
387
388
389
390
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 384

# Choose the display string for a node.
#
# Only a Canon ElementNode requested with compact: true is rendered through
# serialize_node_compact; every other combination goes to get_node_text.
#
# @param node [Object] node to display
# @param compact [Boolean] whether element nodes should be rendered as
#   compact markup
# @return [String] display string
def self.node_to_display(node, compact: false)
  use_compact = compact && node.is_a?(Canon::Xml::Nodes::ElementNode)
  return get_node_text(node) unless use_compact

  serialize_node_compact(node)
end

.parent_of(node) ⇒ Object?

Return the parent of a node, or nil, regardless of the node API.

Canon::Xml nodes expose parent; some Nokogiri-shaped nodes expose parent_node. This helper abstracts over both.

Parameters:

  • node (Object)

    Node to query

Returns:

  • (Object, nil)

    Parent node or nil



399
400
401
402
403
404
405
406
407
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 399

# Fetch a node's parent through whichever accessor the node offers.
#
# +parent+ is preferred; +parent_node+ is used only when +parent+ is not
# available.
#
# @param node [Object] node to query
# @return [Object, nil] the parent, or nil when node is nil/false or
#   exposes neither accessor
def self.parent_of(node)
  return nil unless node
  return node.parent if node.respond_to?(:parent)

  node.parent_node if node.respond_to?(:parent_node)
end

.raw_text_value(node) ⇒ String

Return the raw text content of a text node without stripping whitespace. get_node_text strips ASCII whitespace, which destroys whitespace-only payloads that callers (e.g. one-sided text-content diff rendering) need to display verbatim.

Parameters:

  • node (Object)

    Text node

Returns:

  • (String)

    Raw text content, or “” if not a text-bearing node



361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 361

# Return a node's text payload exactly as stored, with no whitespace
# stripping.
#
# Canon::Xml::Node instances are read through +value+ and
# Nokogiri::XML::Node instances through +content+; the result is
# stringified. Any other object yields "".
#
# @param node [Object] text node
# @return [String] raw text content, or "" for nil/false and unsupported
#   node classes
def self.raw_text_value(node)
  return "" unless node

  case node
  when Canon::Xml::Node then node.value.to_s
  when Nokogiri::XML::Node then node.content.to_s
  else ""
  end
end

.serialize_node_compact(node) ⇒ String

Serialize a node tree as compact XML for display.

Produces a human-readable inline XML string without namespace declarations and without indentation — suitable for use in Semantic Diff Report entries. Handles both Canon::Xml::Nodes types and Nokogiri XML/HTML nodes (the html DOM comparison path uses Nokogiri nodes, so element-structure diffs originating there must be rendered structurally too — see issue #120). For any other node type, falls back to get_node_text.

Parameters:

  • node (Object)

    Node to serialize

Returns:

  • (String)

    Compact XML string



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 276

# Render a node and its descendants as one unindented markup string.
#
# Text payloads, comment payloads and attribute values are passed through
# CGI.escapeHTML. An element whose serialized children are empty uses the
# self-closing form. Canon node classes are matched before Nokogiri ones;
# any node matching none of the listed classes is delegated to
# get_node_text.
#
# @param node [Object] node to serialize
# @return [String] compact markup, or "" when node is nil/false
def self.serialize_node_compact(node)
  require "cgi"
  return "" unless node

  # Shared element renderer: the Canon and Nokogiri element branches differ
  # only in how their attribute markup is assembled.
  render_element = lambda do |tag, attr_markup|
    inner = node.children.map { |child| serialize_node_compact(child) }.join
    inner.empty? ? "<#{tag}#{attr_markup}/>" : "<#{tag}#{attr_markup}>#{inner}</#{tag}>"
  end

  case node
  when Canon::Xml::Nodes::TextNode
    CGI.escapeHTML(node.value.to_s)
  when Canon::Xml::Nodes::ElementNode
    tag = node.name.to_s
    attr_markup = node.attribute_nodes.map do |attr|
      # Attribute objects lacking name/value fall back to to_s / "".
      label = attr.respond_to?(:name) ? attr.name.to_s : attr.to_s
      payload = attr.respond_to?(:value) ? attr.value.to_s : ""
      " #{label}=\"#{CGI.escapeHTML(payload)}\""
    end.join
    render_element.call(tag, attr_markup)
  when Canon::Xml::Nodes::CommentNode
    body = node.respond_to?(:value) ? node.value.to_s : ""
    "<!--#{CGI.escapeHTML(body)}-->"
  when Nokogiri::XML::Text, Nokogiri::XML::CDATA
    CGI.escapeHTML(node.content.to_s)
  when Nokogiri::XML::Comment
    "<!--#{CGI.escapeHTML(node.content.to_s)}-->"
  when Nokogiri::XML::Element
    tag = node.name.to_s
    attr_markup = node.attribute_nodes.map do |attr|
      " #{attr.name}=\"#{CGI.escapeHTML(attr.value.to_s)}\""
    end.join
    render_element.call(tag, attr_markup)
  else
    # Unknown node types — fall back to text extraction
    get_node_text(node)
  end
end

.serialize_open_tag(node) ⇒ String

Serialize a node’s open tag only — name + attributes, no children, no closing tag. Used by format_text_content_one_sided to render a brief parent-element context hint (e.g. <div id="A">) for a one-sided text diff, instead of the full ancestor subtree that serialize_node_compact would produce. See lutaml/canon#125.

Parameters:

  • node (Object)

    Element node to serialize

Returns:

  • (String)

    Open-tag string, or “” for non-elements / nil



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 332

# Render only the opening tag of an element: its name followed by its
# attributes, with no children and no closing tag.
#
# Canon and Nokogiri element nodes are rendered the same way; attribute
# values are passed through CGI.escapeHTML.
#
# @param node [Object] element node to serialize
# @return [String] open-tag string, or "" for nil/false and non-element
#   nodes
def self.serialize_open_tag(node)
  require "cgi"
  return "" unless node

  case node
  when Canon::Xml::Nodes::ElementNode, Nokogiri::XML::Element
    tag = node.name.to_s
    attr_markup = node.attribute_nodes.map do |attr|
      " #{attr.name}=\"#{CGI.escapeHTML(attr.value.to_s)}\""
    end.join
    "<#{tag}#{attr_markup}>"
  else
    ""
  end
end

.strip_ascii_whitespace(str) ⇒ Object



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 190

# Remove leading and trailing ASCII whitespace — space, tab, CR and LF only.
#
# Every other character is kept, including Unicode whitespace such as the
# non-breaking space (U+00A0), so meaningful content at the edges of the
# string survives.
#
# @param str [String, nil] string to trim
# @return [String] trimmed string; "" for nil or whitespace-only input; an
#   empty input is returned as-is
def self.strip_ascii_whitespace(str)
  return "" if str.nil?
  return str if str.empty?

  # First character that is not ASCII whitespace; none means the whole
  # string is whitespace.
  first_pos = str.index(/[^ \t\r\n]/)
  return "" unless first_pos

  # Last such character, searched from the end. rindex avoids building a
  # reversed copy of the string and converting the index back.
  last_pos = str.rindex(/[^ \t\r\n]/)

  str[first_pos..last_pos]
end