Module: Canon::DiffFormatter::DiffDetailFormatterHelpers::NodeUtils

Defined in:
lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb

Overview

Node utility methods

Provides helper methods for extracting information from nodes.

Constant Summary collapse

ASCII_WHITESPACE_BYTES =

Strip only ASCII whitespace (space, tab, CR, LF) but preserve Unicode whitespace like non-breaking space (U+00A0). Ruby’s String#strip removes all Unicode whitespace, which destroys meaningful content like U+00A0.

Returns:

  • (String)

    String with leading/trailing ASCII whitespace removed

[32, 9, 13, 10].freeze

Class Method Summary collapse

Class Method Details

.find_all_differing_attributes(node1, node2) ⇒ Array<String>

Find all differing attributes between two nodes

Parameters:

  • node1 (Object)

    First node

  • node2 (Object)

    Second node

Returns:

  • (Array<String>)

    Array of attribute names with different values



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 47

# Lists the attribute names whose values are not equal on the two nodes.
#
# Names are gathered from both nodes (first node's names first, duplicates
# dropped) and kept only when the two attribute hashes disagree on them.
#
# @param node1 [Object] first node
# @param node2 [Object] second node
# @return [Array<String>] names of attributes with different values;
#   empty when either node is nil or false
def self.find_all_differing_attributes(node1, node2)
  return [] if !node1 || !node2

  left = get_attributes_hash(node1)
  right = get_attributes_hash(node2)

  candidate_names = (left.keys + right.keys).uniq
  candidate_names.select do |attr_name|
    lookup = attr_name.to_s
    left[lookup] != right[lookup]
  end
end

.format_node_brief(node) ⇒ String

Format node briefly for display

Parameters:

  • node (Object)

    Node to format

Returns:

  • (String)

    Brief node description



250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 250

# Builds a short one-line description of a node for display.
#
# The result is the display name alone, or `name("text")` when the node
# yields non-empty text.
#
# @param node [Object] node to describe
# @return [String] brief description; "" when node is nil or false
def self.format_node_brief(node)
  return "" unless node

  label = get_element_name_for_display(node)
  content = get_node_text(node)

  # No usable text: the name on its own is the whole description.
  return label if !content || content.empty?

  "#{label}(\"#{content}\")"
end

.get_attribute_names(node) ⇒ Array<String>

Get attribute names from a node

Parameters:

  • node (Object)

    Node to extract attributes from

Returns:

  • (Array<String>)

    Array of attribute names



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 16

# Get attribute names from a node.
#
# The attribute container is looked up in this order: `attribute_nodes`,
# then `attributes`, then — for hash-like nodes — the node's own `keys`.
# An Array container is mapped entry by entry (`name` when the entry
# responds to it, otherwise `to_s`); a container responding to `keys`
# contributes its stringified keys; anything else yields no names.
#
# @param node [Object] node to extract attributes from
# @return [Array<String>] attribute names; [] when node is nil/false or
#   exposes no recognisable attribute container
def self.get_attribute_names(node)
  return [] unless node

  attrs = if node.respond_to?(:attribute_nodes)
            node.attribute_nodes
          elsif node.respond_to?(:attributes)
            node.attributes
          elsif node.respond_to?(:keys) && node.respond_to?(:[]) && node.respond_to?(:each)
            # Hash-like node. `keys` is checked explicitly: Arrays also answer
            # to [] and each, and calling keys on them would raise.
            node.keys
          end

  return [] unless attrs

  # Handle different attribute formats
  if attrs.is_a?(Array)
    attrs.map { |attr| attr.respond_to?(:name) ? attr.name : attr.to_s }
  elsif attrs.respond_to?(:keys)
    attrs.keys.map(&:to_s)
  else
    []
  end
end

.get_attribute_names_in_order(node) ⇒ Array<String>

Get attribute names in order from a node

Parameters:

  • node (Object)

    Node to extract from

Returns:

  • (Array<String>)

    Ordered array of attribute names



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 64

# Get attribute names from a node, in the order the node's attribute
# container yields them.
#
# Uses `attribute_nodes` when the node responds to it, otherwise
# `attributes`. An Array container is mapped entry by entry (`name` when
# available, otherwise `to_s`); a container responding to `keys`
# contributes its stringified keys. Any other container yields [] instead
# of raising, matching get_attribute_names.
#
# @param node [Object] node to extract from
# @return [Array<String>] ordered array of attribute names
def self.get_attribute_names_in_order(node)
  return [] unless node

  attrs = if node.respond_to?(:attribute_nodes)
            node.attribute_nodes
          elsif node.respond_to?(:attributes)
            node.attributes
          else
            []
          end

  return [] unless attrs

  if attrs.is_a?(Array)
    attrs.map { |attr| attr.respond_to?(:name) ? attr.name : attr.to_s }
  elsif attrs.respond_to?(:keys)
    attrs.keys.map(&:to_s)
  else
    # Unknown container shape: nothing we can enumerate safely.
    []
  end
end

.get_attribute_value(node, attr_name) ⇒ String?

Get attribute value from a node

Parameters:

  • node (Object)

    Node to extract from

  • attr_name (String)

    Attribute name

Returns:

  • (String, nil)

    Attribute value or nil



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 130

# Get attribute value from a node.
#
# Lookup strategy, first match wins:
# 1. `node[attr_name]` — the result is unwrapped via `value`, then
#    `content`, otherwise converted with `to_s`.
# 2. `node.get_attribute(attr_name)` — unwrapped via `value` when available.
# 3. `node.attribute_nodes` — the entry whose name equals `attr_name.to_s`.
#
# A missing attribute yields nil (not ""), so callers can tell an absent
# attribute apart from one whose value is the empty string.
#
# @param node [Object] node to extract from
# @param attr_name [String] attribute name
# @return [String, nil] attribute value, or nil when the node, the name or
#   the attribute itself is absent
def self.get_attribute_value(node, attr_name)
  return nil unless node && attr_name

  if node.respond_to?(:[])
    value = node[attr_name]
    # nil responds to to_s, so without this guard an absent attribute
    # would be reported as "".
    return nil if value.nil?

    if value.respond_to?(:value)
      value.value
    elsif value.respond_to?(:content)
      value.content
    else
      value.to_s
    end
  elsif node.respond_to?(:get_attribute)
    attr = node.get_attribute(attr_name)
    attr.respond_to?(:value) ? attr.value : attr
  elsif node.respond_to?(:attribute_nodes)
    attribute_node = node.attribute_nodes.find do |attr|
      attr.name == attr_name.to_s
    end
    attribute_node&.value
  end
end

.get_attributes_hash(node) ⇒ Hash

Get attributes as a hash

Parameters:

  • node (Object)

    Node to extract from

Returns:

  • (Hash)

    Attributes hash



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 88

# Collects a node's attributes into a plain name => value Hash.
#
# The attribute container comes from `attribute_nodes` when the node
# responds to it, otherwise from `attributes`. Array containers are read
# entry by entry (`name`/`value`, each falling back to `to_s`); other
# containers are iterated as key/value pairs, with values unwrapped via
# `value`, then `content`, then `to_s`.
#
# @param node [Object] node to extract from
# @return [Hash] attributes hash; {} when node is nil/false or has no
#   attribute container
def self.get_attributes_hash(node)
  return {} unless node

  container = if node.respond_to?(:attribute_nodes)
                node.attribute_nodes
              elsif node.respond_to?(:attributes)
                node.attributes
              else
                {}
              end
  return {} unless container

  if container.is_a?(Array)
    return container.each_with_object({}) do |entry, collected|
      key = entry.respond_to?(:name) ? entry.name : entry.to_s
      collected[key] = entry.respond_to?(:value) ? entry.value : entry.to_s
    end
  end

  collected = {}
  return collected unless container.respond_to?(:each)

  container.each do |raw_key, raw_value|
    collected[raw_key.to_s] =
      if raw_value.respond_to?(:value)
        raw_value.value
      elsif raw_value.respond_to?(:content)
        raw_value.content
      else
        raw_value.to_s
      end
  end
  collected
end

.get_element_name_for_display(node) ⇒ String

Get element name for display

Parameters:

  • node (Object)

    Node to get name from

Returns:

  • (String)

    Element name



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 209

# Picks the name shown for a node in diff output.
#
# Canon text and comment nodes are labelled with the fixed strings "text"
# and "comment"; they are handled up front because (per the original
# author's note) those classes do not respond to :name. Any other node
# uses its `name` when available, else its class name.
#
# @param node [Object] node to get name from
# @return [String] element name; "" when node is nil or false
def self.get_element_name_for_display(node)
  return "" unless node
  return "text" if node.is_a?(Canon::Xml::Nodes::TextNode)
  return "comment" if node.is_a?(Canon::Xml::Nodes::CommentNode)

  node.respond_to?(:name) ? node.name.to_s : node.class.name
end

.get_namespace_uri_for_display(node) ⇒ String

Get namespace URI for display

Parameters:

  • node (Object)

    Node to get namespace from

Returns:

  • (String)

    Namespace URI



233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 233

# Resolves the namespace URI to show for a node.
#
# Prefers the node's own `namespace_uri`; otherwise reads `href` from the
# object returned by `namespace`. Every path returns a String.
#
# @param node [Object] node to get namespace from
# @return [String] namespace URI, or "" when none can be determined
def self.get_namespace_uri_for_display(node)
  return "" unless node
  return node.namespace_uri.to_s if node.respond_to?(:namespace_uri)
  return "" unless node.respond_to?(:namespace)

  namespace = node.namespace
  return "" unless namespace.respond_to?(:href)

  namespace.href.to_s
end

.get_node_text(node) ⇒ String

Get text content from a node

Parameters:

  • node (Object)

    Node to extract from

Returns:

  • (String)

    Text content



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 159

# Extracts a node's text for display.
#
# The first reader the node responds to is used, in this priority order:
# text, content, inner_text, value, node_info, to_s. The result is
# stringified and passed through strip_ascii_whitespace.
#
# @param node [Object] node to extract from
# @return [String] text content; "" when node is nil or false
def self.get_node_text(node)
  return "" unless node

  readers = %i[text content inner_text value node_info to_s]
  reader = readers.find { |message| node.respond_to?(message) }
  raw = reader ? node.public_send(reader) : ""

  strip_ascii_whitespace(raw.to_s)
end

.inside_preserve_element?(node) ⇒ Boolean

Check if node is inside a preserve-whitespace element

Parameters:

  • node (Object)

    Node to check

Returns:

  • (Boolean)

    true if inside preserve element



325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 325

# Reports whether a node is, or is nested inside, a whitespace-preserving
# element (pre, code, textarea, script, style; names compared
# case-insensitively).
#
# Ancestors are reached through `parent` when the current node responds to
# it, otherwise through `parent_node`; the walk stops at the first node
# that has neither or whose parent is nil/false.
#
# @param node [Object] node to check
# @return [Boolean] true if inside preserve element
def self.inside_preserve_element?(node)
  return false unless node

  preserved_names = %w[pre code textarea script style]
  preserving = lambda do |candidate|
    candidate.respond_to?(:name) && preserved_names.include?(candidate.name.to_s.downcase)
  end
  step_up = lambda do |candidate|
    if candidate.respond_to?(:parent)
      candidate.parent
    elsif candidate.respond_to?(:parent_node)
      candidate.parent_node
    end
  end

  # The node itself counts, not just its ancestors.
  return true if preserving.call(node)

  ancestor = step_up.call(node)
  while ancestor
    return true if preserving.call(ancestor)

    ancestor = step_up.call(ancestor)
  end

  false
end

.node_to_display(node, compact: false) ⇒ String

Return the best display string for a node.

When compact: true and the node is a Canon ElementNode, returns a compact XML serialization (e.g. <strong>Annex</strong>) instead of the node_info description string that get_node_text would produce. In all other cases, delegates to get_node_text.

Parameters:

  • node (Object)

    Node to display

  • compact (Boolean) (defaults to: false)

    Whether to use compact XML for element nodes

Returns:

  • (String)

    Display string



313
314
315
316
317
318
319
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 313

# Return the best display string for a node.
#
# Compact XML serialization is used only when it is requested AND the node
# is a Canon ElementNode; every other combination goes through
# get_node_text.
#
# @param node [Object] node to display
# @param compact [Boolean] whether to use compact XML for element nodes
# @return [String] display string
def self.node_to_display(node, compact: false)
  as_compact_xml = compact && node.is_a?(Canon::Xml::Nodes::ElementNode)
  return serialize_node_compact(node) if as_compact_xml

  get_node_text(node)
end

.serialize_node_compact(node) ⇒ String

Serialize a Canon Xml node tree as compact XML for display.

Produces a human-readable inline XML string without namespace declarations and without indentation — suitable for use in Semantic Diff Report entries. Only handles Canon::Xml::Nodes types; for any other node (Nokogiri, etc.) falls back to get_node_text.

Parameters:

  • node (Object)

    Node to serialize

Returns:

  • (String)

    Compact XML string



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 272

# Serialize a Canon Xml node tree as compact, single-line XML for display.
#
# - TextNode: its value, HTML-escaped.
# - ElementNode: `<name attr="value"…>children</name>`, or the
#   self-closing form `<name…/>` when the serialized children are empty.
#   Attribute values are HTML-escaped; children are serialized recursively.
# - CommentNode: `<!--value-->` with the value HTML-escaped.
# - Anything else: delegated to get_node_text.
#
# @param node [Object] node to serialize
# @return [String] compact XML string; "" when node is nil or false
def self.serialize_node_compact(node)
  require "cgi"
  return "" unless node

  case node
  when Canon::Xml::Nodes::TextNode
    CGI.escapeHTML(node.value.to_s)
  when Canon::Xml::Nodes::ElementNode
    element_name = node.name.to_s

    attribute_markup = node.attribute_nodes.map do |attribute|
      key = attribute.respond_to?(:name) ? attribute.name.to_s : attribute.to_s
      raw = attribute.respond_to?(:value) ? attribute.value.to_s : ""
      " #{key}=\"#{CGI.escapeHTML(raw)}\""
    end.join

    inner = node.children.map { |child| serialize_node_compact(child) }.join
    opening = "<#{element_name}#{attribute_markup}"

    inner.empty? ? "#{opening}/>" : "#{opening}>#{inner}</#{element_name}>"
  when Canon::Xml::Nodes::CommentNode
    comment_body = node.respond_to?(:value) ? node.value.to_s : ""
    "<!--#{CGI.escapeHTML(comment_body)}-->"
  else
    # Non-Canon nodes (e.g. Nokogiri): fall back to plain text extraction.
    get_node_text(node)
  end
end

.strip_ascii_whitespace(str) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb', line 189

# Trims leading and trailing ASCII whitespace from a string.
#
# Only space, tab, CR and LF are trimmed; every other character —
# including non-breaking space (U+00A0) — is kept, even at the edges.
#
# @param str [String, nil] string to trim
# @return [String] trimmed string; "" for nil or all-whitespace input; an
#   empty input string is returned as-is
def self.strip_ascii_whitespace(str)
  return "" if str.nil?
  return str if str.empty?

  significant = /[^ \t\r\n]/

  leading_edge = str.index(significant)
  return "" if leading_edge.nil?

  # A match exists, so searching backwards from the end finds one too.
  trailing_edge = str.rindex(significant)

  str[leading_edge..trailing_edge]
end