Module: Canon::Comparison::WhitespaceSensitivity

Defined in:
lib/canon/comparison/whitespace_sensitivity.rb

Overview

Whitespace sensitivity utilities for element-level control

Constant Summary collapse

HTML_COLLAPSE_ELEMENTS =

HTML mixed-content “leaf block” elements where whitespace presence matters but all forms are equivalent (CSS block whitespace collapsing).

%w[
  p li dt dd td th caption figcaption label legend summary
  h1 h2 h3 h4 h5 h6
  blockquote address button
].freeze
HTML_PRESERVE_ELEMENTS =

HTML elements where every whitespace character is significant.

%w[pre code textarea script style].freeze
INLINE_ELEMENTS =

HTML inline elements — whitespace between these is semantically significant (renders as a visible space). Whitespace-only text nodes that sit between two inline siblings must not be stripped.

%w[
  a abbr acronym b bdo big br button cite code dfn em i img input kbd
  label map object output q s samp select small span strong sub sup
  time tt u var wbr
].freeze

Class Method Summary collapse

Class Method Details

.classify_element(element, match_opts) ⇒ Object

Classify the whitespace behaviour for an element using ancestor walk.



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 29

# Classify the whitespace behaviour of +element+.
#
# Elements that are missing, or for which +node_name+ yields nothing, are
# classified as +:strip+ straight away. Otherwise the preserve, collapse and
# strip element sets are resolved from +match_opts+ and the decision is
# delegated to +walk_ancestor_classification+.
#
# @param element [Object, nil] the element to classify
# @param match_opts [Hash] match options used to resolve the element sets
# @return [Symbol, Object] +:strip+ for a missing/unnamed element, otherwise
#   whatever +walk_ancestor_classification+ returns
def classify_element(element, match_opts)
  return :strip unless element && node_name(element)

  # Resolved in the order the ancestor walk expects them:
  # preserve, collapse, strip.
  element_sets = [
    resolved_preserve_elements_set(match_opts),
    resolved_collapse_elements_set(match_opts),
    resolved_strip_elements_set(match_opts)
  ]

  walk_ancestor_classification(element, *element_sets)
end

.classify_text_node(node, opts) ⇒ Object

Return the whitespace class for a text node used during comparison.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 69

# Determine the whitespace class of a text node for comparison purposes.
#
# Returns +:strip+ when +opts+ carries no +:match_opts+ or when
# +text_node_parent?+ rejects the node. When +respect_xml_space?+ is off,
# only +user_config_sensitive?+ decides (+:preserve+ or +:strip+).
# Otherwise the parent's xml:space checks take precedence, and
# +classify_element+ on the parent is the fallback.
#
# @param node [Object] the text node being classified
# @param opts [Hash] comparison options; +:match_opts+ is read from it
# @return [Symbol, Object] +:preserve+, +:strip+, or the result of
#   +classify_element+ for the parent
def classify_text_node(node, opts)
  match_opts = opts[:match_opts]
  return :strip unless match_opts && text_node_parent?(node)

  owner = node_parent(node)

  if respect_xml_space?(match_opts)
    return :preserve if xml_space_preserve?(owner)
    return :strip if xml_space_default?(owner)

    classify_element(owner, match_opts)
  elsif user_config_sensitive?(owner, match_opts)
    :preserve
  else
    :strip
  end
end

.contains_nbsp?(text) ⇒ Boolean

Returns:

  • (Boolean)


160
161
162
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 160

# Report whether +text+ contains a non-breaking space (U+00A0).
#
# The character is written as an explicit escape so it cannot be confused
# with (or silently replaced by) an ordinary space in editors and diffs.
#
# @param text [#to_s] the value to inspect; converted with +to_s+, so +nil+
#   is treated as an empty string
# @return [Boolean] true when a U+00A0 character is present
def contains_nbsp?(text)
  text.to_s.include?("\u00A0")
end

.default_sensitive_element?(element_name, match_opts) ⇒ Boolean

Returns:

  • (Boolean)


126
127
128
129
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 126

# Tell whether +element_name+ is among the format's default preserve
# elements, as reported by +format_default_preserve_elements+.
#
# @param element_name [#to_sym] element name; compared as a symbol
# @param match_opts [Hash] match options forwarded to
#   +format_default_preserve_elements+
# @return [Boolean]
def default_sensitive_element?(element_name, match_opts)
  defaults = format_default_preserve_elements(match_opts)
  defaults.include?(element_name.to_sym)
end

.element_sensitive?(node, opts) ⇒ Boolean

Check if an element is whitespace-sensitive based on configuration.

Returns:

  • (Boolean)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 42

# Decide whether the parent of +node+ is whitespace-sensitive.
#
# Returns false when +opts+ has no +:match_opts+ or +text_node_parent?+
# rejects the node. With +respect_xml_space?+ disabled, the answer is
# whatever +user_config_sensitive?+ returns for the parent. Otherwise the
# parent's xml:space checks are consulted first, and finally the parent
# counts as sensitive when +classify_element+ yields +:preserve+ or
# +:collapse+.
#
# @param node [Object] the text node whose parent is examined
# @param opts [Hash] comparison options; +:match_opts+ is read from it
# @return [Boolean, Object] the sensitivity verdict (the raw
#   +user_config_sensitive?+ result when xml:space is not respected)
def element_sensitive?(node, opts)
  match_opts = opts[:match_opts]
  return false unless match_opts && text_node_parent?(node)

  owner = node_parent(node)

  if respect_xml_space?(match_opts)
    return true if xml_space_preserve?(owner)
    return false if xml_space_default?(owner)

    # Both :preserve and :collapse mean whitespace matters for this parent.
    %i[preserve collapse].include?(classify_element(owner, match_opts))
  else
    user_config_sensitive?(owner, match_opts)
  end
end

.format_default_collapse_elements(match_opts) ⇒ Object



116
117
118
119
120
121
122
123
124
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 116

# Default collapse elements for the format named in +match_opts+.
#
# For the HTML family (+:html+, +:html4+, +:html5+) this is
# +HTML_COLLAPSE_ELEMENTS+ converted to symbols; every other format
# (including a missing +:format+, which defaults to +:xml+) gets an empty
# list.
#
# @param match_opts [Hash] match options; +:format+ is read from it
# @return [Array<Symbol>] a frozen array
def format_default_collapse_elements(match_opts)
  html_family = %i[html html4 html5].include?(match_opts[:format] || :xml)
  names = html_family ? HTML_COLLAPSE_ELEMENTS.map(&:to_sym) : []
  names.freeze
end

.format_default_preserve_elements(match_opts) ⇒ Object



106
107
108
109
110
111
112
113
114
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 106

# Default preserve elements for the format named in +match_opts+.
#
# For the HTML family (+:html+, +:html4+, +:html5+) this is
# +HTML_PRESERVE_ELEMENTS+ converted to symbols; every other format
# (including a missing +:format+, which defaults to +:xml+) gets an empty
# list.
#
# @param match_opts [Hash] match options; +:format+ is read from it
# @return [Array<Symbol>] a frozen array
def format_default_preserve_elements(match_opts)
  html_family = %i[html html4 html5].include?(match_opts[:format] || :xml)
  names = html_family ? HTML_PRESERVE_ELEMENTS.map(&:to_sym) : []
  names.freeze
end

.inline_whitespace_significant?(text_node) ⇒ Boolean

Check if whitespace-only text node sits between two inline element siblings, making the whitespace semantically significant.

Returns:

  • (Boolean)


133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 133

# Check whether +text_node+ lies between two inline elements.
#
# The node's siblings are obtained through +NodeInspector+; the closest
# non-whitespace sibling on each side (per
# +nearest_non_whitespace_sibling+) must both satisfy +inline_element?+.
# Returns false when the node has no parent or is not found among the
# parent's children.
#
# @param text_node [Object] the text node to examine
# @return [Boolean]
def inline_whitespace_significant?(text_node)
  container = NodeInspector.parent(text_node)
  return false unless container

  peers = NodeInspector.children(container)
  position = peers.index(text_node)
  return false unless position

  # Look left (-1) and right (+1) for the nearest meaningful neighbour.
  before, after = [-1, 1].map do |step|
    nearest_non_whitespace_sibling(peers, position, step)
  end

  inline_element?(before) && inline_element?(after)
end

.nearest_non_whitespace_sibling(siblings, idx, direction) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 147

# Find the closest sibling, starting next to +idx+ and moving by
# +direction+, that is not a whitespace text node (per
# +whitespace_text_node?+).
#
# @param siblings [Array] ordered sibling nodes
# @param idx [Integer] index of the starting node within +siblings+
# @param direction [Integer] step applied to the index on each move
#   (callers in this file pass -1 or 1); must not be zero
# @return [Object, nil] the first non-whitespace sibling encountered, or nil
#   when the scan runs off either end of +siblings+
# @raise [ArgumentError] if +direction+ is zero, since the scan could never
#   advance and would loop forever on a whitespace node
def nearest_non_whitespace_sibling(siblings, idx, direction)
  raise ArgumentError, "direction must be non-zero" if direction.zero?

  position = idx + direction
  # Stop as soon as the index leaves the valid range; negative indices must
  # not wrap around to the end of the array.
  while position.between?(0, siblings.length - 1)
    candidate = siblings[position]
    return candidate unless whitespace_text_node?(candidate)

    position += direction
  end
  nil
end

.preserve_whitespace_node?(node, opts) ⇒ Boolean

Check if a whitespace-only text node should be preserved (kept rather than filtered out)

Returns:

  • (Boolean)


61
62
63
64
65
66
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 61

# Decide whether a text node's whitespace should be kept.
#
# A node without a parent (per +node_parent+) is never preserved; otherwise
# the verdict is delegated to +element_sensitive?+.
#
# @param node [Object] the text node under consideration
# @param opts [Hash] comparison options forwarded to +element_sensitive?+
# @return [Boolean, Object] false for a parentless node, otherwise the
#   result of +element_sensitive?+
def preserve_whitespace_node?(node, opts)
  node_parent(node) ? element_sensitive?(node, opts) : false
end

.resolved_collapse_elements(match_opts) ⇒ Object



102
103
104
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 102

# Array form of the resolved collapse element set.
#
# @param match_opts [Hash] match options forwarded to
#   +resolved_collapse_elements_set+
# @return [Array] the set's members, via +to_a+
def resolved_collapse_elements(match_opts)
  collapse_set = resolved_collapse_elements_set(match_opts)
  collapse_set.to_a
end

.resolved_preserve_elements(match_opts) ⇒ Object



98
99
100
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 98

# Array form of the resolved preserve element set.
#
# @param match_opts [Hash] match options forwarded to
#   +resolved_preserve_elements_set+
# @return [Array] the set's members, via +to_a+
def resolved_preserve_elements(match_opts)
  preserve_set = resolved_preserve_elements_set(match_opts)
  preserve_set.to_a
end

.whitespace_preserved?(element, match_opts) ⇒ Boolean

Check if structural whitespace is preserved (not stripped) for an element.

Returns:

  • (Boolean)


88
89
90
91
92
93
94
95
96
# File 'lib/canon/comparison/whitespace_sensitivity.rb', line 88

# Tell whether structural whitespace is kept (not stripped) for +element+.
#
# When +respect_xml_space?+ is enabled, the element's xml:space checks
# decide first: preserve gives true, default gives false. In every other
# case the element is classified with +classify_element+, and both
# +:preserve+ and +:collapse+ count as preserved.
#
# @param element [Object] the element to check
# @param match_opts [Hash] match options
# @return [Boolean]
def whitespace_preserved?(element, match_opts)
  honour_xml_space = respect_xml_space?(match_opts)
  return true if honour_xml_space && xml_space_preserve?(element)
  return false if honour_xml_space && xml_space_default?(element)

  case classify_element(element, match_opts)
  when :preserve, :collapse then true
  else false
  end
end