Class: Rng::SchemaValidator

Inherits:
Object
  • Object
show all
Defined in:
lib/rng/schema_validator.rb

Overview

Validates raw XML against RELAX NG structural rules. Operates on the Nokogiri XML tree BEFORE Lutaml deserialization, because Lutaml silently drops unmapped content.

Constant Summary collapse

RNG_NS =
'http://relaxng.org/ns/structure/1.0'
ANNOTATIONS_NS =
'http://relaxng.org/ns/compatibility/annotations/1.0'
LEAF_ELEMENTS =
%w[empty text notAllowed ref parentRef value].freeze
OBSOLETE_ELEMENTS =
%w[not difference key keyRef].freeze
OBSOLETE_ATTRS =
%w[key keyRef global].freeze
NAME_REQUIRED =
%w[element attribute].freeze
CONTAINER_ELEMENTS =
%w[group choice interleave optional zeroOrMore oneOrMore list mixed define start].freeze
VALID_ROOT_ELEMENTS =
%w[grammar element group choice interleave notAllowed externalRef data].freeze
NO_ATTR_LEAF_ELEMENTS =
%w[empty text notAllowed].freeze
KNOWN_ATTRS =
{
  'element' => %w[name ns],
  'attribute' => %w[name ns],
  'ref' => %w[name],
  'parentRef' => %w[name],
  'define' => %w[name combine],
  'data' => %w[type datatypeLibrary combine],
  'value' => %w[type datatypeLibrary combine],
  'list' => %w[datatypeLibrary combine],
  'externalRef' => %w[href ns],
  'include' => %w[href],
  'param' => %w[name],
  'grammar' => %w[ns datatypeLibrary],
  'start' => %w[combine],
  'anyName' => %w[],
  'nsName' => %w[ns],
  'except' => %w[]
}.freeze
VALID_UNPREFIXED_ATTRS =
%w[name ns type datatypeLibrary combine href key keyRef global].freeze
GENERIC_ATTRS =
%w[name ns type datatypeLibrary combine href].freeze
ATTR_DISALLOWED =

Elements not allowed as content in attribute

%w[element attribute group interleave mixed].freeze
LIST_DISALLOWED =

Elements not allowed in list content

%w[element attribute list interleave mixed].freeze
DATA_EXCEPT_DISALLOWED =

Elements not allowed in data/except content

%w[element attribute text list interleave mixed group choice].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(collect_all: false) ⇒ SchemaValidator

Returns a new instance of SchemaValidator.



110
111
112
113
# File 'lib/rng/schema_validator.rb', line 110

# Builds a validator with an empty error list.
#
# @param collect_all [Boolean] when true, #report_error appends each error
#   to #errors instead of raising it; when false (the default) the first
#   reported error is raised.
def initialize(collect_all: false)
  @collect_all = collect_all
  @errors = []
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



108
109
110
# File 'lib/rng/schema_validator.rb', line 108

# Returns the error list held in @errors.
#
# The list starts out empty (see #initialize) and #report_error appends to it
# only when the validator was created with collect_all: true.
def errors
  @errors
end

Class Method Details

.valid?(xml_input) ⇒ Boolean

Returns:

  • (Boolean)


91
92
93
94
95
96
97
98
99
100
101
# File 'lib/rng/schema_validator.rb', line 91

# Reports whether the given XML passes validation, without raising.
#
# Uses a validator in its default (raise-on-first-error) mode and converts a
# SchemaValidationError into a false result.
#
# @param xml_input [Object] input handed unchanged to Nokogiri::XML
# @return [Boolean] false when the document has no root element or when
#   validation raises SchemaValidationError; true otherwise
def valid?(xml_input)
  root_element = Nokogiri::XML(xml_input).root
  if root_element
    new.validate_node(root_element)
    true
  else
    false
  end
rescue SchemaValidationError
  false
end

.validate(xml_input) ⇒ Object



68
69
70
71
72
73
74
75
# File 'lib/rng/schema_validator.rb', line 68

# Validates the given XML and raises on the first problem found.
#
# @param xml_input [Object] input handed unchanged to Nokogiri::XML
# @return [true] when validation completes without raising
# @raise [SchemaValidationError] when the document has no root element, or
#   when the validator (in its default raise-on-first-error mode) reports an
#   error
def validate(xml_input)
  root = Nokogiri::XML(xml_input).root
  # This is a class-level method, so the instance method #report_error is not
  # callable here; raise the error directly instead.
  raise SchemaValidationError, 'Document has no root element' unless root

  new.validate_node(root)
  true
end

.validate_all(xml_input) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rng/schema_validator.rb', line 77

# Validates the given XML in collect-all mode and returns every error found
# instead of raising.
#
# @param xml_input [Object] input handed unchanged to Nokogiri::XML
# @return [Array<SchemaValidationError>] the collected errors; a one-element
#   array when the document has no root element
def validate_all(xml_input)
  root = Nokogiri::XML(xml_input).root
  return [SchemaValidationError.new('Document has no root element')] unless root

  validator = new(collect_all: true)
  begin
    validator.validate_node(root)
  rescue SchemaValidationError => e
    # In collect-all mode #report_error does not raise, so an error arriving
    # here was raised directly. Record it rather than dropping it, otherwise
    # an invalid document could come back with an empty error list.
    validator.errors << e unless validator.errors.include?(e)
  end
  validator.errors
end

.validate_with_location(xml_input) ⇒ Object



103
104
105
# File 'lib/rng/schema_validator.rb', line 103

# Delegates to validate_all and returns its result unchanged.
#
# @param xml_input [Object] passed straight through to validate_all
# @return [Object] whatever validate_all returns
def validate_with_location(xml_input)
  validate_all(xml_input)
end

Instance Method Details

#report_error(message, xpath: nil, node: nil) ⇒ Object



202
203
204
205
206
207
208
209
# File 'lib/rng/schema_validator.rb', line 202

# Builds a SchemaValidationError and either raises it or records it,
# depending on the collect_all mode chosen at construction time.
#
# @param message [String] error description
# @param xpath [String, nil] location forwarded to the error as xpath:
# @param node [#line, nil] when given, its #line is forwarded to the error as
#   line:; otherwise line: is nil
# @return [nil] in collect-all mode, after appending the error to @errors
# @raise [SchemaValidationError] when not in collect-all mode
def report_error(message, xpath: nil, node: nil)
  error = SchemaValidationError.new(message, xpath: xpath, line: node&.line)
  raise error unless @collect_all

  @errors << error
  nil
end

#validate_node(node, parent_context: nil) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/rng/schema_validator.rb', line 115

# Runs every structural rule against one element, then recurses into its
# element children.
#
# Nodes that are not Nokogiri::XML::Element are ignored. Below the root, only
# elements in the RELAX NG namespace or with no namespace are checked; any
# other element is skipped together with its subtree.
#
# @param node [Nokogiri::XML::Node] node to validate
# @param parent_context [Object, nil] forwarded to validate_content_model;
#   for children it is whatever context_for_child returns
# @raise [SchemaValidationError] from #report_error when the validator is not
#   in collect-all mode (presumably the rule helpers report through it too —
#   confirm against their definitions)
def validate_node(node, parent_context: nil)
  return unless node.is_a?(Nokogiri::XML::Element)

  ns = node.namespace&.href
  local_name = node.name
  is_root = node.parent&.document&.root == node
  xpath = node.path.delete_prefix('/')

  if is_root
    # The root is rejected up front only when it is both outside the RELAX NG
    # namespace and not one of the recognised root element names.
    if ns != RNG_NS && !VALID_ROOT_ELEMENTS.include?(local_name)
      report_error("Invalid root element '#{local_name}'", xpath: xpath, node: node)
      return
    end
  else
    # Foreign elements are skipped. This guard already covers the annotations
    # namespace, so no separate ANNOTATIONS_NS check is needed here.
    return unless ns == RNG_NS || (ns.nil? && !local_name.empty?)
  end

  # ---- Existing rules ----
  report_obsolete_element(local_name, xpath)
  report_invalid_root(local_name, node, xpath)
  validate_obsolete_attrs(node, xpath)
  validate_leaf_no_children(local_name, node, xpath)
  validate_required_attrs(local_name, node, xpath)
  validate_name_attr(local_name, node, xpath)
  validate_ncname_strict(node, xpath)
  validate_datatype_library(node, xpath)
  validate_href(node, xpath)
  validate_single_except(local_name, node, xpath)
  validate_container_children(local_name, node, xpath)
  validate_name_class_and_pattern(local_name, node, xpath)
  validate_content_model(node, xpath, parent_context)
  validate_context(local_name, node, xpath)
  validate_name_value_purity(local_name, node, xpath)
  validate_xmlns_restrictions(local_name, node, xpath)
  validate_name_class_except(local_name, node, xpath)
  validate_name_conflict(local_name, node, xpath)
  validate_group_content(local_name, node, xpath)
  validate_leaf_no_attrs(local_name, node, xpath)
  validate_unknown_attrs(node, xpath)
  validate_single_attribute_pattern(local_name, node, xpath)
  validate_no_duplicate_attribute_names(local_name, node, xpath)

  # ---- New rules for spectest coverage ----
  validate_except_not_empty(local_name, node, xpath)
  validate_xmlns_in_name_class(local_name, node, xpath)
  # Run once only: a repeated call would record the same error twice when
  # errors are being collected.
  validate_xmlns_in_anyname_attribute(local_name, node, xpath)
  validate_name_not_empty(local_name, node, xpath)
  validate_grammar_structure(local_name, node, xpath)
  validate_define_combine(local_name, node, xpath)
  validate_combine_consistency(local_name, node, xpath)
  validate_attribute_name_class_overlap(local_name, node, xpath)
  validate_nsname_except_rules(local_name, node, xpath)
  validate_param_for_builtin_types(local_name, node, xpath)
  validate_data_except_strict(node, xpath)
  validate_attribute_choice_content(local_name, node, xpath)
  validate_interleave_attribute_overlap(local_name, node, xpath)
  validate_interleave_name_class_overlap(local_name, node, xpath)
  validate_list_content_strict(local_name, node, xpath)
  validate_element_attribute_overlap(local_name, node, xpath)
  validate_grammar_root_element(local_name, node, xpath)
  validate_grammar_must_have_start(local_name, node, xpath)
  validate_grammar_nesting(local_name, node, xpath)
  validate_ref_resolution(local_name, node, xpath)
  validate_recursive_ref(local_name, node, xpath)
  validate_xmlns_in_name_class_choice(local_name, node, xpath)
  validate_builtin_type(local_name, node, xpath)
  validate_datatype_library_empty(local_name, node, xpath)
  validate_start_content(local_name, node, xpath)
  validate_start_element_conflicts(local_name, node, xpath)
  validate_group_text_data(local_name, node, xpath)
  validate_data_except_content_types(local_name, node, xpath)
  validate_infinite_attribute_name_class(local_name, node, xpath)
  validate_oneOrMore_attribute_overlap(local_name, node, xpath)
  validate_oneOrMore_infinite_attribute_name(local_name, node, xpath)

  # Recurse into element children, leaving annotation elements out.
  node.element_children.each do |child|
    next if child.namespace&.href == ANNOTATIONS_NS

    validate_node(child, parent_context: context_for_child(local_name, node))
  end
end