Module: Metanorma::Standoc::Validate::Schema

Includes:
Utils
Included in:
Metanorma::Standoc::Validate
Defined in:
lib/metanorma/validate/schema.rb

Constant Summary collapse

# XML namespace URI of SVG; bound to the "m" prefix in the XPath queries
# issued by formattedstr_strip.
SVG_NS =
"http://www.w3.org/2000/svg".freeze
# XPath union selecting every stem and metanorma-extension element; used by
# formattedstr_strip to find the containers whose descendant elements have
# their attributes removed.
WILDCARD_ATTRS =
"//stem | //metanorma-extension".freeze

Constants included from Utils

Utils::SECTION_CONTAINERS, Utils::SUBCLAUSE_XPATH

Instance Method Summary collapse

Methods included from Utils

#add_id, #add_id_text, #add_noko_elem, adoc2xml, #asciimath_key, #attr_code, #complete_and_compare_dates, #complete_iso_date, #complete_year_month, #complete_year_only, #convert, #csv_split, #dl_to_attrs, #dl_to_elems, #document_ns_attributes, #grkletters, #insert_before, #isodoc, #isolated_asciidoctor_convert, #kv_parse, #link_unwrap, #noko, #parse_complete_date, #parse_partial_date, #processor, #quoted_csv_split, #refid?, #section_containers, #separate_numbering_footnotes, #term_expr, #textcleanup, #to_xml, #wrap_in_para, #xml_encode

Instance Method Details

#add_ns_to_fragment(xml_fragment) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/metanorma/validate/schema.rb', line 96

# Parses an XML fragment and makes sure its root element is namespaced.
#
# The fragment is first parsed strictly. If the root carries no namespace,
# the fragment text is rewritten so that the root start tag declares
# @conv.xml_namespace as its default namespace, and the rewritten text is
# parsed again.
#
# @param xml_fragment [String] serialised XML fragment
# @return [Nokogiri::XML::Document, nil] the parsed document, or nil when the
#   fragment has parse errors, has no root, or any StandardError is raised
def add_ns_to_fragment(xml_fragment)
  f = Nokogiri::XML(xml_fragment, &:strict)
  f.errors.any? || f.root.nil? and return nil
  f.root.namespace and return f
  root_tag = f.root.name
  # Group 1 is the attribute text, group 2 the optional "/" of a self-closing
  # tag. The declaration must be inserted before that slash; otherwise <a/>
  # would become the malformed <a/ xmlns='...'>. The name is escaped because
  # XML names may contain regex metacharacters such as ".".
  start_tag = %r{<#{Regexp.escape(root_tag)}(\s[^>]*?)?\s*(/?)>}
  # Block form of sub: the replacement is used literally, so backslashes in
  # the namespace or attributes are not treated as back-references.
  namespaced = xml_fragment.sub(start_tag) do
    "<#{root_tag}#{Regexp.last_match(1)} " \
      "xmlns='#{@conv.xml_namespace}'#{Regexp.last_match(2)}>"
  end
  Nokogiri::XML(namespaced)
rescue StandardError
  nil
end

#formattedstr_strip(doc) ⇒ Object

RelaxNG cannot cope well with wildcard attributes. So we strip any attributes from FormattedString instances (which can contain xs:any markup, and are signalled with @format) before validation.



133
134
135
136
137
138
139
140
141
142
143
# File 'lib/metanorma/validate/schema.rb', line 133

# Prepares a document for RelaxNG validation by removing markup the schema
# cannot describe.
#
# Every element nested inside a node matched by WILDCARD_ATTRS loses all of
# its attributes, and every SVG element in the document is replaced by an
# empty, un-namespaced <svg/> placeholder. The document is modified in place.
#
# @param doc [Nokogiri::XML::Document] document to clean up
# @return [Nokogiri::XML::Document] the same +doc+, after modification
def formattedstr_strip(doc)
  ns = { "m" => SVG_NS }
  doc.xpath(WILDCARD_ATTRS, ns).each do |container|
    container.elements.each do |child|
      child.traverse do |node|
        next unless node.element?

        # keys returns a fresh array of attribute names, so deleting while
        # walking it is safe.
        node.keys.each { |attr_name| node.delete(attr_name) }
      end
    end
  end
  doc.xpath("//m:svg", ns).each { |svg| svg.replace("<svg/>") }
  doc
end

#fragment_schema(root_element) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/metanorma/validate/schema.rb', line 109

# Builds a throw-away RelaxNG grammar that includes the main schema (see
# schema_location) but overrides its start pattern so that +root_element+ is
# accepted as the document root, then compiles it.
#
# The grammar is written to a Tempfile; the caller receives that Tempfile and
# is responsible for unlinking it.
#
# @param root_element [String] name of the schema definition to use as root
# @return [Array] two elements: the Tempfile holding the grammar (nil if it
#   could not even be created) and the compiled Nokogiri::XML::RelaxNG schema,
#   or nil when building or compiling the grammar raised a StandardError
def fragment_schema(root_element)
  temp_schema = Tempfile.new(["dynamic_schema", ".rng"])
  temp_schema.write(<<~SCHEMA)
            <grammar xmlns="http://relaxng.org/ns/structure/1.0">
      <include href="#{schema_location}">
        <start combine="choice">
            <ref name="#{root_element}"/>
        </start>
    </include>
            </grammar>
  SCHEMA
  temp_schema.close
  # Block form closes the read handle as soon as the schema is compiled;
  # a bare File.open here would leak one descriptor per validated fragment.
  schema = File.open(temp_schema.path) { |io| Nokogiri::XML::RelaxNG(io) }
  [temp_schema, schema]
rescue StandardError # error because root_element is not in schema
  [temp_schema, nil]
end

#schema_file ⇒ Object



14
15
16
# File 'lib/metanorma/validate/schema.rb', line 14

# File name (no directory part) of the RelaxNG grammar; schema_location joins
# it onto a directory to obtain the full path.
#
# @return [String] the schema file name
def schema_file
  "isodoc-compile.rng"
end

#schema_location ⇒ Object



7
8
9
10
11
12
# File 'lib/metanorma/validate/schema.rb', line 7

# Full path of the schema file, resolved relative to a reference source file.
#
# The reference file is, in order of preference: the value of the class-level
# +_file+ method when the including class responds to it, the absolute path of
# the file this method was called from, and finally this file itself. The
# schema is expected to sit in the same directory as that reference file.
#
# @return [String] directory of the reference file joined with schema_file
def schema_location
  klass = self.class
  anchor = klass._file if klass.respond_to?(:_file)
  # Must stay directly in this method body: frame 1 is our immediate caller.
  anchor ||= caller_locations(1..1).first.absolute_path
  anchor ||= __FILE__
  File.join(File.dirname(anchor), schema_file)
end

#schema_validate(doc, schema) ⇒ Object



18
19
20
21
22
23
24
25
26
# File 'lib/metanorma/validate/schema.rb', line 18

# Validates +doc+ against +schema+ through a temporary XML file.
#
# The actual work is delegated to schema_validate1. A Jing::Error is reported
# through @conv.clean_abort; the temporary file is closed and deleted whether
# or not validation succeeds.
#
# @param doc [Object] document handed on to schema_validate1
# @param schema [String] schema handed on to schema_validate1
# @return [Object] whatever schema_validate1 (or clean_abort) returns
def schema_validate(doc, schema)
  Tempfile.open(["tmp", ".xml"], encoding: "UTF-8") do |tmp_xml|
    begin
      schema_validate1(tmp_xml, doc, schema)
    rescue Jing::Error => jing_error
      @conv.clean_abort("Jing failed with error: #{jing_error}", doc)
    ensure
      tmp_xml.close!
    end
  end
end

#schema_validate1(file, doc, schema) ⇒ Object

Force UTF-8 encoding for Java console output, to fix an issue on Japanese Windows. See: github.com/metanorma/mn-samples-plateau/issues/248. The -Dsun.jnu.encoding parameter controls Java’s native interface encoding (console I/O).



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/metanorma/validate/schema.rb', line 33

# Serialises +doc+ into +file+, validates that file against +schema+ with
# Jing, and records every reported error in @log under "STANDOC_7".
#
# While Jing runs, _JAVA_OPTIONS is extended with flags forcing UTF-8 for
# file and console encoding; the previous value is restored afterwards, even
# if validation raises.
#
# @param file [#write, #close, #path] open temporary file to receive the XML
# @param doc [Object] document, serialised with to_xml
# @param schema [String] schema passed to schema_validate_with_retry
# @return [Object] the error collection returned by the validator
def schema_validate1(file, doc, schema)
  file.write(to_xml(doc))
  file.close
  old_java_opts = ENV["_JAVA_OPTIONS"]
  # Append to, rather than replace, any options the user already set (heap
  # size, proxies, ...), so they still apply to the Jing JVM.
  # NOTE(review): relies on the JVM letting the later -D definitions win if
  # the user also set an encoding -- confirm.
  ENV["_JAVA_OPTIONS"] =
    [old_java_opts, "-Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8"]
      .reject { |opts| opts.to_s.empty? }.join(" ")
  begin
    errors = schema_validate_with_retry(schema, file.path)
    warn "Syntax Valid!" if errors.none?
    errors.each do |e|
      @log.add("STANDOC_7",
               "XML Line #{'%06d' % e[:line]}:#{e[:column]}",
               params: [e[:message]])
    end
  ensure
    # Restore original _JAVA_OPTIONS (assigning nil removes the variable)
    ENV["_JAVA_OPTIONS"] = old_java_opts
  end
end

#schema_validate_with_retry(schema, file_path, max_retries: 3) ⇒ Object

Retry Jing validation with exponential backoff to handle “Too many open files” errors. This can occur when validating large documents or when multiple validations happen in quick succession, exhausting the system’s file descriptor limit. Java’s Jing validator opens multiple file handles for the JAR, schema, and XML files, and the OS may not clean them up fast enough.



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/metanorma/validate/schema.rb', line 60

# Runs Jing on +file_path+, retrying when the failure is a file-descriptor
# shortage.
#
# Only a Jing::ExecutionError whose message contains "Too many open files" is
# retried, at most +max_retries+ times, sleeping 0.1s, 0.2s, 0.4s, ... between
# attempts. Any other execution error, or the error of the final attempt, is
# re-raised unchanged.
#
# @param schema [String] schema given to Jing.new
# @param file_path [String] path of the XML file to validate
# @param max_retries [Integer] number of retries allowed after the first try
# @return [Object] the result of Jing#validate
# @raise [Jing::ExecutionError] when the error is not retryable or retries
#   are exhausted
def schema_validate_with_retry(schema, file_path, max_retries: 3)
  attempts = 0
  begin
    Jing.new(schema, encoding: "UTF-8").validate(file_path)
  rescue Jing::ExecutionError => e
    out_of_descriptors = e.message.include?("Too many open files")
    # Anything else, or a shortage that outlasted the retries, goes to the caller.
    raise unless out_of_descriptors && attempts < max_retries

    delay = 0.1 * (2**attempts) # doubles on every retry
    attempts += 1
    warn "Jing validation encountered 'Too many open files' error. " \
         "Retrying (attempt #{attempts}/#{max_retries}) after #{delay}s delay..."
    sleep(delay)
    retry
  end
end

#validate_document_fragment(xml_fragment) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/metanorma/validate/schema.rb', line 80

# Validates a stand-alone XML fragment against the document schema, using a
# temporary grammar whose start element is the fragment's root.
#
# A fragment that cannot be parsed is not validated and is reported as
# acceptable. Otherwise the fragment is rejected only when the schema does not
# know its root element, or when at least one validation error contains
# "Did not expect element"; other validation errors are returned but do not
# make the result false.
#
# @param xml_fragment [String] serialised XML fragment
# @return [Array] two elements: a Boolean verdict, and either an explanatory
#   String or the collection of validation errors
def validate_document_fragment(xml_fragment)
  f = add_ns_to_fragment(xml_fragment) or
    return [true,
            "Fragment is not well-formed XML, not validating"]
  begin
    temp_schema, schema = fragment_schema(f.root.name)
    schema or return [false, "Did not expect element #{f.root.name}"]
    validation_errors = schema.validate(f)
    [validation_errors.none? do |x|
      x.to_s.include?("Did not expect element")
    end, validation_errors]
  ensure
    # fragment_schema can hand back nil when the Tempfile was never created
    # (and temp_schema is unset if it raised); an unguarded unlink would then
    # raise NoMethodError from this ensure and replace the real outcome.
    temp_schema&.unlink
  end
end