Module: Metanorma::Standoc::Maths

Includes:
Regex
Included in:
Cleanup
Defined in:
lib/metanorma/cleanup/maths.rb,
lib/metanorma/cleanup/mathvariant.rb

Constant Summary collapse

# Namespace URI bound to the "m" prefix in this module's MathML XPath queries
# and written onto generated <math> elements.
MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
# Namespace URI used to locate UnitsML elements and to create the <UnitsML>
# container that collects them.
UNITSML_NS = "https://schema.unitsml.org/unitsml/1.0".freeze
# Lookup table used by #mathvariant_override to reconcile nested mathvariant
# attributes. The first-level key is the mathvariant of the enclosing (outer)
# element, the second-level key is the mathvariant currently on the nested
# (inner) element, and the value is the mathvariant the inner element is
# rewritten to. Combinations that are absent leave the inner value unchanged.
MATHVARIANT_OVERRIDE =
{
  bold: { normal: "bold", italic: "bold-italic", fraktur: "bold-fraktur",
          script: "bold-script", "sans-serif": "bold-sans-serif",
          "sans-serif-italic": "sans-serif-bold-italic" },
  # The inner key was misspelt "bod", so bold nested in italic was never
  # promoted to bold-italic.
  italic: { normal: "italic", bold: "bold-italic",
            "sans-serif": "sans-serif-italic",
            "bold-sans-serif": "sans-serif-bold-italic" },
  "bold-italic": { normal: "bold-italic", bold: "bold-italic",
                   italic: "bold-italic",
                   "sans-serif": "sans-serif-bold-italic",
                   "bold-sans-serif": "sans-serif-bold-italic",
                   "sans-serif-italic": "sans-serif-bold-italic" },
  fraktur: { normal: "fraktur", bold: "bold-fraktur" },
  "bold-fraktur": { normal: "bold-fraktur", fraktur: "bold-fraktur" },
  script: { normal: "script", bold: "bold-script" },
  # NOTE(review): every other row maps normal: to the row's own variant;
  # "script" here drops the bold. Confirm whether "bold-script" was intended.
  "bold-script": { normal: "script", script: "bold-script" },
  "sans-serif": { normal: "sans-serif", bold: "bold-sans-serif",
                  italic: "sans-serif-italic",
                  "bold-italic": "sans-serif-bold-italic" },
  "bold-sans-serif": { normal: "bold-sans-serif", bold: "bold-sans-serif",
                       "sans-serif": "bold-sans-serif",
                       italic: "sans-serif-bold-italic",
                       "bold-italic": "sans-serif-bold-italic",
                       "sans-serif-italic": "sans-serif-bold-italic" },
  "sans-serif-italic": { normal: "sans-serif-italic",
                         italic: "sans-serif-italic",
                         "sans-serif": "sans-serif-italic",
                         bold: "sans-serif-bold-italic",
                         "bold-italic": "sans-serif-bold-italic",
                         "sans-serif-bold": "sans-serif-bold-italic" },
  "sans-serif-bold-italic": { normal: "sans-serif-bold-italic",
                              italic: "sans-serif-bold-italic",
                              "sans-serif": "sans-serif-bold-italic",
                              "sans-serif-italic": "sans-serif-bold-italic",
                              bold: "sans-serif-bold-italic",
                              "bold-italic": "sans-serif-bold-italic",
                              "sans-serif-bold": "sans-serif-bold-italic" },
}.freeze

Constants included from Regex

Regex::CONN_REGEX_STR, Regex::ISO_REF, Regex::ISO_REF_ALL_PARTS, Regex::ISO_REF_NO_YEAR, Regex::LOCALITIES, Regex::LOCALITY_REGEX_STR, Regex::LOCALITY_REGEX_STR_TRIPLEDASH, Regex::LOCALITY_REGEX_VALUE_ONLY_STR, Regex::NON_ISO_REF, Regex::NON_ISO_REF1, Regex::NUMERIC_REGEX, Regex::TERM_REFERENCE_RE, Regex::TERM_REFERENCE_RE_STR

Instance Method Summary collapse

Methods included from Regex

#to_regex

Instance Method Details

#add_misc_container(xmldoc) ⇒ Object



97
98
99
100
101
102
103
104
105
# File 'lib/metanorma/cleanup/maths.rb', line 97

# Returns the document's <metanorma-extension> element, creating it first when
# the document has none.
#
# A new container is assigned as the `next` of the first available anchor: the
# last <termdocsource>, else <bibdata>, else the first child of the root.
#
# @param xmldoc the XML document (queried with #at / #xpath / #root)
# @return the <metanorma-extension> node as found by xmldoc.at
def add_misc_container(xmldoc)
  existing = xmldoc.at("//metanorma-extension")
  return existing if existing

  anchor = xmldoc.xpath("//termdocsource")&.last
  anchor ||= xmldoc.at("//bibdata")
  anchor ||= xmldoc.root.children.first
  anchor.next = "<metanorma-extension/>"
  # Query again so the caller gets the node that now lives in the document.
  xmldoc.at("//metanorma-extension")
end

#asciimath2mathml(xml) ⇒ Object



10
11
12
13
14
15
16
17
# File 'lib/metanorma/cleanup/maths.rb', line 10

# Converts every <stem type="AsciiMath"> in the document to MathML, reporting
# progress through #progress_conv, then runs #asciimath2mathml_wrap.
#
# @param xml the XML document
# @return the result of #asciimath2mathml_wrap
def asciimath2mathml(xml)
  stems = xml.xpath("//stem[@type = 'AsciiMath']")
  total = stems.size
  stems.each_with_index do |stem, idx|
    progress_conv(idx, 500, total, 1000, "AsciiMath")
    asciimath2mathml_indiv(stem)
  end
  asciimath2mathml_wrap(xml)
end

#asciimath2mathml_err(text, expr) ⇒ Object



49
50
51
# File 'lib/metanorma/cleanup/maths.rb', line 49

# Records a failed AsciiMath conversion in @log under the id "STANDOC_6".
#
# @param text first argument; #asciimath2mathml_indiv passes the element's XML
# @param expr second argument; #asciimath2mathml_indiv passes the exception
# @return whatever @log.add returns
def asciimath2mathml_err(text, expr)
  # The message parameters are passed as expr first, then text.
  details = [expr, text]
  @log.add("STANDOC_6", nil, params: details)
end

#asciimath2mathml_indiv(elem) ⇒ Object



19
20
21
22
23
24
25
26
27
28
# File 'lib/metanorma/cleanup/maths.rb', line 19

# Converts one AsciiMath <stem> element to MathML in place.
#
# The element is retyped to "MathML" before anything else, so that happens
# even when the expression turns out to be blank. On success its children are
# replaced by the generated MathML followed by an <asciimath> element holding
# the re-encoded source expression. Any StandardError (including a nil result
# from #asciimath_parse, which fails on the string append) is reported through
# #asciimath2mathml_err instead of propagating.
#
# @param elem the <stem> element
def asciimath2mathml_indiv(elem)
  elem["type"] = "MathML"
  source = @c.decode(elem.text)
  return if source.strip.empty?

  mathml = asciimath_parse(source, elem)&.strip
  mathml += "<asciimath>#{@c.encode(source, :basic)}</asciimath>"
  elem.children = mathml
rescue StandardError => e
  asciimath2mathml_err(elem.to_xml, e)
end

#asciimath2mathml_wrap(xml) ⇒ Object



53
54
55
56
57
58
59
60
61
# File 'lib/metanorma/cleanup/maths.rb', line 53

# Removes the `display` attribute from every element whose local name is
# "math" and that carries one.
#
# @param xml the XML document
# @return xml, the same object that was passed in
def asciimath2mathml_wrap(xml)
  displayed = xml.xpath("//*[local-name() = 'math'][@display]")
  displayed.each { |math| math.delete("display") }
  # x.xpath("//stem").each do |y|
  # y.next_element&.name == "asciimath" and y << y.next_element
  # end
  xml
end

#asciimath2unitsml_options ⇒ Object



126
127
128
# File 'lib/metanorma/cleanup/maths.rb', line 126

# Options hash for AsciiMath-to-UnitsML conversion.
#
# @return [Hash] a fresh hash on every call, currently { multiplier: :space }
def asciimath2unitsml_options
  options = {}
  options[:multiplier] = :space
  options
end

#asciimath_cleanup(xml) ⇒ Object



6
7
8
# File 'lib/metanorma/cleanup/maths.rb', line 6

# Converts the document's AsciiMath to MathML unless @keepasciimath is set.
#
# @param xml the XML document
# @return false when @keepasciimath is truthy, otherwise the result of
#   #asciimath2mathml
def asciimath_cleanup(xml)
  return false if @keepasciimath

  asciimath2mathml(xml)
end

#asciimath_parse(expr, elem) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/metanorma/cleanup/maths.rb', line 32

# Produces MathML markup for one AsciiMath expression.
#
# An expression matching NUMERIC_REGEX is wrapped directly in an <mn>, and the
# element is flagged validate="false" unless @novalid is set. Anything else is
# parsed by Plurimath; UnitsML output is requested when the expression
# contains the substring "unitsml".
#
# @param expr [String] the decoded AsciiMath source
# @param elem the <stem> element; its "block" attribute is passed to Plurimath
#   as display_style
# @return [String, nil] MathML markup, or nil for a blank non-numeric expr
def asciimath_parse(expr, elem)
  unless NUMERIC_REGEX.match?(expr)
    return if expr.strip.empty?

    formula = Plurimath::Math.parse(expr, "asciimath")
    options = { display_style: elem["block"] }
    if expr.include?("unitsml")
      options[:unitsml] = { xml: true, multiplier: :space }
    end
    return formula.to_mathml(**options)
  end

  elem["validate"] = "false" unless @novalid
  # The trailing newline mirrors what a heredoc would emit.
  "<math xmlns='#{MATHML_NS}'><mstyle displaystyle='false'>" \
    "<mn>#{expr}</mn></mstyle></math>\n"
end

#gather_unitsml(unitsml, xmldoc, tag) ⇒ Object



116
117
118
119
120
121
122
123
124
# File 'lib/metanorma/cleanup/maths.rb', line 116

# Moves every UnitsML element named `tag` out of the document and into a new
# "<tag>Set" child of the UnitsML container.
#
# Every matching element is removed from its position; they are keyed by
# their "id" attribute, so for elements sharing an id only the last one is
# re-attached. Nothing is added when the document has no such element.
#
# @param unitsml the <UnitsML> container node
# @param xmldoc the XML document to search
# @param tag [String] local element name, e.g. "Unit"
# @return nil when nothing matched, otherwise the id => node hash
def gather_unitsml(unitsml, xmldoc, tag)
  by_id = {}
  xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS).each do |node|
    by_id[node["id"]] = node.remove
  end
  return if by_id.empty?

  container = unitsml.add_child("<#{tag}Set/>").first
  by_id.each_value { |node| container << node }
end

#mathml_cleanup(xmldoc) ⇒ Object



182
183
184
185
186
187
188
189
190
191
192
# File 'lib/metanorma/cleanup/maths.rb', line 182

# Runs the MathML cleanup steps over the document, in order:
# 1. turn <mathml-number> elements into MathML stems;
# 2. for each MathML stem not marked validate="false", clean its XML and
#    reconcile mathvariant attributes;
# 3. apply number formatting to every MathML stem;
# 4. gather UnitsML definitions.
#
# @param xmldoc the XML document
# @return the result of #mathml_unitsml
def mathml_cleanup(xmldoc)
  mathml_number_to_mathml(xmldoc)

  validated_query = "//stem[@type = 'MathML'][not(@validate = 'false')]"
  xmldoc.xpath(validated_query).each do |stem|
    mathml_xml_cleanup(stem)
    mathml_mathvariant(stem)
  end

  xmldoc.xpath("//stem[@type = 'MathML']").each do |stem|
    mathml_number_format(stem)
  end

  mathml_unitsml(xmldoc)
end

#mathml_italicise(xml) ⇒ Object

Presupposes that a multi-character mi is rendered upright and that a single-character mi takes the MathML default of italic.



10
11
12
13
14
15
16
# File 'lib/metanorma/cleanup/mathvariant.rb', line 10

# Sets mathvariant="normal" on each <mi> that has no ancestor carrying a
# mathvariant attribute and whose decoded text satisfies #mi_italicise?.
#
# @param xml the node to search beneath
# @return the collection of <mi> elements that was examined
def mathml_italicise(xml)
  identifiers = xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                          "m" => MATHML_NS)
  identifiers.each do |mi|
    mi_italicise?(@c.decode(mi.text)) and mi["mathvariant"] = "normal"
  end
end

#mathml_mathvariant(math) ⇒ Object



77
78
79
80
81
82
83
84
85
# File 'lib/metanorma/cleanup/mathvariant.rb', line 77

# Reconciles nested mathvariant attributes, then runs #mathml_italicise.
#
# For every element with a mathvariant, each descendant that also has one is
# rewritten to the combination given by #mathvariant_override.
#
# @param math the node whose descendants are processed
# @return the result of #mathml_italicise
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    # The inner loop only rewrites descendants, so outer's value is stable.
    enclosing = outer["mathvariant"]
    outer.xpath(".//*[@mathvariant]").each do |inner|
      combined = mathvariant_override(inner["mathvariant"], enclosing)
      inner["mathvariant"] = combined
    end
  end
  mathml_italicise(math)
end

#mathml_mi_italics ⇒ Object



4
5
6
7
# File 'lib/metanorma/cleanup/mathvariant.rb', line 4

# Per-class switches consulted by #mi_italicise?: one flag each for upper- and
# lower-case Greek and Roman letters. All four are true here.
#
# @return [Hash{Symbol => Boolean}]
def mathml_mi_italics
  {
    uppergreek: true,
    upperroman: true,
    lowergreek: true,
    lowerroman: true,
  }
end

#mathml_mn_format(math) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
# File 'lib/metanorma/cleanup/maths.rb', line 130

# Writes the resolved number-format profile onto every <mn> of a stem that
# has a "number-format" attribute.
#
# Each <mn> gets a data-metanorma-numberformat attribute of comma-separated
# key='value' pairs taken from #mathml_mn_profile. Entries whose value is the
# string "nil" are dropped, and the attribute is left alone when no entries
# remain.
#
# @param math the stem element
# @return nil when the stem has no number-format, otherwise the <mn> set
def mathml_mn_format(math)
  return unless math["number-format"]

  math.xpath(".//m:mn", "m" => MATHML_NS).each do |mn|
    active = mathml_mn_profile(mn).reject { |_key, value| value == "nil" }
    pairs = active.map { |key, value| "#{key}='#{@c.decode value}'" }
    next if pairs.empty?

    mn["data-metanorma-numberformat"] = pairs.join(",")
  end
end

#mathml_mn_profile(mnum) ⇒ Object



142
143
144
145
146
147
148
149
150
151
# File 'lib/metanorma/cleanup/maths.rb', line 142

# Resolves the number-format settings that apply to one <mn>.
#
# Three layers are merged, later ones winning: @numberfmt_default, the named
# profile from @numberfmt_prof (when the element's settings include a
# "profile" key), and the element's own data-metanorma-numberformat settings
# as parsed by kv_parse. The "profile" key itself is not part of the result
# when it named a profile.
#
# @param mnum the <mn> element
# @return [Hash] the merged settings
def mathml_mn_profile(mnum)
  defaults = @numberfmt_default&.dup || {}
  own = kv_parse(mnum["data-metanorma-numberformat"] || "")
  profile_name = own["profile"]
  named = {}
  if profile_name
    named = @numberfmt_prof[profile_name] || {}
    own.delete("profile")
  end
  defaults.merge(named).merge(own)
end

#mathml_namespace(stem) ⇒ Object



89
90
91
92
93
# File 'lib/metanorma/cleanup/maths.rb', line 89

# Assigns the MathML namespace as the default namespace of each direct child
# of the stem whose local name is "math".
#
# @param stem the stem element
# @return the collection of matched <math> children
def mathml_namespace(stem)
  maths = stem.xpath("./*[local-name() = 'math']")
  maths.each { |math| math.default_namespace = MATHML_NS }
end

#mathml_number_format(stem) ⇒ Object



176
177
178
179
180
# File 'lib/metanorma/cleanup/maths.rb', line 176

# Applies number formatting to one stem and then drops its "number-format"
# attribute.
#
# @param stem the stem element
# @return whatever stem.delete("number-format") returns
def mathml_number_format(stem)
  # Stem-level settings go onto each <mn> first; #mathml_mn_format then reads
  # them back per <mn> and layers defaults and profiles around them.
  mathml_stem_format(stem)
  mathml_mn_format(stem)

  # The attribute is only an instruction to this step, so it is removed last.
  stem.delete("number-format")
end

#mathml_number_to_mathml(xmldoc) ⇒ Object



194
195
196
197
198
199
200
201
# File 'lib/metanorma/cleanup/maths.rb', line 194

# Rewrites each <mathml-number> element as <stem type="MathML"> whose content
# is a <math> element wrapping the original children in a single <mn>.
#
# @param xmldoc the XML document
# @return the collection of elements that was rewritten
def mathml_number_to_mathml(xmldoc)
  xmldoc.xpath("//mathml-number").each do |number|
    content = number.children.to_xml
    number.name = "stem"
    number["type"] = "MathML"
    number.children = "<math xmlns='#{MATHML_NS}'><mn>#{content}</mn></math>"
  end
end

#mathml_preserve_space(math) ⇒ Object



82
83
84
85
86
87
# File 'lib/metanorma/cleanup/maths.rb', line 82

# Replaces a whitespace character at the start or end of a line of each
# <mtext>'s serialised content with the character reference &#xA0;.
#
# NOTE(review): ^ and $ match at every line boundary, not only at the ends of
# the whole content; confirm that is intended for multi-line <mtext>.
#
# @param math the node whose <mtext> descendants are processed
# @return the collection of <mtext> elements
def mathml_preserve_space(math)
  math.xpath(".//m:mtext", "m" => MATHML_NS).each do |mtext|
    markup = mtext.children.to_xml
    leading_fixed = markup.gsub(/^\s/, "&#xA0;")
    mtext.children = leading_fixed.gsub(/\s$/, "&#xA0;")
  end
end

#mathml_stem_format(stem) ⇒ Object



153
154
155
156
157
158
159
160
161
162
# File 'lib/metanorma/cleanup/maths.rb', line 153

# Copies the stem-level number format onto every <mn> inside the stem as a
# data-metanorma-numberformat attribute.
#
# The format string from #mathml_stem_format_attr is split on commas with
# quoted_csv_split; each key=value fragment becomes key='value' with the value
# passed through @c.decode. Fragments that are not of the form key=value are
# dropped. (Previously they were joined in as empty items, leaving stray
# commas in the attribute.) Nothing is written when no usable fragment remains.
#
# @param stem the stem element
# @return nil when there is no format to apply, otherwise the <mn> collection
def mathml_stem_format(stem)
  f = mathml_stem_format_attr(stem) or return
  attr = quoted_csv_split(f, ",").map do |x|
    m = /^(.+?)=(.+)?$/.match(x) or next
    "#{m[1]}='#{@c.decode m[2]}'"
  end.compact.join(",") # compact: `next` yields nil for unparseable fragments
  # Decide once rather than per <mn>: the value is the same for all of them.
  attr.empty? and return
  stem.xpath(".//m:mn", "m" => MATHML_NS).each do |m|
    m["data-metanorma-numberformat"] = attr
  end
end

#mathml_stem_format_attr(stem) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
# File 'lib/metanorma/cleanup/maths.rb', line 164

# Determines the number-format string that applies to a stem.
#
# The stem's own "number-format" attribute wins over @numberfmt_formula.
# - no format at all: returns nil;
# - "nil": removes the attribute from the stem and returns nil;
# - "default": returns @numberfmt_default rendered as key='value' pairs, or
#   "notation='basic'" when there are no defaults;
# - anything else: returned unchanged.
#
# An unset (nil) @numberfmt_default is treated like an empty one. It used to
# raise NoMethodError on `.empty?` even though the following line guarded
# against nil with `&.`.
#
# @param stem the stem element
# @return [String, nil]
def mathml_stem_format_attr(stem)
  f = stem["number-format"] || @numberfmt_formula or return
  if f == "nil"
    stem.delete("number-format")
    return
  end
  f == "default" or return f
  defaults = @numberfmt_default || {}
  defaults.empty? and return "notation='basic'"
  defaults.map { |k, v| "#{k}='#{v}'" }.join(",")
end

#mathml_unitsml(xmldoc) ⇒ Object



107
108
109
110
111
112
113
114
# File 'lib/metanorma/cleanup/maths.rb', line 107

# Collects the document's UnitsML definitions into a single <UnitsML> element
# inside the metanorma-extension container.
#
# Does nothing when the document contains no element in the UnitsML
# namespace. Otherwise gathers, in this order, Unit, CountedItem, Quantity,
# Dimension and Prefix elements through #gather_unitsml.
#
# @param xmldoc the XML document
# @return nil when there is no UnitsML, otherwise the list of gathered names
def mathml_unitsml(xmldoc)
  return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

  container = add_misc_container(xmldoc)
  unitsml = container.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
  %w(Unit CountedItem Quantity Dimension Prefix).each do |kind|
    gather_unitsml(unitsml, xmldoc, kind)
  end
end

#mathml_xml_cleanup(stem) ⇒ Object



63
64
65
66
67
# File 'lib/metanorma/cleanup/maths.rb', line 63

# Normalises the XML of one MathML stem in three steps.
#
# @param stem the stem element
# @return the result of #mathml_preserve_space
def mathml_xml_cleanup(stem)
  # 1. Escaped markup held as text becomes real elements.
  xml_unescape_mathml(stem)
  # 2. The <math> children get the MathML default namespace.
  mathml_namespace(stem)
  # 3. Edge whitespace inside <mtext> is made non-breaking.
  mathml_preserve_space(stem)
end

#mathvariant_override(inner, outer) ⇒ Object



70
71
72
73
74
75
# File 'lib/metanorma/cleanup/mathvariant.rb', line 70

# Looks up the mathvariant a nested element should take, given its own value
# and that of an enclosing element.
#
# @param inner [String] mathvariant of the nested element
# @param outer [String] mathvariant of the enclosing element
# @return [String] the combined value from MATHVARIANT_OVERRIDE, or inner
#   unchanged when the table has no entry for this pair
def mathvariant_override(inner, outer)
  MATHVARIANT_OVERRIDE.dig(outer.to_sym, inner.to_sym) || inner
end

#mi_italicise?(char) ⇒ Boolean

Returns:

  • (Boolean)


18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/metanorma/cleanup/mathvariant.rb', line 18

# Tells whether an <mi> whose text is `char` should be marked by
# #mathml_italicise (which sets mathvariant="normal" on it).
#
# True only for a single Greek or Latin letter whose class (upper/lower,
# Greek/Roman) is switched off in #mathml_mi_italics. Always returns a strict
# true/false, and uses match? so no MatchData is allocated.
#
# @param char [String] decoded text content of the <mi>
# @return [Boolean]
def mi_italicise?(char)
  char.length > 1 and return false
  flags = mathml_mi_italics
  lower = /\p{Lower}/.match?(char)
  upper = /\p{Upper}/.match?(char)
  case char
  when /\p{Greek}/
    (lower && !flags[:lowergreek]) || (upper && !flags[:uppergreek])
  when /\p{Latin}/
    (lower && !flags[:lowerroman]) || (upper && !flags[:upperroman])
  else false
  end
end

#progress_conv(idx, step, total, threshold, msg) ⇒ Object



69
70
71
72
# File 'lib/metanorma/cleanup/maths.rb', line 69

# Emits a progress warning of the form "<msg> <idx> of <total>".
#
# A line is written only when idx is a positive multiple of step and total
# exceeds threshold.
#
# @param idx [Integer] zero-based index of the current item
# @param step [Integer] report every step-th item
# @param total [Integer] number of items
# @param threshold [Integer] minimum total (exclusive) for any reporting
# @param msg [String] label that starts the message
# @return [nil]
def progress_conv(idx, step, total, threshold, msg)
  return unless (idx % step).zero?
  return unless total > threshold
  return unless idx.positive?

  warn "#{msg} #{idx} of #{total}"
end

#xml_unescape_mathml(xml) ⇒ Object



74
75
76
77
78
79
80
# File 'lib/metanorma/cleanup/maths.rb', line 74

# Turns escaped MathML held as the text of a node into real markup.
#
# Nodes that already have an element child are left alone. Otherwise the
# node's text has the five predefined XML entities unescaped, namespace
# prefixes stripped from opening and closing tags, and the result is assigned
# back as the node's children.
#
# @param xml the node to process
# @return nil when the node already contains elements, otherwise the markup
#   string that was assigned
def xml_unescape_mathml(xml)
  return if xml.children.any?(&:element?)

  # Applied in this order; &amp; goes last so that an escaped entity such as
  # "&amp;lt;" ends up as the text "&lt;" rather than "<".
  unescaped = [["&lt;", "<"], ["&gt;", ">"], ["&quot;", '"'],
               ["&apos;", "'"], ["&amp;", "&"]]
    .reduce(xml.text) { |text, (entity, char)| text.gsub(entity, char) }
  unprefixed = unescaped.gsub(/<[^: \r\n\t\/]+:/, "<")
    .gsub(/<\/[^ \r\n\t:]+:/, "</")
  xml.children = unprefixed
end