Module: Metanorma::Standoc::Maths
- Includes:
- Regex
- Included in:
- Cleanup
- Defined in:
- lib/metanorma/cleanup/maths.rb,
lib/metanorma/cleanup/mathvariant.rb
Constant Summary collapse
# XML namespace URI used for MathML elements.
MATHML_NS =
  "http://www.w3.org/1998/Math/MathML".freeze
# XML namespace URI used for UnitsML elements.
UNITSML_NS =
  "https://schema.unitsml.org/unitsml/1.0".freeze
# Lookup table indexed as MATHVARIANT_OVERRIDE[outer][inner]: the mathvariant
# an element should carry when its own value is +inner+ and an enclosing
# element declares +outer+. Pairs that are not listed leave +inner+ untouched
# (see #mathvariant_override).
#
# Fixes relative to the previously published table:
# * italic/bold was keyed under the typo +bod+, so it could never match;
# * bold-script/normal yielded "script", unlike every other row, where
#   +normal+ maps to the outer variant itself;
# * the rows for sans-serif-italic and sans-serif-bold-italic only knew the
#   spelling "sans-serif-bold"; the MathML value "bold-sans-serif" is now
#   accepted too (the old key is kept for backward compatibility).
MATHVARIANT_OVERRIDE =
  {
    bold: {
      normal: "bold", italic: "bold-italic", fraktur: "bold-fraktur",
      script: "bold-script", "sans-serif": "bold-sans-serif",
      "sans-serif-italic": "sans-serif-bold-italic"
    },
    italic: {
      normal: "italic", bold: "bold-italic",
      "sans-serif": "sans-serif-italic",
      "bold-sans-serif": "sans-serif-bold-italic"
    },
    "bold-italic": {
      normal: "bold-italic", bold: "bold-italic", italic: "bold-italic",
      "sans-serif": "sans-serif-bold-italic",
      "bold-sans-serif": "sans-serif-bold-italic",
      "sans-serif-italic": "sans-serif-bold-italic"
    },
    fraktur: { normal: "fraktur", bold: "bold-fraktur" },
    "bold-fraktur": { normal: "bold-fraktur", fraktur: "bold-fraktur" },
    script: { normal: "script", bold: "bold-script" },
    "bold-script": { normal: "bold-script", script: "bold-script" },
    "sans-serif": {
      normal: "sans-serif", bold: "bold-sans-serif",
      italic: "sans-serif-italic", "bold-italic": "sans-serif-bold-italic"
    },
    "bold-sans-serif": {
      normal: "bold-sans-serif", bold: "bold-sans-serif",
      "sans-serif": "bold-sans-serif", italic: "sans-serif-bold-italic",
      "bold-italic": "sans-serif-bold-italic",
      "sans-serif-italic": "sans-serif-bold-italic"
    },
    "sans-serif-italic": {
      normal: "sans-serif-italic", italic: "sans-serif-italic",
      "sans-serif": "sans-serif-italic", bold: "sans-serif-bold-italic",
      "bold-italic": "sans-serif-bold-italic",
      "bold-sans-serif": "sans-serif-bold-italic",
      "sans-serif-bold": "sans-serif-bold-italic"
    },
    "sans-serif-bold-italic": {
      normal: "sans-serif-bold-italic", italic: "sans-serif-bold-italic",
      "sans-serif": "sans-serif-bold-italic",
      "sans-serif-italic": "sans-serif-bold-italic",
      bold: "sans-serif-bold-italic",
      "bold-italic": "sans-serif-bold-italic",
      "bold-sans-serif": "sans-serif-bold-italic",
      "sans-serif-bold": "sans-serif-bold-italic"
    },
  }.freeze
Constants included from Regex
Regex::CONN_REGEX_STR, Regex::ISO_REF, Regex::ISO_REF_ALL_PARTS, Regex::ISO_REF_NO_YEAR, Regex::LOCALITIES, Regex::LOCALITY_REGEX_STR, Regex::LOCALITY_REGEX_STR_TRIPLEDASH, Regex::LOCALITY_REGEX_VALUE_ONLY_STR, Regex::NON_ISO_REF, Regex::NON_ISO_REF1, Regex::NUMERIC_REGEX, Regex::TERM_REFERENCE_RE, Regex::TERM_REFERENCE_RE_STR
Instance Method Summary collapse
- #add_misc_container(xmldoc) ⇒ Object
- #asciimath2mathml(xml) ⇒ Object
- #asciimath2mathml_err(text, expr) ⇒ Object
- #asciimath2mathml_indiv(elem) ⇒ Object
- #asciimath2mathml_wrap(xml) ⇒ Object
- #asciimath2unitsml_options ⇒ Object
- #asciimath_cleanup(xml) ⇒ Object
- #asciimath_parse(expr, elem) ⇒ Object
- #gather_unitsml(unitsml, xmldoc, tag) ⇒ Object
- #mathml_cleanup(xmldoc) ⇒ Object
-
#mathml_italicise(xml) ⇒ Object
Presupposes that multi-character mi elements are upright and that single-character mi elements take the MathML default italic.
- #mathml_mathvariant(math) ⇒ Object
- #mathml_mi_italics ⇒ Object
- #mathml_mn_format(math) ⇒ Object
- #mathml_mn_profile(mnum) ⇒ Object
- #mathml_namespace(stem) ⇒ Object
- #mathml_number_format(stem) ⇒ Object
- #mathml_number_to_mathml(xmldoc) ⇒ Object
- #mathml_preserve_space(math) ⇒ Object
- #mathml_stem_format(stem) ⇒ Object
- #mathml_stem_format_attr(stem) ⇒ Object
- #mathml_unitsml(xmldoc) ⇒ Object
- #mathml_xml_cleanup(stem) ⇒ Object
- #mathvariant_override(inner, outer) ⇒ Object
- #mi_italicise?(char) ⇒ Boolean
- #progress_conv(idx, step, total, threshold, msg) ⇒ Object
- #xml_unescape_mathml(xml) ⇒ Object
Methods included from Regex
Instance Method Details
#add_misc_container(xmldoc) ⇒ Object
97 98 99 100 101 102 103 104 105 |
# File 'lib/metanorma/cleanup/maths.rb', line 97
# Returns the document's <metanorma-extension> element, inserting an empty
# one first when the document has none. The new element is placed after the
# last <termdocsource>, else after <bibdata>, else after the root's first
# child.
def add_misc_container(xmldoc)
  existing = xmldoc.at("//metanorma-extension")
  return existing if existing

  anchor = xmldoc.xpath("//termdocsource")&.last ||
    xmldoc.at("//bibdata") ||
    xmldoc.root.children.first
  anchor.next = "<metanorma-extension/>"
  # Look the element up again to get the node that now lives in the document.
  xmldoc.at("//metanorma-extension")
end
#asciimath2mathml(xml) ⇒ Object
10 11 12 13 14 15 16 17 |
# File 'lib/metanorma/cleanup/maths.rb', line 10
# Converts every AsciiMath <stem> in +xml+ to MathML, reporting progress via
# #progress_conv, then hands the document to #asciimath2mathml_wrap and
# returns its result.
def asciimath2mathml(xml)
  stems = xml.xpath("//stem[@type = 'AsciiMath']")
  stems.each_with_index do |stem, idx|
    progress_conv(idx, 500, stems.size, 1000, "AsciiMath")
    asciimath2mathml_indiv(stem)
  end
  asciimath2mathml_wrap(xml)
end
#asciimath2mathml_err(text, expr) ⇒ Object
49 50 51 |
# File 'lib/metanorma/cleanup/maths.rb', line 49
# Records an AsciiMath conversion failure in @log under code "STANDOC_6".
# Note the parameter order passed to the log: +expr+ first, then +text+.
def asciimath2mathml_err(text, expr)
  log_params = [expr, text]
  @log.add("STANDOC_6", nil, params: log_params)
end
#asciimath2mathml_indiv(elem) ⇒ Object
19 20 21 22 23 24 25 26 27 28 |
# File 'lib/metanorma/cleanup/maths.rb', line 19
# Converts one AsciiMath <stem> element in place. The element is retyped as
# "MathML" first; if its decoded text is blank nothing else happens.
# Otherwise its children are replaced by the parsed MathML followed by an
# <asciimath> element holding the re-encoded source expression.
# Any StandardError is reported through #asciimath2mathml_err.
def asciimath2mathml_indiv(elem)
  elem["type"] = "MathML"
  source = @c.decode(elem.text)
  return if source.strip.empty?

  mathml = asciimath_parse(source, elem)&.strip
  mathml += "<asciimath>#{@c.encode(source, :basic)}</asciimath>"
  elem.children = mathml
rescue StandardError => e
  asciimath2mathml_err(elem.to_xml, e)
end
#asciimath2mathml_wrap(xml) ⇒ Object
53 54 55 56 57 58 59 60 61 |
# File 'lib/metanorma/cleanup/maths.rb', line 53
# Removes the +display+ attribute from every <math> element (matched by
# local name) and returns +xml+.
def asciimath2mathml_wrap(xml)
  maths = xml.xpath("//*[local-name() = 'math'][@display]")
  maths.each { |math| math.delete("display") }
  # x.xpath("//stem").each do |y|
  #   y.next_element&.name == "asciimath" and y << y.next_element
  # end
  xml
end
#asciimath2unitsml_options ⇒ Object
126 127 128 |
# File 'lib/metanorma/cleanup/maths.rb', line 126
# Options hash for AsciiMath-to-UnitsML conversion: a space is used as the
# multiplier. (The method name was missing from the published listing and is
# restored from the method summary.)
def asciimath2unitsml_options
  { multiplier: :space }
end
#asciimath_cleanup(xml) ⇒ Object
6 7 8 |
# File 'lib/metanorma/cleanup/maths.rb', line 6
# Runs the AsciiMath-to-MathML conversion on +xml+ unless @keepasciimath is
# set, in which case +false+ is returned and nothing is converted.
def asciimath_cleanup(xml)
  return false if @keepasciimath

  asciimath2mathml(xml)
end
#asciimath_parse(expr, elem) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/metanorma/cleanup/maths.rb', line 32
# Turns the AsciiMath string +expr+ into a MathML string.
#
# * Expressions matching NUMERIC_REGEX are wrapped directly in an <mn>, and
#   the element is flagged validate="false" unless @novalid is set.
# * Blank expressions yield nil.
# * Everything else is parsed by Plurimath; UnitsML options are passed only
#   when the expression contains the text "unitsml". The element's +block+
#   attribute is forwarded as +display_style+.
def asciimath_parse(expr, elem)
  unless NUMERIC_REGEX.match?(expr)
    return if expr.strip.empty?

    options = { display_style: elem["block"] }
    if expr.include?("unitsml")
      options[:unitsml] = { xml: true, multiplier: :space }
    end
    return Plurimath::Math.parse(expr, "asciimath").to_mathml(**options)
  end

  elem["validate"] = "false" unless @novalid
  <<~MATH
    <math xmlns='#{MATHML_NS}'><mstyle displaystyle='false'><mn>#{expr}</mn></mstyle></math>
  MATH
end
#gather_unitsml(unitsml, xmldoc, tag) ⇒ Object
116 117 118 119 120 121 122 123 124 |
# File 'lib/metanorma/cleanup/maths.rb', line 116
# Detaches every UnitsML +tag+ element found under +xmldoc+ and re-attaches
# them inside a new <+tag+Set> child of +unitsml+. Elements are keyed by
# their +id+ attribute, so a later element with the same id replaces an
# earlier one. Does nothing (returns nil) when no such element exists.
# (The local variable name was missing from the published listing, leaving
# it unparseable; it is restored here.)
def gather_unitsml(unitsml, xmldoc, tag)
  tags = xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS)
    .each_with_object({}) do |x, m|
    m[x["id"]] = x.remove
  end
  tags.empty? and return
  set = unitsml.add_child("<#{tag}Set/>").first
  tags.each_value { |v| set << v }
end
#mathml_cleanup(xmldoc) ⇒ Object
182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/metanorma/cleanup/maths.rb', line 182
# Top-level MathML cleanup pass over +xmldoc+:
# 1. turn <mathml-number> elements into MathML stems;
# 2. for MathML stems not flagged validate="false", clean up the XML and
#    reconcile mathvariant attributes;
# 3. apply number formatting to every MathML stem;
# 4. gather UnitsML definitions.
def mathml_cleanup(xmldoc)
  mathml_number_to_mathml(xmldoc)

  validated = xmldoc
    .xpath("//stem[@type = 'MathML'][not(@validate = 'false')]")
  validated.each do |stem|
    mathml_xml_cleanup(stem)
    mathml_mathvariant(stem)
  end

  xmldoc.xpath("//stem[@type = 'MathML']").each do |stem|
    mathml_number_format(stem)
  end
  mathml_unitsml(xmldoc)
end
#mathml_italicise(xml) ⇒ Object
Presupposes that multi-character mi elements are upright and that single-character mi elements take the MathML default italic.
10 11 12 13 14 15 16 |
# File 'lib/metanorma/cleanup/mathvariant.rb', line 10
# For every <mi> that has no ancestor carrying a mathvariant attribute, sets
# mathvariant="normal" when #mi_italicise? answers true for the element's
# decoded text.
def mathml_italicise(xml)
  candidates = xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                         "m" => MATHML_NS)
  candidates.each do |mi|
    next unless mi_italicise?(@c.decode(mi.text))

    mi["mathvariant"] = "normal"
  end
end
#mathml_mathvariant(math) ⇒ Object
77 78 79 80 81 82 83 84 85 |
# File 'lib/metanorma/cleanup/mathvariant.rb', line 77
# Reconciles nested mathvariant attributes: each element with a mathvariant
# has the value of every descendant that also has one recomputed through
# #mathvariant_override. Finishes by running #mathml_italicise on +math+.
def mathml_mathvariant(math)
  math.xpath(".//*[@mathvariant]").each do |outer|
    outer.xpath(".//*[@mathvariant]").each do |inner|
      combined = mathvariant_override(inner["mathvariant"],
                                      outer["mathvariant"])
      inner["mathvariant"] = combined
    end
  end
  mathml_italicise(math)
end
#mathml_mi_italics ⇒ Object
4 5 6 7 |
# File 'lib/metanorma/cleanup/mathvariant.rb', line 4
# Flags consulted by #mi_italicise?, one per script/case combination.
# All four are true here.
def mathml_mi_italics
  %i[uppergreek upperroman lowergreek lowerroman]
    .each_with_object({}) { |key, flags| flags[key] = true }
end
#mathml_mn_format(math) ⇒ Object
130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/metanorma/cleanup/maths.rb', line 130
# When +math+ has a number-format attribute, writes each <mn>'s resolved
# number-format profile (see #mathml_mn_profile) into its
# data-metanorma-numberformat attribute as comma-separated key='value' pairs.
# Entries whose value is the string "nil" are left out; an <mn> with nothing
# left to write is not touched.
def mathml_mn_format(math)
  return unless math["number-format"]

  math.xpath(".//m:mn", "m" => MATHML_NS).each do |mn|
    pairs = mathml_mn_profile(mn)
      .reject { |_key, value| value == "nil" }
      .map { |key, value| "#{key}='#{@c.decode value}'" }
    mn["data-metanorma-numberformat"] = pairs.join(",") unless pairs.empty?
  end
end
#mathml_mn_profile(mnum) ⇒ Object
142 143 144 145 146 147 148 149 150 151 |
# File 'lib/metanorma/cleanup/maths.rb', line 142
# Resolves the number-format settings for one <mn>. Three layers are merged,
# later ones winning: a copy of @numberfmt_default, the named profile from
# @numberfmt_prof (if the element's own settings name one under "profile"),
# and the element's own data-metanorma-numberformat settings with the
# "profile" entry removed.
def mathml_mn_profile(mnum)
  defaults = @numberfmt_default&.dup || {}
  own = kv_parse(mnum["data-metanorma-numberformat"] || "")
  profile = {}
  profile_name = own["profile"]
  if profile_name
    profile = @numberfmt_prof[profile_name] || {}
    own.delete("profile")
  end
  [profile, own].reduce(defaults) { |merged, layer| merged.merge(layer) }
end
#mathml_namespace(stem) ⇒ Object
89 90 91 92 93 |
# File 'lib/metanorma/cleanup/maths.rb', line 89
# Sets the default namespace of each <math> child of +stem+ (matched by
# local name) to MATHML_NS.
def mathml_namespace(stem)
  maths = stem.xpath("./*[local-name() = 'math']")
  maths.each { |math| math.default_namespace = MATHML_NS }
end
#mathml_number_format(stem) ⇒ Object
176 177 178 179 180 |
# File 'lib/metanorma/cleanup/maths.rb', line 176
# Applies number formatting to one stem: first the stem-level format, then
# the per-<mn> profiles, and finally drops the stem's number-format
# attribute, which both earlier steps read.
def mathml_number_format(stem)
  mathml_stem_format(stem)
  mathml_mn_format(stem)
  # Must come last: the two calls above still need the attribute.
  stem.delete("number-format")
end
#mathml_number_to_mathml(xmldoc) ⇒ Object
194 195 196 197 198 199 200 201 |
# File 'lib/metanorma/cleanup/maths.rb', line 194
# Rewrites each <mathml-number> element as <stem type="MathML"> whose content
# is the element's former children wrapped in <math><mn>…</mn></math>.
def mathml_number_to_mathml(xmldoc)
  xmldoc.xpath("//mathml-number").each do |number|
    number.name = "stem"
    number["type"] = "MathML"
    inner = number.children.to_xml
    number.children = "<math xmlns='#{MATHML_NS}'><mn>#{inner}</mn></math>"
  end
end
#mathml_preserve_space(math) ⇒ Object
82 83 84 85 86 87 |
# File 'lib/metanorma/cleanup/maths.rb', line 82
# Protects leading and trailing whitespace inside <mtext> elements by
# replacing the whitespace character at the start and end of a line with a
# no-break space.
#
# NOTE(review): the published listing shows the replacement as a bare space,
# which would make this method almost a no-op; it is presumably the
# character reference "&#xA0;" decoded by the documentation extractor.
# The reference is written out explicitly here. Confirm against the
# repository source.
def mathml_preserve_space(math)
  math.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
    x.children = x.children.to_xml
      .gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
  end
end
#mathml_stem_format(stem) ⇒ Object
153 154 155 156 157 158 159 160 161 162 |
# File 'lib/metanorma/cleanup/maths.rb', line 153
# Applies the stem-level number format (see #mathml_stem_format_attr) to
# every <mn> under +stem+, as a comma-separated list of key='value' pairs in
# the data-metanorma-numberformat attribute. Returns nil when there is no
# format to apply.
#
# Segments of the format that are not of the form key=value are dropped.
# Previously they were left as nil in the list, so the joined attribute
# contained stray empty entries ("a='1',,b='2'").
def mathml_stem_format(stem)
  f = mathml_stem_format_attr(stem) or return
  pairs = quoted_csv_split(f, ",").map do |x|
    m = /^(.+?)=(.+)?$/.match(x) or next
    "#{m[1]}='#{@c.decode m[2]}'"
  end
  attr = pairs.compact.join(",")
  mns = stem.xpath(".//m:mn", "m" => MATHML_NS)
  # The emptiness check does not depend on the <mn>, so do it once.
  mns.each { |mn| mn["data-metanorma-numberformat"] = attr } unless attr.empty?
  mns
end
#mathml_stem_format_attr(stem) ⇒ Object
164 165 166 167 168 169 170 171 172 173 174 |
# File 'lib/metanorma/cleanup/maths.rb', line 164
# Works out the number-format string that applies to +stem+.
#
# * Source: the stem's number-format attribute, else @numberfmt_formula;
#   nil when neither is set.
# * "nil" disables formatting: the attribute is removed and nil returned.
# * "default" expands to @numberfmt_default rendered as key='value' pairs,
#   or to "notation='basic'" when there are no defaults.
# * Any other value is returned unchanged.
#
# @numberfmt_default is treated as empty when nil. Previously +.empty?+ was
# called on it directly, which raised NoMethodError, even though the
# following line already guarded against nil.
def mathml_stem_format_attr(stem)
  f = stem["number-format"] || @numberfmt_formula or return
  if f == "nil"
    stem.delete("number-format")
    return
  end
  f == "default" or return f
  defaults = @numberfmt_default || {}
  defaults.empty? and return "notation='basic'"
  defaults.map { |k, v| "#{k}='#{v}'" }.join(",")
end
#mathml_unitsml(xmldoc) ⇒ Object
107 108 109 110 111 112 113 114 |
# File 'lib/metanorma/cleanup/maths.rb', line 107
# If +xmldoc+ contains any element in the UnitsML namespace, adds a <UnitsML>
# element to the document's metanorma-extension container and gathers the
# Unit, CountedItem, Quantity, Dimension and Prefix definitions into it.
def mathml_unitsml(xmldoc)
  return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

  container = add_misc_container(xmldoc)
  unitsml = container.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
  %w(Unit CountedItem Quantity Dimension Prefix).each do |tag|
    gather_unitsml(unitsml, xmldoc, tag)
  end
end
#mathml_xml_cleanup(stem) ⇒ Object
63 64 65 66 67 |
# File 'lib/metanorma/cleanup/maths.rb', line 63
# XML-level cleanup of one MathML stem, in this order: unescape MathML that
# arrived as text, put the <math> element in the MathML namespace, then
# protect whitespace in <mtext>.
def mathml_xml_cleanup(stem)
  xml_unescape_mathml(stem) # text content becomes real elements
  mathml_namespace(stem)
  mathml_preserve_space(stem)
end
#mathvariant_override(inner, outer) ⇒ Object
70 71 72 73 74 75 |
# File 'lib/metanorma/cleanup/mathvariant.rb', line 70
# Looks up in MATHVARIANT_OVERRIDE the mathvariant an element with value
# +inner+ should take inside an element with value +outer+. Returns +inner+
# unchanged when the table has no entry for the pair.
def mathvariant_override(inner, outer)
  outer_key = outer.to_sym
  inner_key = inner.to_sym
  row = MATHVARIANT_OVERRIDE[outer_key]
  return inner unless row

  row[inner_key] || inner
end
#mi_italicise?(char) ⇒ Boolean
18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/metanorma/cleanup/mathvariant.rb', line 18
# Decides for the text of an <mi> whether its case/script combination is
# switched off in #mathml_mi_italics. Always false for text longer than one
# character and for characters that are neither Greek nor Latin.
#
# Always returns true or false. Previously the result could also be nil
# (for instance a lower-case letter whose flag is enabled), contrary to the
# documented Boolean return type.
def mi_italicise?(char)
  return false if char.length > 1

  flags = mathml_mi_italics
  lower = char.match?(/\p{Lower}/)
  upper = char.match?(/\p{Upper}/)
  case char
  when /\p{Greek}/
    (lower && !flags[:lowergreek]) || (upper && !flags[:uppergreek])
  when /\p{Latin}/
    (lower && !flags[:lowerroman]) || (upper && !flags[:upperroman])
  else false
  end
end
#progress_conv(idx, step, total, threshold, msg) ⇒ Object
69 70 71 72 |
# File 'lib/metanorma/cleanup/maths.rb', line 69
# Emits a "<msg> <idx> of <total>" warning every +step+ items, but only when
# +total+ exceeds +threshold+ and never for index 0.
def progress_conv(idx, step, total, threshold, msg)
  return unless (idx % step).zero? && total > threshold && idx.positive?

  warn [msg, idx, "of", total].join(" ")
end
#xml_unescape_mathml(xml) ⇒ Object
74 75 76 77 78 79 80 |
# File 'lib/metanorma/cleanup/maths.rb', line 74
# If +xml+ has no element children, treats its text as escaped markup:
# the five predefined XML entities are unescaped, namespace prefixes are
# stripped from opening and closing tags, and the result replaces the
# node's children. Nodes that already contain elements are left alone.
#
# The entity strings were decoded in the published listing (leaving no-op
# substitutions and an unterminated string); they are written out here.
def xml_unescape_mathml(xml)
  xml.children.any?(&:element?) and return
  # "&amp;" is handled last so that "&amp;lt;" ends up as "&lt;", not "<".
  math = xml.text.gsub("&lt;", "<").gsub("&gt;", ">")
    .gsub("&quot;", '"').gsub("&apos;", "'").gsub("&amp;", "&")
    .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
  xml.children = math
end