Module: Metanorma::Standoc::Utils
- Included in:
- Cleanup::SpansToBibitem, Converter, Inline, NumberInlineMacro, TermLookupCleanup, Validate::Schema
- Defined in:
- lib/metanorma/converter/utils.rb,
lib/metanorma/converter/date_utils.rb,
lib/metanorma/converter/isolated_converter.rb
Constant Summary collapse
- SUBCLAUSE_XPATH =
"//clause[not(parent::sections)]" \ "[not(ancestor::boilerplate)]".freeze
- SECTION_CONTAINERS =
%w(foreword introduction acknowledgements executivesummary abstract clause references terms definitions annex appendix indexsect executivesummary).freeze
Class Method Summary collapse
-
.adoc2xml(text, flavour) ⇒ Object
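Converts an AsciiDoc fragment to XML; the result is returned wrapped in <sections> (text that already parses as XML is returned unchanged).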
Instance Method Summary collapse
- #add_id(node) ⇒ Object
- #add_id_text ⇒ Object
- #add_noko_elem(node, name, val, attrs = {}) ⇒ Object
- #asciimath_key(sym) ⇒ Object
- #attr_code(attributes) ⇒ Object
-
#complete_and_compare_dates(dates) ⇒ Object
Convert dates to ISO format, complete partial dates, and return the latest.
-
#complete_iso_date(date_str) ⇒ Object
Complete partial ISO dates to a non-inclusive cutoff (the day after the period ends): YYYY -> (YYYY+1)-01-01, YYYY-MM -> first day of the following month, YYYY-MM-DD -> unchanged.
-
#complete_year_month(date_str) ⇒ Object
Complete YYYY-MM format to first day of next month (non-inclusive cutoff).
-
#complete_year_only(date_str) ⇒ Object
Complete YYYY format to next year (non-inclusive cutoff).
- #convert(node, transform = nil, opts = {}) ⇒ Object
- #csv_split(text, delim = ";", encode: true) ⇒ Object
- #dl_to_attrs(elem, dlist, name) ⇒ Object
- #dl_to_elems(ins, elem, dlist, name) ⇒ Object
- #document_ns_attributes(_doc) ⇒ Object
- #grkletters(text) ⇒ Object
- #insert_before(xmldoc, xpath) ⇒ Object
- #isodoc(lang, script, locale, i18nyaml = nil) ⇒ Object
-
#isolated_asciidoctor_convert(content, options = {}) ⇒ Object
Create an isolated Asciidoctor conversion that doesn’t interfere with the current converter’s instance variables.
- #kv_parse(text, delim = ",", eql = "=") ⇒ Object
- #link_unwrap(para) ⇒ Object
- #noko ⇒ Object
-
#parse_complete_date(date_str) ⇒ Object
Parse complete date or return nil.
-
#parse_partial_date(date_str) ⇒ Object
Parse and complete partial date strings.
- #processor ⇒ Object
-
#quoted_csv_split(text, delim = ",", eql = "=") ⇒ Object
Delimiter split that allows for quoted values containing the delimiter, e.g. key="va,lue".
- #refid?(ref) ⇒ Boolean
- #section_containers ⇒ Object
-
#separate_numbering_footnotes(docxml) ⇒ Object
Separate the numbering of externally sourced footnotes from that of the current document.
- #term_expr(elem) ⇒ Object
- #textcleanup(result) ⇒ Object
- #to_xml(node) ⇒ Object
- #wrap_in_para(node, out) ⇒ Object
- #xml_encode(text) ⇒ Object
Class Method Details
.adoc2xml(text, flavour) ⇒ Object
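Converts an AsciiDoc fragment to XML; the result is returned wrapped in <sections> (text that already parses as XML is returned unchanged)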
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
# File 'lib/metanorma/converter/utils.rb', line 146 def adoc2xml(text, flavour) Nokogiri::XML(text).root and return text f = @flush_caches ? ":flush-caches:\n" : "" doc = <<~ADOC = X A :semantic-metadata-headless: true :no-isobib: #{f}:novalid: :!sectids: #{text} ADOC c = isolated_asciidoctor_convert(doc, backend: flavour, header_footer: true) ret = Nokogiri::XML(c).at("//xmlns:sections") separate_numbering_footnotes(ret) end |
Instance Method Details
#add_id(node) ⇒ Object
42 43 44 |
# File 'lib/metanorma/converter/utils.rb', line 42 def add_id(node) node["id"] = "_#{UUIDTools::UUID.random_create}" end |
#add_id_text ⇒ Object
46 47 48 |
# File 'lib/metanorma/converter/utils.rb', line 46 def add_id_text %(id = "_#{UUIDTools::UUID.random_create}") end |
#add_noko_elem(node, name, val, attrs = {}) ⇒ Object
197 198 199 200 201 202 |
# File 'lib/metanorma/converter/utils.rb', line 197 def add_noko_elem(node, name, val, attrs = {}) (val and !val.empty?) or return node.send name, **attr_code(attrs) do |n| n << val end end |
#asciimath_key(sym) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/metanorma/converter/utils.rb', line 174 def asciimath_key(sym) key = sym.dup key.traverse do |n| if n.name == "stem" && a = n.at(".//asciimath") n.children = @c.encode( @c.decode(grkletters(a.text)), :basic ) end end key.xpath(".//asciimath").each(&:remove) Nokogiri::XML(key.to_xml) end |
#attr_code(attributes) ⇒ Object
38 39 40 |
# File 'lib/metanorma/converter/utils.rb', line 38 def attr_code(attributes) Metanorma::Utils::attr_code(attributes) end |
#complete_and_compare_dates(dates) ⇒ Object
Convert dates to ISO format, complete partial dates, and return the latest
7 8 9 10 11 12 13 |
# File 'lib/metanorma/converter/date_utils.rb', line 7 def complete_and_compare_dates(dates) completed_dates = dates.map do |date_str| complete_iso_date(date_str) end.compact completed_dates.empty? and return nil completed_dates.max end |
#complete_iso_date(date_str) ⇒ Object
Complete partial ISO dates to a non-inclusive cutoff (the day after the period ends): YYYY -> (YYYY+1)-01-01, YYYY-MM -> first day of the following month, YYYY-MM-DD -> unchanged
17 18 19 20 |
# File 'lib/metanorma/converter/date_utils.rb', line 17 def complete_iso_date(date_str) date_str.is_a?(String) or return nil parse_partial_date(date_str.strip) end |
#complete_year_month(date_str) ⇒ Object
Complete YYYY-MM format to first day of next month (non-inclusive cutoff)
36 37 38 39 40 41 42 |
# File 'lib/metanorma/converter/date_utils.rb', line 36 def complete_year_month(date_str) date_str =~ /^(\d{4})-(\d{1,2})$/ or return nil year = $1.to_i month = $2.to_i last_day = Date.new(year, month, -1).day Date.new(year, month, last_day) + 1 end |
#complete_year_only(date_str) ⇒ Object
Complete YYYY format to next year (non-inclusive cutoff)
30 31 32 33 |
# File 'lib/metanorma/converter/date_utils.rb', line 30 def complete_year_only(date_str) /^\d{4}$/.match?(date_str) or return nil Date.new(date_str.to_i, 12, 31) + 1 end |
#convert(node, transform = nil, opts = {}) ⇒ Object
15 16 17 18 |
# File 'lib/metanorma/converter/utils.rb', line 15 def convert(node, transform = nil, opts = {}) transform ||= node.node_name opts.empty? ? (send transform, node) : (send transform, node, opts) end |
#csv_split(text, delim = ";", encode: true) ⇒ Object
50 51 52 53 54 55 56 |
# File 'lib/metanorma/converter/utils.rb', line 50 def csv_split(text, delim = ";", encode: true) text ||= "" ret = Metanorma::Utils::csv_split(@c.decode(text), delim) encode and ret.map! { |x| @c.encode(x.strip, :basic, :hexadecimal) } ret end |
#dl_to_attrs(elem, dlist, name) ⇒ Object
100 101 102 |
# File 'lib/metanorma/converter/utils.rb', line 100 def dl_to_attrs(elem, dlist, name) Metanorma::Utils::dl_to_attrs(elem, dlist, name) end |
#dl_to_elems(ins, elem, dlist, name) ⇒ Object
104 105 106 |
# File 'lib/metanorma/converter/utils.rb', line 104 def dl_to_elems(ins, elem, dlist, name) Metanorma::Utils::dl_to_elems(ins, elem, dlist, name) end |
#document_ns_attributes(_doc) ⇒ Object
30 31 32 |
# File 'lib/metanorma/converter/utils.rb', line 30 def document_ns_attributes(_doc) nil end |
#grkletters(text) ⇒ Object
187 188 189 190 191 |
# File 'lib/metanorma/converter/utils.rb', line 187 def grkletters(text) text.gsub(/\b(alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa| lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi| psi|omega)\b/xi, "&\\1;") end |
#insert_before(xmldoc, xpath) ⇒ Object
120 121 122 123 124 125 126 |
# File 'lib/metanorma/converter/utils.rb', line 120 def insert_before(xmldoc, xpath) unless ins = xmldoc.at(xpath).children.first xmldoc.at(xpath) << " " ins = xmldoc.at(xpath).children.first end ins end |
#isodoc(lang, script, locale, i18nyaml = nil) ⇒ Object
93 94 95 96 97 98 |
# File 'lib/metanorma/converter/utils.rb', line 93 def isodoc(lang, script, locale, i18nyaml = nil) conv = presentation_xml_converter(EmptyAttr.new) Metanorma::Core::Isodoc.init(conv, lang: lang, script: script, locale: locale, i18nyaml: i18nyaml, localdir: @localdir) end |
#isolated_asciidoctor_convert(content, options = {}) ⇒ Object
Create an isolated Asciidoctor conversion that doesn’t interfere with the current converter’s instance variables
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/metanorma/converter/isolated_converter.rb', line 6 def isolated_asciidoctor_convert(content, = {}) # Track that we're in an isolated conversion (for nested calls) @isolated_conversion_stack ||= [] @isolated_conversion_stack << true begin = () # Merge with isolated options to ensure clean state and skip validation = .merge().merge( attributes: ([:attributes] || {}).merge( "novalid" => "", # Force no validation for isolated documents ), ) Asciidoctor.convert(content, ) ensure @isolated_conversion_stack.pop end end |
#kv_parse(text, delim = ",", eql = "=") ⇒ Object
72 73 74 75 76 77 78 79 |
# File 'lib/metanorma/converter/utils.rb', line 72 def kv_parse(text, delim = ",", eql = "=") text or return {} c = HTMLEntities.new quoted_csv_split(text, delim).each_with_object({}) do |k, m| x = k.split(eql, 2) m[x[0]] = c.decode(x[1]) end end |
#link_unwrap(para) ⇒ Object
112 113 114 115 116 117 118 |
# File 'lib/metanorma/converter/utils.rb', line 112 def link_unwrap(para) elems = para.elements if elems.size == 1 && elems[0].name == "link" para.at("./link").replace(elems[0]["target"].strip) end para end |
#noko ⇒ Object
34 35 36 |
# File 'lib/metanorma/converter/utils.rb', line 34 def noko(&) Metanorma::Utils::noko(@script, &) end |
#parse_complete_date(date_str) ⇒ Object
Parse complete date or return nil
45 46 47 48 49 |
# File 'lib/metanorma/converter/date_utils.rb', line 45 def parse_complete_date(date_str) Date.parse(date_str) rescue ArgumentError nil end |
#parse_partial_date(date_str) ⇒ Object
Parse and complete partial date strings
23 24 25 26 27 |
# File 'lib/metanorma/converter/date_utils.rb', line 23 def parse_partial_date(date_str) complete_year_only(date_str) || complete_year_month(date_str) || parse_complete_date(date_str) end |
#processor ⇒ Object
20 21 22 23 24 25 26 27 28 |
# File 'lib/metanorma/converter/utils.rb', line 20 def processor parent_type = self.class.name.split("::")[0...-1] parent_type << "Processor" begin Object.const_get(parent_type.join("::")) rescue NameError nil end end |
#quoted_csv_split(text, delim = ",", eql = "=") ⇒ Object
Delimiter split that allows for quoted values containing the delimiter, e.g. key="va,lue"
59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/metanorma/converter/utils.rb', line 59 def quoted_csv_split(text, delim = ",", eql = "=") c = HTMLEntities.new text = c.decode(text).gsub(/([a-zA-Z_]+)#{eql}(["'])(.*?)\2/) do |_| key = Regexp.last_match(1) value = Regexp.last_match(3).gsub(" ", "&#x20;") "\"#{key}#{eql}#{value}\"" end Metanorma::Utils::csv_split(text, delim).map do |x| c.encode(x.sub(/^(["'])(.*?)\1$/, "\\2"), :basic, :hexadecimal) end end |
#refid?(ref) ⇒ Boolean
193 194 195 |
# File 'lib/metanorma/converter/utils.rb', line 193 def refid?(ref) @refids.include? ref end |
#section_containers ⇒ Object
141 142 143 |
# File 'lib/metanorma/converter/utils.rb', line 141 def section_containers SECTION_CONTAINERS end |
#separate_numbering_footnotes(docxml) ⇒ Object
Separate the numbering of externally sourced footnotes from that of the current document
167 168 169 170 171 172 |
# File 'lib/metanorma/converter/utils.rb', line 167 def separate_numbering_footnotes(docxml) docxml.xpath("//xmlns:fn").each do |f| f["reference"] = "_#{UUIDTools::UUID.random_create}_#{f['reference']}" end docxml end |
#term_expr(elem) ⇒ Object
108 109 110 |
# File 'lib/metanorma/converter/utils.rb', line 108 def term_expr(elem) "<expression><name>#{elem}</name></expression>" end |
#textcleanup(result) ⇒ Object
206 207 208 209 |
# File 'lib/metanorma/converter/utils.rb', line 206 def textcleanup(result) text = result.flatten.map(&:rstrip) * "\n" text.gsub(/(?<!\s)\s+<fn /, "<fn ") end |
#to_xml(node) ⇒ Object
85 86 87 88 |
# File 'lib/metanorma/converter/utils.rb', line 85 def to_xml(node) node.to_xml(encoding: "UTF-8", indent: 2, save_with: Nokogiri::XML::Node::SaveOptions::AS_XML) end |
#wrap_in_para(node, out) ⇒ Object
81 82 83 |
# File 'lib/metanorma/converter/utils.rb', line 81 def wrap_in_para(node, out) Metanorma::Utils::wrap_in_para(node, out) end |
#xml_encode(text) ⇒ Object
128 129 130 131 132 133 134 |
# File 'lib/metanorma/converter/utils.rb', line 128 def xml_encode(text) @c.encode(text, :basic, :hexadecimal) .gsub("&amp;gt;", ">").gsub("&amp;lt;", "<").gsub("&amp;amp;", "&") .gsub("&gt;", ">").gsub("&lt;", "<").gsub("&amp;", "&") .gsub("&quot;", '"').gsub("&#xa;", "\n").gsub("&amp;#", "&#") .gsub("&apos;", "'") end |