Module: Metanorma::Standoc::Asciibib
- Included in:
- Cleanup
- Defined in:
- lib/metanorma/cleanup/asciibib.rb
Instance Method Summary collapse
- #add_to_hash(bib, key, val) ⇒ Object
- #dd_bib_extract(dtd) ⇒ Object
-
#dl_bib_extract(clause, nested: false) ⇒ Object
definition list, with at most one level of unordered lists.
- #dl_bib_extract_title(bib, clause, nested) ⇒ Object
- #extract_from_p(tag, bib, key) ⇒ Object
-
#p_unwrap(para) ⇒ Object
If the content is a single paragraph, replace it with its children; single links are replaced with their URI.
- #ref_dl_cleanup(xmldoc) ⇒ Object
- #ref_dl_cleanup_id(bibitem, clause) ⇒ Object
-
#validate_ref_dl(bib, clause) ⇒ Object
do not accept implicit id.
- #validate_ref_dl1(bib, id, clause) ⇒ Object
Instance Method Details
#add_to_hash(bib, key, val) ⇒ Object
77 78 79 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 77
# Store +val+ in +bib+ at the location named by the dot-separated +key+.
#
# The key is split on "." into path segments and the assignment itself is
# delegated to Metanorma::Utils.set_nested_value.
#
# @param bib [Hash] hash being populated
# @param key [String] dot-separated path (e.g. "a.b")
# @param val [Object] value to store
# @return [Object] whatever Metanorma::Utils.set_nested_value returns
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils.set_nested_value(bib, path, val)
end
#dd_bib_extract(dtd) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 65
# Convert the content of one definition-list entry into a value for the
# bibliographic hash.
#
# Outcomes, in order of precedence:
# - the entry has no children: nil
# - the entry has a direct <dl> child: the result of dl_bib_extract on it
# - the entry's only child element is an <ol> or <ul>: an Array holding
#   p_unwrap of each direct <li>
# - anything else: p_unwrap of the entry itself
#
# @param dtd [Object] the entry node (presumably a Nokogiri <dd> element —
#   confirm against callers)
# @return [nil, Hash, String, Array<String>]
def dd_bib_extract(dtd)
  return nil if dtd.children.empty?
  return dl_bib_extract(dtd) if dtd.at("./dl")

  # The entry is detached from its parent before its elements are inspected.
  elems = dtd.remove.elements
  list = elems[0] if elems.size == 1
  return p_unwrap(dtd) unless list && %w[ol ul].include?(list.name)

  list.xpath("./li").map { |li| p_unwrap(li) }
end
#dl_bib_extract(clause, nested: false) ⇒ Object
definition list, with at most one level of unordered lists
82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 82
# Build a bibliographic hash from the definition list directly under
# +clause+ (a definition list, with at most one level of unordered lists).
#
# Each <dt> sets the current key (trailing colons stripped); each <dd> is
# converted with dd_bib_extract and stored under that key via add_to_hash.
# Direct subclauses whose title (downcased and stripped) is "contributor",
# "relation" or "series" are extracted recursively with +nested: true+ and
# stored under that title.
#
# @param clause [Object] node containing the <dl> (presumably a Nokogiri
#   element — confirm against callers)
# @param nested [Boolean] forwarded to dl_bib_extract_title
# @return [Hash, nil] nil when +clause+ has no direct <dl> child
def dl_bib_extract(clause, nested: false)
  dl = clause.at("./dl")
  return unless dl

  key = ""
  bib = {}
  dl.xpath("./dt | ./dd").each do |dtd|
    if dtd.name == "dt"
      # A term only updates the key used by the definitions that follow it.
      key = dtd.text.sub(/:+$/, "")
    else
      add_to_hash(bib, key, dd_bib_extract(dtd))
    end
  end
  clause.xpath("./clause").each do |sub|
    heading = sub.at("./title")&.text&.downcase&.strip
    next unless %w(contributor relation series).include?(heading)

    add_to_hash(bib, heading, dl_bib_extract(sub, nested: true))
  end
  dl_bib_extract_title(bib, clause, nested)
end
#dl_bib_extract_title(bib, clause, nested) ⇒ Object
97 98 99 100 101 102 103 104 105 106 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 97
# Move the clause's own <title> into +bib+ as a "main" title entry.
#
# Nothing happens for nested clauses or when +clause+ has no direct <title>.
# Otherwise the title element is removed from the clause, an existing
# Hash/String +bib["title"]+ is wrapped in an Array, and (unless the title
# markup is empty) a hash with "content", "type" => "main" and
# "language" => @lang is appended.
#
# @param bib [Hash] bibliographic hash being built (mutated in place)
# @param clause [Object] node that may carry a <title> child
# @param nested [Boolean] true when processing a subclause
# @return [Hash] +bib+
def dl_bib_extract_title(bib, clause, nested)
  return bib if nested

  # Look the title up once; the node is needed for both the test and removal.
  title_node = clause.at("./title")
  return bib unless title_node

  title = title_node.remove.children.to_xml
  existing = bib["title"]
  bib["title"] = [existing] if existing.is_a?(Hash) || existing.is_a?(String)
  bib["title"] ||= []
  unless title.empty?
    bib["title"] << { "content" => title, "type" => "main", "language" => @lang }
  end
  bib
end
#extract_from_p(tag, bib, key) ⇒ Object
48 49 50 51 52 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 48
# Wrap the children of the first <p> found under +bib[tag]+ in a +key+ element.
#
# @param tag [Object] key used to look the node up in +bib+
# @param bib [Hash] lookup whose values respond to +at+ (presumably Nokogiri
#   nodes — confirm against callers)
# @param key [String] name of the element to emit
# @return [String, nil] "<key>…</key>", or nil when +bib[tag]+ is absent or
#   contains no <p> (previously the latter raised NoMethodError on nil)
def extract_from_p(tag, bib, key)
  node = bib[tag]
  return unless node

  para = node.at("p")
  return unless para

  "<#{key}>#{para.children}</#{key}>"
end
#p_unwrap(para) ⇒ Object
If the content is a single paragraph, replace it with its children; single links are replaced with their URI
56 57 58 59 60 61 62 63 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 56
# Serialise +para+ to a stripped XML string.
#
# When the only child element of +para+ is a <p>, that paragraph is passed
# through link_unwrap and only its children are serialised; otherwise
# +para+ itself is serialised.
# NOTE(review): link_unwrap is defined elsewhere; per the original comment
# it presumably replaces single links with their URI — confirm.
#
# @param para [Object] node to serialise (presumably a Nokogiri element)
# @return [String] stripped XML
def p_unwrap(para)
  only = para.elements
  return para.to_xml.strip unless only.size == 1 && only[0].name == "p"

  link_unwrap(only[0]).children.to_xml.strip
end
#ref_dl_cleanup(xmldoc) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 7
# Replace every clause marked +bibitem="true"+ with the bibliographic item
# described by its definition list.
#
# For each such clause: extract the hash with dl_bib_extract, validate it,
# turn it into XML through Relaton, fix up its anchor/id with
# ref_dl_cleanup_id, and swap the clause for the resulting root element.
# Clauses with no extractable hash, or whose Relaton item yields no XML,
# are left untouched.
#
# @param xmldoc [Object] document to search (presumably a Nokogiri document)
# @return [Object] the result of iterating the matched clauses
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    bib = dl_bib_extract(clause)
    next unless bib

    # Validation only logs; conversion proceeds regardless of its outcome.
    validate_ref_dl(bib, clause)
    attrs = Relaton::Bib::HashParserV1.hash_to_bib(bib)
    xml = Relaton::Bib::ItemData.new(**attrs).to_xml
    next unless xml

    root = Nokogiri::XML(xml).root
    ref_dl_cleanup_id(root, clause)
    clause.replace(root)
  end
end
#ref_dl_cleanup_id(bibitem, clause) ⇒ Object
20 21 22 23 24 25 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 20
# Set the "anchor" attribute of +bibitem+ and then hand it to add_id.
#
# The clause's own anchor is used when it is present and does not start
# with "_"; otherwise the anchor falls back to the bibitem's "id".
# Regexp#match? is used (as in validate_ref_dl) so no MatchData is built.
#
# @param bibitem [Object] item supporting +[]+ / +[]=+ on attribute names
# @param clause [Object] source clause supporting +[]+ on attribute names
# @return [Object] whatever add_id returns
def ref_dl_cleanup_id(bibitem, clause)
  anchor = clause["anchor"]
  explicit = anchor && !/^_/.match?(anchor)
  bibitem["anchor"] = explicit ? anchor : bibitem["id"]
  add_id(bibitem)
end
#validate_ref_dl(bib, clause) ⇒ Object
do not accept implicit id
28 29 30 31 32 33 34 35 36 37 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 28
# Check that a bibliographic clause has an explicit identifier (implicit
# ids are not accepted), then run the remaining checks.
#
# The identifier is +bib["id"]+, or failing that the clause anchor when it
# does not start with "_". Without one, STANDOC_10 is logged with the
# clause XML and nil is returned. Otherwise the id is appended to @refids
# and validate_ref_dl1 is run.
#
# @param bib [Hash] extracted bibliographic hash
# @param clause [Object] source clause (supports +[]+ and +to_xml+)
# @return [Object, nil] result of validate_ref_dl1, or nil when no id exists
def validate_ref_dl(bib, clause)
  anchor = clause["anchor"]
  id = bib["id"]
  id ||= anchor unless /^_/.match?(anchor)
  if id
    @refids << id
    validate_ref_dl1(bib, id, clause)
  else
    @log.add("STANDOC_10", clause, params: [clause.to_xml])
    nil
  end
end
#validate_ref_dl1(bib, id, clause) ⇒ Object
39 40 41 42 43 44 45 46 |
# File 'lib/metanorma/cleanup/asciibib.rb', line 39
# Log the fields missing from an extracted bibliographic hash.
#
# Logs STANDOC_11 when +bib+ has no "title" and STANDOC_12 when it has no
# "docid"; both entries carry +id+ as their parameter.
#
# @param bib [Hash] extracted bibliographic hash
# @param id [Object] identifier reported in the log entries
# @param clause [Object] source clause passed to the logger
# @return [Object, nil] result of the "docid" log call, or nil if "docid" is set
def validate_ref_dl1(bib, id, clause)
  @log.add("STANDOC_11", clause, params: [id]) unless bib["title"]
  @log.add("STANDOC_12", clause, params: [id]) unless bib["docid"]
end