Module: Metanorma::Standoc::Bibitem

Includes:
Attachment
Included in:
Cleanup
Defined in:
lib/metanorma/cleanup/bibitem.rb

Instance Method Summary collapse

Methods included from Attachment

#attachment_cleanup, #attachment_location, #attachment_uri, #datauri_attachment, #init_attachments, #save_attachment, #valid_attachment?

Instance Method Details

#bibitem_cleanup(xmldoc) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
# File 'lib/metanorma/cleanup/bibitem.rb', line 225

# Runs the bibitem cleanup steps over the document, strictly in sequence.
#
# The order matters: bibitem_nested_id runs first because its output feeds
# remove_dup_bibtem_id, and remove_empty_docid runs before
# empty_docid_to_title.
#
# @param xmldoc [Object] XML document, handed unchanged to every step
# @return [Object] whatever attachment_cleanup returns
def bibitem_cleanup(xmldoc)
  %i[
    bibitem_nested_id
    ref_dl_cleanup
    formattedref_spans
    fetch_local_bibitem
    remove_empty_docid
    empty_docid_to_title
    remove_dup_bibtem_id
    bibitem_i18n
  ].each { |step| send(step, xmldoc) }
  # kept as the final explicit call so its result stays the return value
  attachment_cleanup(xmldoc)
end

#bibitem_i18n(xmldoc) ⇒ Object



206
207
208
209
210
211
212
213
214
215
# File 'lib/metanorma/cleanup/bibitem.rb', line 206

# Gives every bibitem directly under a references element a script and a
# language child, filling in whichever is missing from @script and @lang.
#
# Both elements are attached via `next=` on the same node returned by
# bibitem_i18n_insert; script is attached first, language second.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def bibitem_i18n(xmldoc)
  xmldoc.xpath("//references/bibitem").each do |bib|
    has_script = bib.at("./script")
    has_language = bib.at("./language")
    next if has_script && has_language

    anchor = bibitem_i18n_insert(bib)
    anchor.next = "<script>#{@script}</script>" unless has_script
    anchor.next = "<language>#{@lang}</language>" unless has_language
  end
end

#bibitem_i18n_insert(bib) ⇒ Object



217
218
219
220
221
222
223
# File 'lib/metanorma/cleanup/bibitem.rb', line 217

# Picks the child of a bibitem after which script/language get inserted.
#
# The candidate element names are tried in the order listed; for each one the
# last child of that name is looked up, and the first hit wins. If none of
# them exists, the bibitem's first child is returned (nil when it has no
# children).
#
# @param bib [Object] bibitem node (must respond to #at and #children)
# @return [Object, nil] the chosen node
def bibitem_i18n_insert(bib)
  %w(note version edition contributor date docnumber docidentifier source
     title).each do |tag|
    hit = bib.at("./#{tag}[last()]")
    return hit if hit
  end
  bib.children.first
end

#bibitem_id_docid_hash(xmldoc) ⇒ Object



169
170
171
172
173
174
175
176
# File 'lib/metanorma/cleanup/bibitem.rb', line 169

# Groups all bibitems carrying an anchor attribute into a two-level index:
# anchor value => docidentifier text => bibitems, in document order.
#
# The text of the first docidentifier child is used as the second-level key;
# bibitems without a docidentifier are filed under the literal "NO ID".
#
# @param xmldoc [Object] XML document (must respond to #xpath)
# @return [Hash{String => Hash{String => Array}}]
def bibitem_id_docid_hash(xmldoc)
  index = {}
  xmldoc.xpath("//bibitem[@anchor]").each do |bib|
    docid = bib.at("./docidentifier")&.text || "NO ID"
    by_docid = (index[bib["anchor"]] ||= {})
    (by_docid[docid] ||= []) << bib
  end
  index
end

#bibitem_nested_id(xmldoc) ⇒ Object



148
149
150
151
152
153
154
155
156
157
# File 'lib/metanorma/cleanup/bibitem.rb', line 148

# Strips the id and anchor attributes from bibitems nested inside another
# bibitem, and then from bibitems nested inside bibdata.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def bibitem_nested_id(xmldoc)
  strip_identity = lambda do |nodes|
    nodes.each do |node|
      node.delete("id")
      node.delete("anchor")
    end
  end
  strip_identity.call(xmldoc.xpath("//bibitem//bibitem"))
  strip_identity.call(xmldoc.xpath("//bibdata//bibitem"))
end

#bibitem_title_to_id(bibitem) ⇒ Object



196
197
198
199
# File 'lib/metanorma/cleanup/bibitem.rb', line 196

# Returns the text that stands in for a missing document identifier: the
# text of the bibitem's title, or failing that of its formattedref.
#
# @param bibitem [Object] bibitem node (must respond to #at)
# @return [String, nil] nil when the bibitem has neither element
def bibitem_title_to_id(bibitem)
  source = bibitem.at("./title") || bibitem.at("./formattedref")
  return unless source

  source.text
end

#biblio_hidden_inherit(xmldoc) ⇒ Object



63
64
65
66
67
# File 'lib/metanorma/cleanup/bibitem.rb', line 63

# Propagates hidden="true" from a references element down to each of its
# direct bibitem children.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def biblio_hidden_inherit(xmldoc)
  xmldoc.xpath("//references[@hidden = 'true']").each do |refs|
    refs.xpath("./bibitem").each do |bib|
      bib["hidden"] = true
    end
  end
end

#biblio_no_ext(xmldoc) ⇒ Object



69
70
71
# File 'lib/metanorma/cleanup/bibitem.rb', line 69

# Removes every ext element that is a direct child of a bibitem.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def biblio_no_ext(xmldoc)
  extensions = xmldoc.xpath("//bibitem/ext")
  extensions.each { |ext| ext.remove }
end

#empty_docid_to_title(xmldoc) ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/metanorma/cleanup/bibitem.rb', line 183

# Gives bibitems that have no usable document identifier a fallback
# identifier derived from their title (or formattedref).
#
# A bibitem is skipped when it already has a docidentifier whose type is
# something other than metanorma, DOI or metanorma-ordinal, or when
# empty_docid_to_title? rejects it. Otherwise a
# <docidentifier type='title' primary='true'> element is inserted right after
# its last title, or after its formattedref if it has no title.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def empty_docid_to_title(xmldoc)
  xmldoc.xpath("//references/bibitem").each do |b|
    b.at("./docidentifier[not(@type = 'metanorma' or @type = 'DOI' or " \
     "@type = 'metanorma-ordinal')]") and next
    empty_docid_to_title?(b) or next
    ins = b.at("./title[last()]") || b.at("./formattedref") or next
    # `next`, not `return`: a bibitem without usable text must not stop the
    # remaining bibitems from being processed
    id = bibitem_title_to_id(b) or next
    # the id is plain text going into markup, so &, < and > must be escaped
    escaped = id.to_s.encode(xml: :text)
    ins.next = <<~XML
      <docidentifier type='title' primary='true'>#{escaped}</docidentifier>
    XML
  end
end

#empty_docid_to_title?(bibitem) ⇒ Boolean

Applies to normative references only; the bibliography uses an ordinal code instead.

Returns:

  • (Boolean)


202
203
204
# File 'lib/metanorma/cleanup/bibitem.rb', line 202

# Decides whether a bibitem without a docidentifier should get a title-based
# one: only when its parent element is marked normative="true".
#
# @param bibitem [Object] bibitem node (must respond to #parent)
# @return [Boolean]
def empty_docid_to_title?(bibitem)
  container = bibitem.parent
  container["normative"] == "true"
end

#extract_notes_from_biblio(refs) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/metanorma/cleanup/bibitem.rb', line 54

# Moves notes flagged with an appended attribute out of their bibitem and
# places them after it, dropping the flag.
#
# The notes are walked last-to-first and each one is attached with `next=`
# on the bibitem.
#
# @param refs [Object] node containing bibitem children (responds to #xpath)
def extract_notes_from_biblio(refs)
  refs.xpath("./bibitem").each do |bib|
    flagged = bib.xpath("./note[@appended]")
    flagged.reverse_each do |note|
      note.delete("appended")
      bib.next = note
    end
  end
end

#fetch_local_bibitem(xmldoc) ⇒ Object

If the citation URI points to a local file, get the bibitem from that file.



137
138
139
140
141
142
143
144
145
146
# File 'lib/metanorma/cleanup/bibitem.rb', line 137

# If a bibitem's citation URI points to a local file, replaces the bibitem
# with the record read from that file (see read_local_bibitem).
#
# Only bibitems that have both a formattedref and a uri of type "citation"
# are considered. Those for which no local record can be read are left
# untouched.
#
# The replacement takes over the identity of the bibitem it replaces: id and
# anchor are copied when present and removed when absent. They must not be
# assigned unconditionally, because assigning a missing (nil) attribute is
# stringified by Nokogiri into an empty id=""/anchor="" attribute; a bibitem
# whose identity was deliberately stripped would then be picked up again by
# the //bibitem[@anchor] lookup in bibitem_id_docid_hash.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def fetch_local_bibitem(xmldoc)
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
    .each do |b|
    uri = b.at("./uri[@type = 'citation']")&.text
    bibitem = read_local_bibitem(uri) or next
    %w(id anchor).each do |k|
      if b[k]
        bibitem[k] = b[k]
      else
        bibitem.delete(k)
      end
    end
    b.replace(bibitem)
  end
end

#fetch_termbase(_termbase, _id) ⇒ Object



112
113
114
# File 'lib/metanorma/cleanup/bibitem.rb', line 112

# Termbase lookup stub: both arguments are ignored and an empty string is
# always returned.
#
# @param _termbase [Object] unused
# @param _id [Object] unused
# @return [String] always ""
def fetch_termbase(_termbase, _id)
  ""
end

#fold_notes_into_biblio(refs) ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/metanorma/cleanup/bibitem.rb', line 45

# Pulls the notes that immediately follow a bibitem into that bibitem.
#
# As long as the element right after the bibitem is a note, the note is
# flagged with an appended attribute, detached, and added as the bibitem's
# last child. The first non-note sibling ends the run.
#
# @param refs [Object] node containing bibitem children (responds to #xpath)
def fold_notes_into_biblio(refs)
  refs.xpath("./bibitem").each do |bib|
    while (sibling = bib.next_element) && sibling.name == "note"
      sibling["appended"] = true
      bib << sibling.remove
    end
  end
end

#format_ref(ref, type) ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/metanorma/cleanup/bibitem.rb', line 73

# Renders a document identifier as it should appear in a citation.
#
# Any fn elements are removed from the identifier markup first. Identifiers
# of a type other than "metanorma" are passed to @isodoc.docid_prefix; a
# metanorma identifier consisting only of digits is wrapped in square
# brackets; everything else is returned as is.
#
# @param ref [String] XML markup of the identifier
# @param type [String] docidentifier type
# @return [String]
def format_ref(ref, type)
  fragment = Nokogiri::XML.fragment(ref)
  fragment.traverse { |node| node.remove if node.name == "fn" }
  cleaned = to_xml(fragment)
  return @isodoc.docid_prefix(type, cleaned) unless type == "metanorma"
  if /^\d+$/.match(cleaned) && !/^\[.*\]$/.match(cleaned)
    return "[#{cleaned}]"
  end

  cleaned
end

#formattedref_spans(xmldoc) ⇒ Object



8
9
10
11
12
13
14
15
16
# File 'lib/metanorma/cleanup/bibitem.rb', line 8

# Converts span markup inside formattedref into bibitem content.
#
# For every bibitem whose formattedref contains spans, a new record is built
# from those spans and merged into the bibitem. Afterwards the amend
# attribute is dropped from every bibitem that carries it.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def formattedref_spans(xmldoc)
  xmldoc.xpath("//bibitem[formattedref//span]").each do |bib|
    merge_bibitem_from_formattedref_spans(
      bib, new_bibitem_from_formattedref_spans(bib)
    )
  end
  xmldoc.xpath("//bibitem[@amend]").each { |bib| bib.delete("amend") }
end

#idtype2cit(ref) ⇒ Object



93
94
95
96
97
98
99
# File 'lib/metanorma/cleanup/bibitem.rb', line 93

# Builds a map from docidentifier type to the formatted citation for that
# type, covering every type that occurs on the bibitem.
#
# A type is handled once only; types for which select_docid finds no
# identifier are left out.
#
# @param ref [Object] bibitem node (must respond to #xpath)
# @return [Hash{String => String}]
def idtype2cit(ref)
  citations = {}
  ref.xpath("./docidentifier/@type").each do |type_attr|
    type = type_attr.text
    next if citations[type]

    docid = select_docid(ref, type)
    next unless docid

    citations[type] = format_ref(docid.children.to_xml, docid["type"])
  end
  citations
end

#merge_bibitem_from_formattedref_span_attrs(bib, new) ⇒ Object



39
40
41
42
43
# File 'lib/metanorma/cleanup/bibitem.rb', line 39

# Copies selected attributes (currently only type) from the span-derived
# record onto the bibitem, leaving the bibitem's value alone when the new
# record does not set the attribute.
#
# @param bib [Object] bibitem being updated (responds to #[]=)
# @param new [Object] record built from the formattedref spans (responds to #[])
def merge_bibitem_from_formattedref_span_attrs(bib, new)
  %w(type).each do |attr|
    value = new[attr]
    bib[attr] = value if value
  end
end

#merge_bibitem_from_formattedref_spans(bib, new) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/metanorma/cleanup/bibitem.rb', line 27

# Combines the record built from formattedref spans with the bibitem it was
# built from.
#
# After copying attributes, one of three things happens:
# - the bibitem has a title and an amend attribute: its content is merged
#   with the new record via MergeBibitems;
# - the bibitem has a title but no amend attribute: its content is replaced
#   by the new record's content;
# - the bibitem has no title: the new record's content is appended to it.
#
# @param bib [Object] bibitem node being updated
# @param new [Object] record built from the formattedref spans
def merge_bibitem_from_formattedref_spans(bib, new)
  merge_bibitem_from_formattedref_span_attrs(bib, new)
  has_title = bib.at("./title")
  if has_title && bib["amend"]
    merger = Metanorma::Standoc::Cleanup::MergeBibitems
      .new(bib.to_xml, new.to_xml)
    bib.children = merger.merge.to_noko.children
  elsif has_title
    bib.children = new.children.to_xml
  else
    bib << new.children.to_xml
  end
end

#new_bibitem_from_formattedref_spans(bib) ⇒ Object



18
19
20
21
22
23
24
25
# File 'lib/metanorma/cleanup/bibitem.rb', line 18

# Converts the spans of a bibitem into a new record using SpansToBibitem and
# logs every error the conversion reports against the bibitem: fatal ones
# under STANDOC_52, all others under STANDOC_53.
#
# @param bib [Object] bibitem node handed to the converter
# @return [Object] the converter's output (its #out)
def new_bibitem_from_formattedref_spans(bib)
  conversion = Metanorma::Standoc::Cleanup::SpansToBibitem.new(bib).convert
  conversion.err.each do |issue|
    code = issue[:fatal] ? "STANDOC_52" : "STANDOC_53"
    @log.add(code, bib, params: [issue[:msg]])
  end
  conversion.out
end

#read_local_bibitem(uri) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
# File 'lib/metanorma/cleanup/bibitem.rb', line 116

# Reads the local file a citation URI points to and turns its bibdata
# element into a bibitem.
#
# The bibdata element is re-parsed on its own with its default namespace
# declaration stripped, renamed to bibitem, given a <uri type="citation">
# element just before its first docidentifier, and relieved of its ext child.
#
# @param uri [String] citation URI
# @return [Object, nil] the bibitem node; nil if no file can be read, the
#   file has no bibdata, or the bibdata has no docidentifier
def read_local_bibitem(uri)
  xml = read_local_bibitem_file(uri)
  return nil unless xml

  bibdata = xml.at("//*[local-name() = 'bibdata']")
  return nil unless bibdata

  markup = bibdata.to_xml
    .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")
  item = Nokogiri::XML(markup).root
  item.name = "bibitem"
  docid = item.at("./*[local-name() = 'docidentifier']")
  return nil unless docid

  docid.previous = %{<uri type="citation">#{uri}</uri>}
  item&.at("./*[local-name() = 'ext']")&.remove
  item
end

#read_local_bibitem_file(uri) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/metanorma/cleanup/bibitem.rb', line 128

# Locates and parses the local file behind a citation URI.
#
# URIs starting with http:// or https:// are never treated as local. For
# anything else the file name is @localdir followed by the URI, tried with
# the .rxl extension first and .xml second.
#
# @param uri [String] citation URI
# @return [Object, nil] the parsed document, or nil if the URI is remote or
#   neither file exists
def read_local_bibitem_file(uri)
  return nil if %r{^https?://}.match?(uri)

  path = %w(rxl xml).map { |ext| "#{@localdir}#{uri}.#{ext}" }
    .find { |candidate| File.file?(candidate) }
  return nil unless path

  Nokogiri::XML(File.read(path, encoding: "utf-8"))
end

#reference_names(xmldoc) ⇒ Object



82
83
84
85
86
87
88
89
90
91
# File 'lib/metanorma/cleanup/bibitem.rb', line 82

# Registers, for every bibitem that is not nested inside another bibitem, how
# it is to be cited, keyed by the bibitem's anchor in @anchors.
#
# When select_docid finds an identifier, the entry holds the formatted
# identifier as :xref and the per-type citations as :id. Otherwise the anchor
# itself serves as both.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def reference_names(xmldoc)
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
    docid = select_docid(ref)
    entry =
      if docid
        { xref: format_ref(docid.children.to_xml, docid["type"]),
          id: idtype2cit(ref) }
      else
        { xref: ref["anchor"], id: { "" => ref["anchor"] } }
      end
    @anchors[ref["anchor"]] = entry
  end
end

#remove_dup_bibtem_id(xmldoc) ⇒ Object

Remove duplicates when both the ID and the docid are the same, in case duplicates were introduced through termbases.



161
162
163
164
165
166
167
# File 'lib/metanorma/cleanup/bibitem.rb', line 161

# Removes duplicate bibitems: among bibitems sharing both the same anchor and
# the same docidentifier, only the first is kept. Such duplicates can be
# introduced through termbases.
#
# @param xmldoc [Object] XML document, passed on to bibitem_id_docid_hash
def remove_dup_bibtem_id(xmldoc)
  bibitem_id_docid_hash(xmldoc).each_value do |by_docid|
    by_docid.each_value do |same_record|
      same_record.drop(1).each { |dupe| dupe.remove }
    end
  end
end

#remove_empty_docid(xmldoc) ⇒ Object



178
179
180
181
# File 'lib/metanorma/cleanup/bibitem.rb', line 178

# Deletes docidentifier children of bibitems whose content is empty or
# whitespace only.
#
# @param xmldoc [Object] XML document (must respond to #xpath)
def remove_empty_docid(xmldoc)
  blank = xmldoc.xpath("//bibitem/docidentifier[normalize-space(.)='']")
  blank.each { |docid| docid.remove }
end

#select_docid(ref, type = nil) ⇒ Object



101
102
103
104
105
106
107
108
109
110
# File 'lib/metanorma/cleanup/bibitem.rb', line 101

# Chooses the docidentifier that represents a bibitem.
#
# Candidates are tried in this order of preference, and the first match wins:
# the metanorma identifier; a primary identifier in the document language
# (@lang), then one with no language, then any primary one; a non-DOI
# identifier in the document language, then one with no language, then any
# non-DOI one.
#
# @param ref [Object] bibitem node (must respond to #at)
# @param type [String, nil] when given, every candidate must additionally
#   have this type
# @return [Object, nil] the chosen docidentifier node, nil if none qualifies
def select_docid(ref, type = nil)
  by_type = type ? "[@type = '#{type}']" : nil
  [
    "[@type = 'metanorma']",
    "[@primary = 'true'][@language = '#{@lang}']",
    "[@primary = 'true'][not(@language)]",
    "[@primary = 'true']",
    "[not(@type = 'DOI')][@language = '#{@lang}']",
    "[not(@type = 'DOI')][not(@language)]",
    "[not(@type = 'DOI')]",
  ].each do |filter|
    match = ref.at("./docidentifier#{filter}#{by_type}")
    return match if match
  end
  nil
end