Class: Metanorma::Standoc::Cleanup::SpansToBibitem

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/metanorma/cleanup/spans_to_bibitem.rb,
lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb

Constant Summary

Constants included from Utils

Utils::SECTION_CONTAINERS, Utils::SUBCLAUSE_XPATH

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#add_id, #add_id_text, #add_noko_elem, adoc2xml, #asciimath_key, #attr_code, #complete_and_compare_dates, #complete_iso_date, #complete_year_month, #complete_year_only, #csv_split, #dl_to_attrs, #dl_to_elems, #document_ns_attributes, #grkletters, #insert_before, #isodoc, #isolated_asciidoctor_convert, #kv_parse, #link_unwrap, #noko, #parse_complete_date, #parse_partial_date, #processor, #quoted_csv_split, #refid?, #section_containers, #separate_numbering_footnotes, #term_expr, #textcleanup, #to_xml, #wrap_in_para, #xml_encode

Constructor Details

#initialize(bib) ⇒ SpansToBibitem

Returns a new instance of SpansToBibitem.



11
12
13
14
15
16
17
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 11

# Prepares the converter for a single bibliographic node.
#
# Spans under the node are extracted and preprocessed first. Existing
# docidentifier children are then extracted, run through the same
# preprocessing, and reconciled with the span-derived docids through
# #override_docids. The order matters: both extraction steps modify +bib+.
#
# @param bib node queried with xpath for ./formattedref and ./docidentifier
def initialize(bib)
  @bib = bib
  @err = []
  @spans = spans_preprocess(extract_spans(bib))
  existing_ids = spans_preprocess(extract_docid(bib))[:docid]
  @spans[:docid] = override_docids(existing_ids, @spans[:docid])
end

Instance Attribute Details

#err ⇒ Object (readonly)

Returns the value of attribute err.



9
10
11
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 9

# Reader for the error list held in @err.
#
# @return [Object] the current value of @err
def err = @err

#out ⇒ Object (readonly)

Returns the value of attribute out.



9
10
11
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 9

# Reader for the conversion result held in @out (assigned by #convert).
#
# @return [Object] the current value of @out
def out = @out

Instance Method Details

#convert ⇒ Object



36
37
38
39
40
41
42
43
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 36

# Serialises the preprocessed spans and parses the result into @out.
#
# The markup produced by #spans_to_bibitem is wrapped in a <bibitem>
# element and parsed with Nokogiri; type, language, script and locale are
# then copied onto that element as attributes whenever they are set.
#
# @return [SpansToBibitem] self, so the call can be chained
def convert
  body = spans_to_bibitem(@spans)
  @out = Nokogiri::XML("<bibitem>#{body}</bibitem>").root
  %i(type language script locale).each do |attr|
    value = @spans[attr]
    @out[attr.to_s] = value if value
  end
  self
end

#empty_span_hash ⇒ Object



32
33
34
35
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 32

# Builds a fresh accumulator for preprocessed spans.
#
# List-valued keys start as new empty arrays; :extent and :in start as
# new empty hashes. Every call returns independent containers.
#
# @return [Hash] the empty accumulator
def empty_span_hash
  list_keys = %i(contrib docid uri date classification keyword image note)
  acc = list_keys.to_h { |key| [key, []] }
  acc[:extent] = {}
  acc[:in] = {}
  acc
end

#extract_docid(bib) ⇒ Object



25
26
27
28
29
30
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 25

# Collects the docidentifier children of +bib+ as raw span records.
#
# Each record has key "docid", the element's type attribute, and its
# text. The element is removed from +bib+ unless +bib+ has a title child.
#
# @param bib node queried with xpath for ./docidentifier
# @return [Array<Hash>] one { key:, type:, val: } record per docidentifier
def extract_docid(bib)
  bib.xpath("./docidentifier").map do |docid|
    record = { key: "docid", type: docid["type"], val: docid.text }
    docid.remove if bib.at("./title").nil?
    record
  end
end

#extract_spans(bib) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 5

# Collects raw span records from the formattedref of +bib+.
#
# Every span that has no span ancestor at the time it is visited is handed
# to #extract_spans1. Afterwards every image under formattedref loses its
# id attribute, is removed from the tree, and is recorded under the key
# "image" with its serialised markup as value.
#
# @param bib node queried with xpath for ./formattedref
# @return [Array<Hash>] the collected { key:, type:, val: } records
def extract_spans(bib)
  records = []
  bib.xpath("./formattedref//span").each do |node|
    next if node.at("./ancestor::span")

    extract_spans1(node, records)
  end
  bib.xpath("./formattedref//image").each do |img|
    img.delete("id")
    records << { key: "image", type: nil, val: img.remove.to_xml }
  end
  records
end

#extract_spans1(span, acc) ⇒ Object



17
18
19
20
21
22
23
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 17

# Records one span and takes the span element out of the tree.
#
# The class attribute is split on the first "." into key and type; the
# serialised children become the value. A span whose class is exactly
# "type" is removed outright; any other span is replaced by its children.
#
# @param span node responding to [], children, remove and replace
# @param acc [Array<Hash>] accumulator the record is appended to
def extract_spans1(span, acc)
  css = span["class"]
  key, type = css.split(".", 2)
  acc << { key: key, type: type, val: span.children.to_xml }
  return span.replace(span.children) unless css == "type"

  # Fall back to unwrapping if remove reports nothing, as the original
  # `and ... or` chain did.
  span.remove || span.replace(span.children)
end

#host_rearrange(ret) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 99

# Completes the host (:in) record once it has a title.
#
# Without a host title the accumulator is returned untouched. Otherwise
# the host is padded with the keys of #empty_span_hash and a default type
# of "misc" (existing host values win), the series is moved from the main
# record into the host, and a main type starting with "in" gives the host
# that type with the "in" prefix stripped.
#
# @param ret [Hash] preprocessed span accumulator
# @return [Hash] the same accumulator, modified in place
def host_rearrange(ret)
  host = ret[:in]
  return ret unless host[:title]

  host.merge!(empty_span_hash, { type: "misc" }) do |_key, existing, _new|
    existing
  end
  host[:series] = ret.delete(:series)
  type = ret[:type]
  host[:type] = type.sub(/^in/, "") if /^in/.match?(type)
  ret
end

#multiple_givennames?(span, contrib) ⇒ Boolean

Returns:

  • (Boolean)


129
130
131
132
133
134
135
136
137
138
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 129

# Tells whether +span+ adds a further given name to the last contributor.
#
# That is the case when the span is a givenname or formatted-initials span
# and the last contributor already carries one of the two. As a side
# effect, existing formatted initials on that contributor are moved to
# :givenname so the caller can append to them.
#
# @param span [Hash] span record with a :key
# @param contrib [Array<Hash>] contributors collected so far
# @return [Boolean]
def multiple_givennames?(span, contrib)
  return false unless %w(formatted-initials givenname).include?(span[:key])

  person = contrib[-1]
  initials = person[:"formatted-initials"]
  return false unless initials || person[:givenname]

  if initials
    person[:givenname] = initials
    person.delete(:"formatted-initials")
  end
  true
end

#override_docids(old, new) ⇒ Object

Override old values with new values if the type is the same. The comparison is case-insensitive; if the types differ only in case, use the old value’s type, not the new one’s.



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 22

# Merges two docid lists, letting +new+ win on matching types.
#
# Types are compared case-insensitively. When an old entry's type matches
# entries in +new+, those entries keep their value but take over the old
# entry's spelling of the type. Old entries with no match are appended.
# +new+ is modified in place and returned.
#
# @param old [Array<Hash>] docid records with :type and :val
# @param new [Array<Hash>] docid records that take precedence
# @return [Array<Hash>] +new+, after merging
def override_docids(old, new)
  # Snapshot of the types present before any old entry is appended.
  known = new.map { |entry| entry[:type]&.upcase }
  old.each do |prior|
    wanted = prior[:type]&.upcase
    unless known.include?(wanted)
      new << prior
      next
    end
    new.each do |entry|
      entry[:type] = prior[:type] if entry[:type]&.upcase == wanted
    end
  end
  new
end

#span_preprocess1(span, ret) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 43

# Files one raw span record into the accumulator +ret+ according to its key.
#
# - uri, docid, classification, keyword: appended with type and the value
#   passed through link_unwrap
# - date: appended, type defaulting to "published"
# - pages, volume, issue: appended under ret[:extent]
# - pubplace, title, type, series, edition, version, abstract, language,
#   script, locale: stored as a single value (a later span overwrites)
# - image, note: appended with type and value
# - in_title: stored as the host title
# - publisher: appended as an organization contributor with role publisher
# - person and organization keys: delegated to the contributor helpers
# - in_* contributor keys: the "in_" prefix is stripped from span[:key]
#   in place and the span is filed under ret[:in][:contrib]
# - anything else: an "unrecognised key" message is appended to @err
#
# @param span [Hash] raw record with :key, :type and :val
# @param ret [Hash] accumulator, modified in place
def span_preprocess1(span, ret)
  key = span[:key]
  case key
  when "uri", "docid", "classification", "keyword"
    unwrapped = link_unwrap(Nokogiri::XML.fragment(span[:val]))
    ret[key.to_sym] << { type: span[:type], val: unwrapped.to_xml }
  when "date"
    ret[key.to_sym] << { type: span[:type] || "published",
                         val: span[:val] }
  when "pages", "volume", "issue"
    (ret[:extent][key.to_sym] ||= []) << span[:val]
  when "pubplace", "title", "type", "series", "edition", "version",
       "abstract", "language", "script", "locale"
    ret[key.to_sym] = span[:val]
  when "image", "note"
    ret[key.to_sym] << { type: span[:type], val: span[:val] }
  when "in_title"
    ret[:in][:title] = span[:val]
  when "publisher"
    ret[:contrib] << { role: "publisher", entity: "organization",
                       name: span[:val] }
  when "surname", "initials", "givenname", "formatted-initials"
    ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
  when "fullname"
    ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
  when "organization"
    ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
  when "in_surname", "in_initials", "in_givenname",
       "in_formatted-initials"
    host = ret[:in]
    host[:contrib] ||= []
    # key is the very string stored in span[:key]; strip the prefix there.
    key.sub!(/^in_/, "")
    host[:contrib] = spans_preprocess_contrib(span, host[:contrib])
  when "in_fullname"
    host = ret[:in]
    host[:contrib] ||= []
    key.sub!(/^in_/, "")
    host[:contrib] = spans_preprocess_fullname(span, host[:contrib])
  when "in_organization"
    host = ret[:in]
    host[:contrib] ||= []
    key.sub!(/^in_/, "")
    host[:contrib] = spans_preprocess_org(span, host[:contrib])
  else
    @err << { msg: "unrecognised key '#{span[:key]}' in " \
                   "`span:#{span[:key]}[#{span[:val]}]`" }
  end
end

#span_to_contrib(span, title) ⇒ Object



161
162
163
164
165
166
167
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 161

# Serialises one contributor record as a <contributor> element.
#
# Organizations are written inline with their name; every other entity is
# rendered through #span_to_person.
#
# @param span [Hash] contributor record with :role and :entity
# @param title passed through to #span_to_person for its error message
# @return [String] the contributor markup
def span_to_contrib(span, title)
  entity =
    case span[:entity]
    when "organization"
      "<organization><name>#{span[:name]}</name></organization>"
    else
      span_to_person(span, title)
    end
  "<contributor><role type='#{span[:role]}'/>#{entity}</contributor>"
end

#span_to_date(span) ⇒ Object



143
144
145
146
147
148
149
150
151
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 143

# Serialises one date record as a <date> element.
#
# A hyphen or en dash directly followed by four digits is read as a range
# separator: the value is split there once into <from> and <to>. Any other
# value is written as a single <on>. The type attribute is only emitted
# when the record has a type.
#
# @param span [Hash] date record with :type and :val
# @return [String] the date markup
def span_to_date(span)
  range_separator = /[-–](?=\d{4})/
  text = span[:val]
  body =
    if range_separator.match?(text)
      parts = text.split(range_separator, 2)
      "<from>#{parts[0]}</from><to>#{parts[1]}</to>"
    else
      "<on>#{text}</on>"
    end
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<date#{type_attr}>#{body}</date>"
end

#span_to_docid(span, key) ⇒ Object



136
137
138
139
140
141
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 136

# Serialises a typed value as an element named +key+.
#
# The type attribute is written only when the record has a type.
#
# @param span [Hash] record with :type and :val
# @param key [String] element name to emit
# @return [String] the element markup
def span_to_docid(span, key)
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<#{key}#{type_attr}>#{span[:val]}</#{key}>"
end

#span_to_extent(span, key) ⇒ Object



127
128
129
130
131
132
133
134
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 127

# Serialises one extent value as a <locality> element.
#
# The value is split on hyphens and en dashes; the first piece becomes
# <referenceFrom> and the second, when there is one, <referenceTo>.
# Further pieces are ignored.
#
# @param span [String] extent value such as "10-20"
# @param key [String] locality type to emit
# @return [String] the locality markup
def span_to_extent(span, key)
  from, to = span.split(/[-–]/)
  upper = to ? "<referenceTo>#{to}</referenceTo>" : ""
  "<locality type='#{key}'>" \
    "<referenceFrom>#{from}</referenceFrom>#{upper}</locality>"
end

#span_to_person(span, title) ⇒ Object



176
177
178
179
180
181
182
183
184
185
186
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 176

# Serialises a person contributor as a <person> element.
#
# The record is validated first (a missing surname is reported through
# #validate_span_to_person). Formatted initials, when present, are emitted
# instead of forenames; otherwise each given name becomes a <forename>.
#
# @param span [Hash] contributor record
# @param title passed to the validator for its error message
# @return [String] the person markup
def span_to_person(span, title)
  validate_span_to_person(span, title)
  initials = span[:"formatted-initials"]
  given =
    if initials
      "<formatted-initials>#{initials}</formatted-initials>"
    else
      Array(span[:givenname]).map { |n| "<forename>#{n}</forename>" }.join
    end
  "<person><name>#{given}<surname>#{span[:surname]}</surname></name>" \
    "</person>"
end

#spans_defaults(spans) ⇒ Object



93
94
95
96
97
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 93

# Fills in a default script when only a language was given.
#
# The script is looked up through Metanorma::Utils.default_script when a
# language is set and no script is; otherwise nothing changes.
#
# @param spans [Hash] preprocessed span accumulator
# @return [Hash] the same accumulator
def spans_defaults(spans)
  return spans if spans[:script] || !spans[:language]

  spans[:script] = ::Metanorma::Utils.default_script(spans[:language])
  spans
end

#spans_preprocess(spans) ⇒ Object



37
38
39
40
41
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 37

# Turns a list of raw span records into the structured accumulator.
#
# Each record is filed by #span_preprocess1 into a fresh
# #empty_span_hash; the result then passes through #host_rearrange and
# #spans_defaults.
#
# @param spans [Array<Hash>] raw { key:, type:, val: } records
# @return [Hash] the structured accumulator
def spans_preprocess(spans)
  filled = spans.each_with_object(empty_span_hash) do |span, acc|
    span_preprocess1(span, acc)
  end
  spans_defaults(host_rearrange(filled))
end

#spans_preprocess_contrib(span, contrib) ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 111

# Adds a name-part span (surname, given name, initials) to the contributors.
#
# An "initials" key is renamed to "formatted-initials" on the span itself.
# A new person is started when #spans_preprocess_new_contrib? says so. The
# value is then either appended to the last person's given names (see
# #multiple_givennames?) or stored under the span's key.
#
# @param span [Hash] span record; its :key may be rewritten
# @param contrib [Array<Hash>] contributors so far, modified in place
# @return [Array<Hash>] +contrib+
def spans_preprocess_contrib(span, contrib)
  span[:key] = "formatted-initials" if span[:key] == "initials"
  if spans_preprocess_new_contrib?(span, contrib)
    contrib << { role: span[:type] || "author", entity: "person" }
  end
  person = contrib[-1]
  if multiple_givennames?(span, contrib)
    person[:givenname] = [person[:givenname], span[:val]].flatten
  else
    person[span[:key].to_sym] = span[:val]
  end
  contrib
end

#spans_preprocess_fullname(span, contrib) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 140

# Splits a full name into name parts and appends it as a new person.
#
# A space is inserted after every "." that is directly followed by a
# letter, then the name is split on single spaces. The last piece is the
# surname. If there are leading pieces and all of them end in ".", they
# are stored joined as formatted initials; otherwise they are stored as
# the list of given names.
#
# @param span [Hash] span record with :val and optional :type (the role)
# @param contrib [Array<Hash>] contributors so far, modified in place
# @return [Array<Hash>] +contrib+
def spans_preprocess_fullname(span, contrib)
  parts = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
  leading = parts[0..-2]
  person = { role: span[:type] || "author", entity: "person",
             surname: parts[-1] }
  if parts.size > 1 && leading.all? { |piece| /\.$/.match?(piece) }
    person[:"formatted-initials"] = leading.join(" ")
  else
    person[:givenname] = leading
  end
  contrib << person
  contrib
end

#spans_preprocess_new_contrib?(span, contrib) ⇒ Boolean

Returns:

  • (Boolean)


123
124
125
126
127
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 123

# Decides whether +span+ starts a new person rather than extending the last.
#
# A new person is needed when there is no contributor yet, when the last
# one is an organization, when the span is a surname and the last person
# already has one, or when the span's role (default "author") differs from
# the last contributor's role.
#
# @param span [Hash] span record with :key and optional :type
# @param contrib [Array<Hash>] contributors collected so far
# @return [Boolean] truthy when a new contributor must be started (the
#   existing surname itself is returned in the surname case)
def spans_preprocess_new_contrib?(span, contrib)
  return true if contrib.empty?

  last = contrib[-1]
  return true if last[:entity] == "organization"

  if span[:key] == "surname"
    existing = last[:surname]
    return existing if existing
  end
  last[:role] != (span[:type] || "author")
end

#spans_preprocess_org(span, contrib) ⇒ Object



152
153
154
155
156
# File 'lib/metanorma/cleanup/spans_to_bibitem_preprocessing.rb', line 152

# Appends an organization contributor built from +span+.
#
# @param span [Hash] span record with :val (the name) and optional :type
#   (the role, default "author")
# @param contrib [Array<Hash>] contributors so far, modified in place
# @return [Array<Hash>] +contrib+
def spans_preprocess_org(span, contrib)
  role = span[:type] || "author"
  contrib.push({ role: role, entity: "organization", name: span[:val] })
end

#spans_to_bibitem(spans) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 45

# Serialises a preprocessed span accumulator as the content of a bibitem.
#
# The pieces are emitted in a fixed order: title, identifiers and dates,
# contributors, edition data, language data, abstract, series, place,
# host relation, extent, classifications, keywords, depictions.
#
# @param spans [Hash] preprocessed span accumulator (main or host record)
# @return [String] concatenated markup, without the enclosing element
def spans_to_bibitem(spans)
  xml = []
  xml << "<title>#{spans[:title]}</title>" if spans[:title]
  xml << spans_to_bibitem_docid(spans)
  xml << spans_to_contribs(spans)
  xml << spans_to_bibitem_edn(spans)
  xml << spans_to_bibitem_i18n(spans)
  xml << "<abstract>#{spans[:abstract]}</abstract>" if spans[:abstract]
  xml << spans_to_series(spans)
  xml << spans_to_pubplace(spans)
  xml << spans_to_bibitem_host(spans)
  xml << spans_to_bibitem_extent(spans[:extent])
  %w(classification keyword).each do |tag|
    spans[tag.to_sym]&.each { |item| xml << span_to_docid(item, tag) }
  end
  spans[:image]&.each do |img|
    xml << "<depiction>#{img[:val]}</depiction>"
  end
  xml.join
end

#spans_to_bibitem_docid(spans) ⇒ Object



98
99
100
101
102
103
104
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 98

# Serialises URIs, document identifiers and dates, in that order.
#
# @param spans [Hash] accumulator with optional :uri, :docid and :date lists
# @return [String] concatenated markup ("" when all three are absent)
def spans_to_bibitem_docid(spans)
  uris = spans[:uri]&.map { |item| span_to_docid(item, "uri") }
  ids = spans[:docid]&.map { |item| span_to_docid(item, "docidentifier") }
  dates = spans[:date]&.map { |item| span_to_date(item) }
  # join flattens the nested lists and renders a missing (nil) list as "".
  [uris, ids, dates].join
end

#spans_to_bibitem_edn(spans) ⇒ Object



106
107
108
109
110
111
112
113
114
115
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 106

# Serialises edition, version and notes, in that order.
#
# A note whose type renders as an empty string is written as a plain
# <note> without a type attribute.
#
# @param spans [Hash] accumulator with optional :edition, :version, :note
# @return [String] concatenated markup
def spans_to_bibitem_edn(spans)
  xml = []
  xml << "<edition>#{spans[:edition]}</edition>" if spans[:edition]
  xml << "<version>#{spans[:version]}</version>" if spans[:version]
  spans[:note]&.each do |note|
    tagged = "<note type='#{note[:type]}'>#{note[:val]}</note>"
    xml << tagged.sub("<note type=''>", "<note>")
  end
  xml.join
end

#spans_to_bibitem_extent(spans) ⇒ Object



117
118
119
120
121
122
123
124
125
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 117

# Serialises the extent record as an <extent> element.
#
# Volumes come first, then issues, then pages (emitted with locality type
# "page"). Nothing is emitted when the record is nil or holds no values.
#
# @param spans [Hash, nil] extent record with optional :volume, :issue
#   and :pages lists
# @return [String] the extent markup, or ""
def spans_to_bibitem_extent(spans)
  return "" if spans.nil?

  type_by_key = { volume: "volume", issue: "issue", pages: "page" }
  localities = type_by_key.flat_map do |key, type|
    (spans[key] || []).map { |value| span_to_extent(value, type) }
  end
  inner = localities.join
  inner.empty? ? "" : "<extent>#{inner}</extent>"
end

#spans_to_bibitem_host(spans) ⇒ Object



90
91
92
93
94
95
96
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 90

# Serialises the host (:in) record as an includedIn relation.
#
# The host's :type becomes the type attribute of the nested <bibitem>;
# the remaining host fields are rendered through #spans_to_bibitem.
#
# @param spans [Hash] accumulator whose :in entry holds the host record
# @return [String] the relation markup, or "" when there is no host data
def spans_to_bibitem_host(spans)
  host = spans[:in]
  return "" if host.nil? || host.empty?

  # Hand the nested serialiser a copy without :type instead of deleting
  # the key from the caller's hash: the deletion made a second conversion
  # of the same spans emit type=''.
  inner = spans_to_bibitem(host.reject { |key, _| key == :type })
  "<relation type='includedIn'><bibitem type='#{host[:type]}'>" \
    "#{inner}</bibitem></relation>"
end

#spans_to_bibitem_i18n(spans) ⇒ Object



77
78
79
80
81
82
83
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 77

# Serialises language, script and locale, in that order.
#
# @param spans [Hash] accumulator with optional :language, :script, :locale
# @return [String] concatenated markup for the values that are set
def spans_to_bibitem_i18n(spans)
  xml = []
  xml << "<language>#{spans[:language]}</language>" if spans[:language]
  xml << "<script>#{spans[:script]}</script>" if spans[:script]
  xml << "<locale>#{spans[:locale]}</locale>" if spans[:locale]
  xml.join
end

#spans_to_contribs(spans) ⇒ Object



153
154
155
156
157
158
159
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 153

# Serialises every contributor of the record.
#
# The record's title is passed along so person validation can name the
# item in its error message.
#
# @param spans [Hash] accumulator with optional :contrib list and :title
# @return [String] concatenated contributor markup
def spans_to_contribs(spans)
  contributors = spans[:contrib] || []
  contributors.map { |c| span_to_contrib(c, spans[:title]) }.join
end

#spans_to_pubplace(spans) ⇒ Object



69
70
71
72
73
74
75
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 69

# Serialises the place of publication.
#
# @param spans [Hash] accumulator with optional :pubplace
# @return [String] the <place> markup followed by a newline, or "" when
#   no place is set
def spans_to_pubplace(spans)
  place = spans[:pubplace]
  return "" unless place

  "<place><formattedPlace>#{place}</formattedPlace></place>\n"
end

#spans_to_series(spans) ⇒ Object



85
86
87
88
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 85

# Serialises the series title.
#
# @param spans [Hash] accumulator with optional :series
# @return [String] the <series> markup, or "" when no series is set
def spans_to_series(spans)
  series = spans[:series]
  series ? "<series><title>#{series}</title></series>" : ""
end

#validate_span_to_person(span, title) ⇒ Object



169
170
171
172
173
174
# File 'lib/metanorma/cleanup/spans_to_bibitem.rb', line 169

# Reports a person contributor that has no surname.
#
# Nothing happens when the surname is present; otherwise a fatal message
# naming the item's title and the offending record is appended to @err.
#
# @param span [Hash] contributor record
# @param title title of the bibliographic item, used in the message
def validate_span_to_person(span, title)
  return if span[:surname]

  @err << {
    msg: "Missing surname: issue with bibliographic markup " \
         "in \"#{title}\": #{span}",
    fatal: true,
  }
end