Class: Relaton::Bipm::RawdataBipmMetrologia::ArticleParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb

Constant Summary collapse

# Attribute names assembled by #parse: for every entry the method named
# parse_<attribute> is called and its result is stored under that key.
ATTRS =
%i[docidentifier title contributor date copyright abstract relation series
extent type source ext].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(doc, journal, volume, article, errors = {}) ⇒ ArticleParser

Initialize parser

Parameters:

  • doc (Nokogiri::XML::Document)

    XML document

  • journal (String)

    journal

  • volume (String)

    volume

  • article (String)

    article

  • errors (Hash) (defaults to: {})

    errors hash



28
29
30
31
32
33
34
35
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 28

# Initialize parser.
#
# @param doc [Nokogiri::XML::Document] XML document
# @param journal [String] journal
# @param volume [String] volume
# @param article [String] article
# @param errors [Hash] errors hash, updated in place by the parse_* methods
def initialize(doc, journal, volume, article, errors = {})
  @errors = errors
  @journal, @volume, @article = journal, volume, article
  # Keep both the <article> root and its <article-meta> node at hand; most
  # lookups are relative to one of the two.
  @doc = doc.at("/article")
  @meta = doc.at("/article/front/article-meta")
end

Class Method Details

.parse(path, errors = {}) ⇒ Relaton::Bipm::ItemData

Create new parser and parse document

Parameters:

  • path (String)

    path to XML file

Returns:

  • (Relaton::Bipm::ItemData)

    document



13
14
15
16
17
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 13

# Create a new parser for the XML file at +path+ and parse the document.
#
# The journal, volume and article values are taken from the name of the
# directory that contains the file: it is split on "_" and the first
# segment is discarded.
#
# @param path [String] path to XML file
# @param errors [Hash] errors hash passed on to the parser
# @return [Relaton::Bipm::ItemData] document
def self.parse(path, errors = {})
  xml = Nokogiri::XML(File.read(path, encoding: "UTF-8"))
  parent_dir = path.split("/")[-2]
  _prefix, journal, volume, article = parent_dir.split("_")
  parser = new(xml, journal, volume, article, errors)
  parser.parse
end

Instance Method Details

#affiliation(contrib) ⇒ Array<Relaton::Bib::Affiliation>

Parse affiliations

Parameters:

  • contrib (Nokogiri::XML::Element)

    contributor element

Returns:

  • (Array<Relaton::Bib::Affiliation>)

    array of affiliations



163
164
165
166
167
168
169
170
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 163

# Parse the affiliations referenced by a contributor.
#
# Each <xref ref-type="aff"> of the contributor is resolved to the <aff>
# element with the matching id inside the article's contrib-group.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [Array<Relaton::Bib::Affiliation>] array of affiliations
def affiliation(contrib)
  aff = contrib.xpath("./xref[@ref-type='aff']").filter_map do |x|
    a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']")
    # A dangling xref (no <aff> with that id) is skipped instead of handing
    # nil to parse_affiliation, which would raise.
    parse_affiliation(a) if a
  end
  # `&&=` only touches the flag while it is still truthy.
  @errors[:article_affiliation] &&= aff.empty?
  aff
end

#bibitem(date, type) ⇒ Relaton::Bipm::ItemData

Create bibitem

Parameters:

  • date (String)
  • type (String)

    date type

Returns:

  • (Relaton::Bipm::ItemData)

    bibitem



399
400
401
402
403
404
405
406
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 399

# Create a bibitem for one dated manifestation of the article.
#
# The medium carrier is "online" when +type+ is "epub" and "print" for any
# other type.
#
# @param date [String] date
# @param type [String] date type
# @return [Relaton::Bipm::ItemData] bibitem
def bibitem(date, type)
  item_date = Relaton::Bib::Date.new(type: type, at: date)
  carrier = case type
            when "epub" then "online"
            else "print"
            end
  ItemData.new(
    medium: Relaton::Bib::Medium.new(carrier: carrier),
    formattedref: Relaton::Bib::Formattedref.new(content: pubid),
    docidentifier: [create_docidentifier(pubid, "BIPM", true)],
    date: [item_date],
  )
end

#citation_bibitem(citation) ⇒ Relaton::Bipm::ItemData

Build bibitem from an element-citation

Parameters:

  • citation (Nokogiri::XML::Element)

    element-citation node

Returns:

  • (Relaton::Bipm::ItemData)

    bibitem built from the citation



364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 364

# Build a bibitem from an element-citation.
#
# DOI, source (used as the title) and year are each optional. For every one
# of them the matching error flag is updated: true when the value is missing
# or empty, false otherwise (only while the flag is still truthy).
#
# @param citation [Nokogiri::XML::Element] element-citation node
# @return [Relaton::Bipm::ItemData] bibitem holding whatever was found
def citation_bibitem(citation)
  text_at = ->(xpath) { citation.at(xpath)&.text }
  missing = ->(value) { value.nil? || value.empty? }
  attrs = {}

  doi = text_at.call("./pub-id[@pub-id-type='doi']")
  no_doi = missing.call(doi)
  @errors[:article_citation_doi] &&= no_doi
  unless no_doi
    attrs[:docidentifier] = [Relaton::Bib::Docidentifier.new(content: doi, type: "doi")]
    attrs[:source] = [Relaton::Bib::Uri.new(content: "https://doi.org/#{doi}", type: "doi")]
  end

  source = text_at.call("./source")
  no_title = missing.call(source)
  @errors[:article_citation_title] &&= no_title
  attrs[:title] = [Relaton::Bib::Title.new(content: source)] unless no_title

  year = text_at.call("./year")
  no_year = missing.call(year)
  @errors[:article_citation_date] &&= no_year
  attrs[:date] = [Relaton::Bib::Date.new(type: "published", at: year)] unless no_year

  ItemData.new(**attrs)
end

#create_docidentifier(id, type, primary = nil) ⇒ Relaton::Bib::Docidentifier

Create document identifier

Parameters:

  • id (String)

    document id

  • type (String)

    id type

  • primary (Boolean, nil) (defaults to: nil)

    is primary id

Returns:

  • (Relaton::Bib::Docidentifier)

    document identifier



105
106
107
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 105

# Create a document identifier.
#
# @param id [String] document id
# @param type [String] id type
# @param primary [Boolean, nil] is primary id
# @return [Relaton::Bib::Docidentifier] document identifier
def create_docidentifier(id, type, primary = nil)
  Relaton::Bib::Docidentifier.new(content: id, type: type, primary: primary)
end

#create_organization(contrib) ⇒ Object



147
148
149
150
151
152
153
154
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 147

# Build an organization from the contributor's <collab> child.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [Relaton::Bib::Organization, nil] nil when <collab> is absent or
#   has empty text
def create_organization(contrib)
  collab = contrib.at("./collab")
  absent = collab.nil? || collab.text.empty?
  @errors[:article_contributor_organization] &&= absent
  return if absent

  org_name = Relaton::Bib::TypedLocalizedString.new(content: collab.text)
  Relaton::Bib::Organization.new(name: [org_name])
end

#create_person(contrib) ⇒ Object



139
140
141
142
143
144
145
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 139

# Build a person from the contributor's <name> child.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [Relaton::Bib::Person, nil] nil when <name> is absent or has
#   empty text
def create_person(contrib)
  name_node = contrib.at("./name")
  absent = name_node.nil? || name_node.text.empty?
  @errors[:article_contributor_person] &&= absent
  return if absent

  Relaton::Bib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
end

#date_part(date, type) ⇒ Object



284
285
286
287
288
289
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 284

# Read one component of a date element as a two-character string.
#
# @param date [Nokogiri::XML::Element] date element
# @param type [String] child element name, e.g. "month" or "day"
# @return [String] the child's text left-padded with "0" to width 2, or
#   "01" when the child is missing or empty
def date_part(date, type)
  raw = (date.at("./#{type}")&.text).to_s
  raw.empty? ? "01" : raw.rjust(2, "0")
end

#dates {|date, type| ... } ⇒ Array<String, Object>

Parse date

Yields:

  • (date, type)

    date and type

Returns:

  • (Array<String, Object>)

    string date or whatever block returns



275
276
277
278
279
280
281
282
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 275

# Parse the article's pub-date elements.
#
# Each date is rendered as "YYYY-MM-DD"; month and day come from #date_part.
#
# @yield [date, type] date string and the element's pub-type attribute
# @return [Array<String, Object>] date strings, or whatever the block
#   returns for each date
def dates
  @meta.xpath("./pub-date").map do |pub_date|
    month = date_part(pub_date, "month")
    day = date_part(pub_date, "day")
    iso = [pub_date.at('./year').text, month, day].join("-")
    if block_given?
      yield(iso, pub_date[:"pub-type"])
    else
      iso
    end
  end
end

#fullname(name) ⇒ Relaton::Bib::FullName

Create full name

Parameters:

  • name (Nokogiri::XML::Element)

    name element

Returns:

  • (Relaton::Bib::FullName)

    full name



224
225
226
227
228
229
230
231
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 224

# Create a full name from a <name> element.
#
# The complete name is the text of <given-names> and <surname> (whichever
# are present) joined by a space.
#
# @param name [Nokogiri::XML::Element] name element
# @return [Relaton::Bib::FullName, nil] full name, or nil when neither part
#   yields any text
def fullname(name)
  parts = ["./given-names", "./surname"].map { |xpath| name.at(xpath) }.compact
  joined = parts.map(&:text).join(" ")
  @errors[:article_fullname] &&= joined.empty?
  return if joined.empty?

  Relaton::Bib::FullName.new(
    completename: Relaton::Bib::LocalizedString.new(content: joined, language: "en", script: "Latn"),
  )
end

#journal_title ⇒ String

Parse journal title

Returns:

  • (String)

    journal title



88
89
90
91
92
93
94
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 88

# Parse the journal title (memoized, including a nil result).
#
# @return [String, nil] journal title, nil when the element is missing
def journal_title
  unless instance_variable_defined?(:@journal_title)
    node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
    @journal_title = node&.text
    blank = @journal_title.nil? || @journal_title.empty?
    @errors[:journal_title] &&= blank
  end
  @journal_title
end

#parse ⇒ Relaton::Bipm::ItemData

Create new document

Returns:

  • (Relaton::Bipm::ItemData)

    parsed document



42
43
44
45
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 42

# Create a new document.
#
# Calls parse_<attribute> for every name in ATTRS and passes the collected
# values to ItemData as keyword arguments.
#
# @return [Relaton::Bipm::ItemData] document
def parse
  attrs = ATTRS.each_with_object({}) do |attr, collected|
    collected[attr] = send("parse_#{attr}")
  end
  ItemData.new(**attrs)
end

#parse_abstract ⇒ Array<Relaton::Bib::LocalizedMarkedUpString>

Parse abstract

Returns:

  • (Array<Relaton::Bib::LocalizedMarkedUpString>)

    array of abstracts



318
319
320
321
322
323
324
325
326
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 318

# Parse abstracts.
#
# The inner markup of each <abstract> is kept as content; the language is
# read from the element's xml:lang attribute.
#
# @return [Array<Relaton::Bib::Abstract>] array of abstracts
def parse_abstract
  abstracts = @meta.xpath("./abstract").map do |node|
    Relaton::Bib::Abstract.new(content: node.inner_html, language: node[:"xml:lang"], script: "Latn")
  end
  @errors[:article_abstract] &&= abstracts.empty?
  abstracts
end

#parse_address(aff) ⇒ Object



204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 204

# Build the formatted address of an affiliation.
#
# The address is the text that follows <institution>, with non-word
# characters trimmed by the regexp below, followed by the <country> text;
# non-empty parts are joined with ", ".
#
# @param aff [Nokogiri::XML::Element] affiliation element
# @return [Array<Relaton::Bib::Address>] one address, or an empty array when
#   nothing was found
def parse_address(aff)
  trailing = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
  country = (aff.at('country')&.text).to_s
  formatted = [trailing, country].reject(&:empty?).join(", ")
  @errors[:article_affiliation_address] &&= formatted.empty?
  if formatted.empty?
    []
  else
    [Relaton::Bib::Address.new(formatted_address: formatted)]
  end
end

#parse_affiliation(aff) ⇒ Object



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 172

# Convert an <aff> element into an affiliation.
#
# Entries that are really footnotes (permanent address, guest, deceased,
# corresponding-author notes) and a couple of known bad values are skipped.
# When <institution> is present its text is the organization name and the
# surrounding text provides subdivision and address; otherwise the whole
# cleaned-up text is the name.
#
# @param aff [Nokogiri::XML::Element] affiliation element
# @return [Relaton::Bib::Affiliation, nil] nil for skipped entries
def parse_affiliation(aff)
  fragments = aff.xpath("text()|sup|sub").to_xml.split(",").map(&:strip).reject(&:empty?)
  text = CGI::unescapeHTML(fragments.join(", "))
  footnote = text.include?("Permanent address:") || text == "Germany" ||
    text.start_with?("Guest", "Deceased") ||
    text.include?("Author to whom any correspondence should be addressed")
  return if footnote

  args = {}
  name = text
  institution = aff.at('institution')
  if institution
    name = institution.text
    # Known bad value: a street address tagged as an institution.
    return if name == "1005 Southover Lane"

    args[:subdivision] = parse_division(aff)
    args[:address] = parse_address(aff)
  end
  args[:name] = [Relaton::Bib::TypedLocalizedString.new(content: name)]
  Relaton::Bib::Affiliation.new(organization: Relaton::Bib::Organization.new(**args))
end

#parse_contributor ⇒ Array<Relaton::Bib::Contributor>

Parse contributor

Returns:

  • (Array<Relaton::Bib::Contributor>)

    array of contributors



129
130
131
132
133
134
135
136
137
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 129

# Parse contributors.
#
# Each <contrib> becomes a contributor whose role type is the element's
# contrib-type attribute; person and organization are filled in from its
# <name> and <collab> children when present.
#
# @return [Array<Relaton::Bib::Contributor>] array of contributors
def parse_contributor
  contributors = @meta.xpath("./contrib-group/contrib").map do |contrib|
    role = Relaton::Bib::Contributor::Role.new(type: contrib[:"contrib-type"])
    Relaton::Bib::Contributor.new(
      person: create_person(contrib),
      organization: create_organization(contrib),
      role: [role],
    )
  end
  @errors[:article_contributor] &&= contributors.empty?
  contributors
end

#parse_copyright ⇒ Array<Relaton::Bib::Copyright>

Parse copyright

Returns:

  • (Array<Relaton::Bib::Copyright>)

    array of copyright associations



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 296

# Parse copyright.
#
# Every <permissions> element that carries both a copyright-year and a
# copyright-statement yields one copyright. The statement is split on " & "
# and the first run of letter-only words in each fragment is taken as an
# owner organization name.
#
# @return [Array<Relaton::Bib::Copyright>] array of copyright associations
def parse_copyright
  result = @meta.xpath("./permissions").filter_map do |permissions|
    from = permissions.at("./copyright-year")
    statement = permissions.at("./copyright-statement")
    # Without a statement there is no owner to report; skip rather than
    # dereference nil.
    next unless from && statement

    owners = statement.text.split(" & ").filter_map do |fragment|
      name = fragment[/\p{L}+(?:\s\p{L}+)*/]
      # A fragment with no letters at all cannot name an organization.
      next unless name

      org_name = Relaton::Bib::TypedLocalizedString.new(content: name, language: "en", script: "Latn")
      org = Relaton::Bib::Organization.new(name: [org_name])
      Relaton::Bib::ContributionInfo.new(organization: org)
    end
    next if owners.empty?

    Relaton::Bib::Copyright.new(owner: owners, from: from.text)
  end
  @errors[:article_copyright] &&= result.empty?
  result
end

#parse_date ⇒ Array<Relaton::Bib::Date>

Parse date

Returns:

  • (Array<Relaton::Bib::Date>)

    array of dates



260
261
262
263
264
265
266
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 260

# Parse the publication date.
#
# The smallest of the date strings returned by #dates is used.
#
# @return [Array<Relaton::Bib::Date>] a single "published" date, or an empty
#   array when the article has no dates
def parse_date
  earliest = dates.min
  @errors[:article_date] &&= earliest.nil?
  earliest ? [Relaton::Bib::Date.new(type: "published", at: earliest)] : []
end

#parse_division(aff) ⇒ Object



195
196
197
198
199
200
201
202
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 195

# Build the subdivision of an affiliation from the text that precedes its
# <institution> element (non-word characters trimmed by the regexp below).
#
# @param aff [Nokogiri::XML::Element] affiliation element
# @return [Array<Relaton::Bib::Subdivision>] one subdivision, or an empty
#   array when there is no such text
def parse_division(aff)
  leading = aff.xpath("text()[following-sibling::institution]").text
  division = leading.gsub(/^\W*|\W*$/, "")
  @errors[:article_affiliation_division] &&= division.empty?
  if division.empty?
    []
  else
    label = Relaton::Bib::TypedLocalizedString.new(content: division, language: "en", script: "Latn")
    [Relaton::Bib::Subdivision.new(name: [label])]
  end
end

#parse_docidentifier ⇒ Array<Relaton::Bib::DocumentIdentifier>

Parse docid

Returns:

  • (Array<Relaton::Bib::DocumentIdentifier>)

    array of document identifiers



52
53
54
55
56
57
58
59
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 52

# Parse document identifiers.
#
# The list starts with the primary "BIPM" identifier built from #pubid and
# is followed by one identifier per DOI article-id.
#
# @return [Array<Relaton::Bib::Docidentifier>] array of document identifiers
def parse_docidentifier
  ids = [create_docidentifier(pubid, "BIPM", true)]
  @meta.xpath("./article-id[@pub-id-type='doi']").each do |node|
    ids << create_docidentifier(node.text, node["pub-id-type"])
  end
  # NOTE(review): ids always holds the primary identifier, so this check can
  # only ever clear the flag, never keep it set.
  @errors[:article_docidentifier] &&= ids.empty?
  ids
end

#parse_doctype ⇒ Object



453
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 453

# Document type of every parsed item.
#
# @return [Doctype] doctype with content "article"
def parse_doctype
  Doctype.new(content: "article")
end

#parse_ext ⇒ Object



451
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 451

# Extension block of the parsed item.
#
# @return [Ext] ext carrying the doctype from #parse_doctype
def parse_ext
  Ext.new(doctype: parse_doctype)
end

#parse_extent ⇒ Array<Relaton::Bib::Extent>

Parse extent

Returns:

  • (Array<Relaton::Bib::Extent>)

    array of extents



423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 423

# Parse extent.
#
# <volume> and <issue> become localities of the same type. <fpage> becomes a
# "page" locality whose upper bound is the text of <lpage>, if present.
#
# @return [Array<Relaton::Bib::Extent>] one extent holding all localities,
#   or an empty array when none were found
def parse_extent
  localities = @meta.xpath("./volume|./issue|./fpage").map do |node|
    first_page = node.name == "fpage"
    last_page = first_page ? @meta.at("./lpage")&.text : nil
    kind = first_page ? "page" : node.name
    Relaton::Bib::Locality.new(type: kind, reference_from: node.text, reference_to: last_page)
  end
  @errors[:article_extent] &&= localities.empty?
  localities.empty? ? [] : [Relaton::Bib::Extent.new(locality: localities)]
end

#parse_references ⇒ Array<Relaton::Bib::Relation>

Parse back/ref-list references as “cites” relations

Returns:

  • (Array<Relaton::Bib::Relation>)

    array of “cites” relations



346
347
348
349
350
351
352
353
354
355
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 346

# Parse back/ref-list references as "cites" relations.
#
# References without an <element-citation> child are ignored.
#
# @return [Array<Relaton::Bib::Relation>] array of "cites" relations
def parse_references
  cited = @doc.xpath("./back/ref-list/ref").each_with_object([]) do |ref, acc|
    citation = ref.at("./element-citation")
    acc << Relaton::Bib::Relation.new(type: "cites", bibitem: citation_bibitem(citation)) if citation
  end
  @errors[:article_references] &&= cited.empty?
  cited
end

#parse_relation ⇒ Array<Relaton::Bib::Relation>

Parse relation

Returns:

  • (Array<Relaton::Bib::Relation>)

    array of document relations



333
334
335
336
337
338
339
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 333

# Parse relations.
#
# One "hasManifestation" relation is created per publication date, followed
# by the "cites" relations from #parse_references. Only the manifestations
# count towards the :article_relation error flag.
#
# @return [Array<Relaton::Bib::Relation>] array of document relations
def parse_relation
  manifestations = dates do |date, pub_type|
    item = bibitem(date, pub_type)
    Relaton::Bib::Relation.new(type: "hasManifestation", bibitem: item)
  end
  @errors[:article_relation] &&= manifestations.empty?
  citations = parse_references
  manifestations + citations
end

#parse_series ⇒ Array<Relaton::Bib::Series>

Parse series

Returns:

  • (Array<Relaton::Bib::Series>)

    array of series



413
414
415
416
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 413

# Parse series: a single series titled with the journal title.
#
# @return [Array<Relaton::Bib::Series>] array of series
def parse_series
  series_title = Relaton::Bib::Title.new(content: journal_title, language: "en", script: "Latn")
  series = Relaton::Bib::Series.new(title: [series_title])
  [series]
end

#parse_source ⇒ Object



441
442
443
444
445
446
447
448
449
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 441

# Parse source links.
#
# Every DOI article-id yields two URIs pointing at https://doi.org/<doi>:
# one of type "src" and one of type "doi", in that order.
#
# @return [Array<Relaton::Bib::Uri>] array of URIs
def parse_source
  uris = @meta.xpath("./article-id[@pub-id-type='doi']").flat_map do |node|
    url = "https://doi.org/#{node.text}"
    %w[src doi].map { |kind| Relaton::Bib::Uri.new(content: url, type: kind) }
  end
  @errors[:article_source] &&= uris.empty?
  uris
end

#parse_title ⇒ Array<Relaton::Bib::TypedTitleString>

Parse title

Returns:

  • (Array<Relaton::Bib::TypedTitleString>)

    array of title strings



114
115
116
117
118
119
120
121
122
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 114

# Parse titles.
#
# Article titles with empty text are skipped; for the others the inner
# markup is kept as content and the language comes from xml:lang.
#
# @return [Array<Relaton::Bib::Title>] array of titles
def parse_title
  titles = @meta.xpath("./title-group/article-title").filter_map do |node|
    unless node.text.empty?
      Relaton::Bib::Title.new(content: node.inner_html, language: node[:"xml:lang"], script: "Latn")
    end
  end
  @errors[:article_title] &&= titles.empty?
  titles
end

#parse_type ⇒ Object



439
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 439

# Bibliographic item type of every parsed item.
#
# @return [String] always "article"
def parse_type
  "article"
end

#pubid ⇒ String

Build primary publication identifier string (e.g. “Metrologia 55 1 125”)

Returns:

  • (String)

    pubid



66
67
68
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 66

# Build the primary publication identifier string (memoized): the journal
# title followed by the result of #volume_issue_article, separated by a
# space.
#
# @return [String] pubid
def pubid
  @pubid ||= [journal_title, volume_issue_article].join(" ")
end

#volume_issue_article ⇒ String

Parse volume, issue and page

Returns:

  • (String)

    volume issue page



75
76
77
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 75

# Join the journal, volume and article values given to the constructor with
# single spaces, leaving out any that are nil.
#
# @return [String] space-separated identifier parts
def volume_issue_article
  parts = [@journal, @volume, @article]
  parts.reject(&:nil?).join(" ")
end