Class: Relaton::Bipm::RawdataBipmMetrologia::NisoJatsParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb

Constant Summary collapse

# Names of the bibliographic attributes assembled by #parse. For every entry
# `x`, #parse calls the instance method `parse_x` and passes the result to
# ItemData.new under the keyword `x`.
ATTRS = %i[
  docidentifier
  title
  contributor
  date
  copyright
  abstract
  relation
  series
  extent
  type
  source
  ext
].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(doc, journal, volume, article, errors = {}) ⇒ NisoJatsParser

Returns a new instance of NisoJatsParser.

Parameters:

  • doc (Niso::Jats::Article)

    document

  • journal (String)

    journal

  • volume (String)

    volume

  • article (String)

    article

  • errors (Hash) (defaults to: {})

    errors hash



19
20
21
22
23
24
25
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 19

# Store the parsed article together with the identifiers used to build the
# publication ID.
#
# @param doc [Niso::Jats::Article] parsed JATS document
# @param journal [String] journal part of the identifier
# @param volume [String] volume part of the identifier
# @param article [String] article part of the identifier
# @param errors [Hash] per-attribute flags; the parse_* methods update the
#   entries that are already truthy
def initialize(doc, journal, volume, article, errors = {})
  # Identifier parts (joined later by #volume_issue_article).
  @journal = journal
  @volume = volume
  @article = article

  # Source document and the caller-owned error registry.
  @doc = doc
  @errors = errors
end

Class Method Details

.parse(path, errors = {}) ⇒ Relaton::Bipm::ItemData

Returns document.

Parameters:

  • path (String)

    path to XML file

  • errors (Hash) (defaults to: {})

    errors hash

Returns:



31
32
33
34
35
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 31

# Read a JATS XML file and convert it into a bibliographic item.
#
# The journal, volume and article identifiers are taken from the name of the
# directory that contains the file: the name is split on "_" and the first
# chunk is discarded.
#
# @param path [String] path to XML file
# @param errors [Hash] errors hash forwarded to the parser instance
# @return [Relaton::Bipm::ItemData] document
def self.parse(path, errors = {})
  xml = File.read(path, encoding: "UTF-8")
  doc = Niso::Jats::Article.from_xml(xml)

  # Second-to-last path segment is the enclosing directory name.
  directory = path.split("/")[-2]
  _prefix, journal, volume, article = directory.split("_")

  parser = new(doc, journal, volume, article, errors)
  parser.parse
end

Instance Method Details

#extract_paragraph_text(paragraph) ⇒ Object



133
134
135
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 133

# Render a paragraph as a string by delegating to #serialize_mixed_content.
#
# @param paragraph [Object] paragraph element taken from an abstract
# @return [String] whatever #serialize_mixed_content returns for it
def extract_paragraph_text(paragraph) = serialize_mixed_content(paragraph)

#parse ⇒ Relaton::Bipm::ItemData

Returns document.

Returns:



38
39
40
41
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 38

# Build the bibliographic item by running every `parse_<attribute>` method
# listed in ATTRS and handing the results to ItemData as keyword arguments.
#
# @return [Relaton::Bipm::ItemData] document
def parse
  collected = ATTRS.each_with_object({}) do |name, acc|
    acc[name] = send("parse_#{name}")
  end
  ItemData.new(**collected)
end

#parse_abstract ⇒ Array<Relaton::Bib::Abstract>

Returns array of abstracts.

Returns:

  • (Array<Relaton::Bib::Abstract>)

    array of abstracts



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 113

# Convert the article's abstracts into Relaton abstracts.
#
# Each abstract becomes one string: its title (if any) followed by every
# paragraph wrapped in <p>…</p>. Abstracts with neither title nor paragraphs
# are skipped.
#
# @return [Array<Relaton::Bib::Abstract>] array of abstracts
def parse_abstract
  # Abstracts are read from the article metadata inside <front>
  # (JATS: front/article-meta/abstract).
  abstracts = @doc.front.article_meta.abstract
  return [] unless abstracts

  result = abstracts.filter_map do |a|
    content_parts = []
    # Title content may be a String or an Array of chunks; flatten it to one String.
    content_parts << Array(a.title.content).join if a.title
    a.p&.each do |paragraph|
      content_parts << "<p>#{extract_paragraph_text(paragraph)}</p>"
    end
    next if content_parts.empty?

    Relaton::Bib::Abstract.new(
      content: content_parts.join, language: a.lang, script: "Latn",
    )
  end
  # Only an already-truthy flag is updated: it stays set while no abstract
  # was produced and is cleared otherwise.
  @errors[:article_abstract] &&= result.empty?
  result
end

#parse_contributor ⇒ Array<Relaton::Bib::Contributor>

Returns array of contributors.

Returns:

  • (Array<Relaton::Bib::Contributor>)

    array of contributors



72
73
74
75
76
77
78
79
80
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 72

# Map every contributor of the document to a Relaton contributor carrying a
# person, an organization and a single role taken from the contrib type.
#
# @return [Array<Relaton::Bib::Contributor>] array of contributors
def parse_contributor
  contributors = @doc.contributors.map do |contrib|
    roles = [Relaton::Bib::Contributor::Role.new(type: contrib.contrib_type)]
    Relaton::Bib::Contributor.new(
      person: create_person(contrib),
      organization: create_organization(contrib),
      role: roles,
    )
  end
  # An already-truthy flag stays set only while no contributor was found.
  @errors[:article_contributor] &&= contributors.empty?
  contributors
end

#parse_copyright ⇒ Array<Relaton::Bib::Copyright>

Returns array of copyright associations.

Returns:

  • (Array<Relaton::Bib::Copyright>)

    array of copyright associations



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 92

# Build the copyright association from the article permissions.
#
# The first copyright year is used as the `from` value. Every copyright
# statement is split on " & " and the first run of ASCII words in each chunk
# is taken as the name of an owning organization.
#
# @return [Array<Relaton::Bib::Copyright>] array of copyright associations;
#   empty when permissions or the copyright year are missing
def parse_copyright
  # Permissions are read from the article metadata inside <front>
  # (JATS: front/article-meta/permissions).
  permissions = @doc.front.article_meta.permissions
  return [] unless permissions

  # Array() tolerates a missing (nil) year collection.
  from = Array(permissions.copyright_year).first
  return [] unless from

  owner = Array(permissions.copyright_statement).flat_map do |cs|
    Array(cs.content).join.split(" & ").map do |holder|
      # nil when the chunk contains no ASCII letters.
      name = holder[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
      org_name = Relaton::Bib::TypedLocalizedString.new(content: name, language: "en", script: "Latn")
      org = Relaton::Bib::Organization.new(name: [org_name])
      Relaton::Bib::ContributionInfo.new(organization: org)
    end
  end
  result = [Relaton::Bib::Copyright.new(owner: owner, from: from.content)]
  # Reaching this point means a copyright exists, so an already-truthy flag is cleared.
  @errors[:article_copyright] &&= result.empty?
  result
end

#parse_date ⇒ Array<Relaton::Bib::Date>

Returns array of dates.

Returns:

  • (Array<Relaton::Bib::Date>)

    array of dates



83
84
85
86
87
88
89
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 83

# Report the smallest of the document's publication dates as the
# "published" date.
#
# @return [Array<Relaton::Bib::Date>] one-element array, or an empty array
#   when the document has no publication date
def parse_date
  earliest = @doc.pub_dates.min
  # An already-truthy flag stays set only while no date was found.
  @errors[:article_date] &&= earliest.nil?
  earliest ? [Relaton::Bib::Date.new(type: "published", at: earliest)] : []
end

#parse_docidentifier ⇒ Array<Relaton::Bib::Docidentifier>

Returns array of document identifiers.

Returns:

  • (Array<Relaton::Bib::Docidentifier>)

    array of document identifiers



44
45
46
47
48
49
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 44

# Collect the document identifiers: the BIPM identifier built from #pubid
# (created with `true` as third argument) and, when the document has a DOI,
# a "doi" identifier.
#
# @return [Array<Relaton::Bib::Docidentifier>] array of document identifiers
def parse_docidentifier
  identifiers = [create_docidentifier(pubid, "BIPM", true)]
  identifiers.push(create_docidentifier(@doc.doi, "doi")) if @doc.doi
  # The list always holds the BIPM identifier, so an already-truthy flag is cleared.
  @errors[:article_docidentifier] &&= identifiers.empty?
  identifiers
end

#parse_doctype ⇒ Object



209
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 209

# Document type of every parsed item.
#
# @return [Doctype] doctype whose content is "article"
def parse_doctype
  Doctype.new(content: "article")
end

#parse_ext ⇒ Object



207
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 207

# Extension element wrapping the doctype returned by #parse_doctype.
#
# @return [Ext] extension data
def parse_ext
  Ext.new(doctype: parse_doctype)
end

#parse_extent ⇒ Array<Relaton::Bib::Extent>

Returns array of extents.

Returns:

  • (Array<Relaton::Bib::Extent>)

    array of extents



191
192
193
194
195
196
197
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 191

# Turn the document's locality entries into a single extent.
#
# Each entry is indexed positionally: [0] type, [1] start reference,
# [2] end reference.
#
# @return [Array<Relaton::Bib::Extent>] one-element array, or an empty array
#   when the document has no locality entries
def parse_extent
  localities = @doc.locality.map do |entry|
    Relaton::Bib::Locality.new(
      type: entry[0], reference_from: entry[1], reference_to: entry[2],
    )
  end
  # An already-truthy flag stays set only while no locality was found.
  @errors[:article_extent] &&= localities.empty?
  localities.empty? ? [] : [Relaton::Bib::Extent.new(locality: localities)]
end

#parse_relation ⇒ Array<Relaton::Bib::Relation>

Returns array of document relations.

Returns:

  • (Array<Relaton::Bib::Relation>)

    array of document relations



170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 170

# Create one "hasManifestation" relation per publication date of the article.
#
# Dates whose pub type is "ppub" come first; all other dates follow in their
# original document order. Any pub type other than "epub" is treated as
# "ppub" when building the related item.
#
# @return [Array<Relaton::Bib::Relation>] array of document relations
def parse_relation
  # Publication dates are read from the article metadata inside <front>
  # (JATS: front/article-meta/pub-date); a missing collection counts as empty.
  pub_dates = @doc.front.article_meta.pub_date || []

  # partition keeps the relative order inside each group, which sort_by with
  # a two-value key does not guarantee.
  print_dates, other_dates = pub_dates.partition { |pd| pd.pub_type == "ppub" }
  rels = (print_dates + other_dates).map do |pd|
    type = pd.pub_type == "epub" ? "epub" : "ppub"
    Relaton::Bib::Relation.new(type: "hasManifestation", bibitem: bibitem(pd, type))
  end
  # An already-truthy flag stays set only while no relation was produced.
  @errors[:article_relation] &&= rels.empty?
  rels
end

#parse_series ⇒ Array<Relaton::Bib::Series>

Returns array of series.

Returns:

  • (Array<Relaton::Bib::Series>)

    array of series



185
186
187
188
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 185

# Describe the journal as the series the article belongs to. The series
# title is the document's journal title, tagged as English / Latin script.
#
# @return [Array<Relaton::Bib::Series>] one-element array of series
def parse_series
  journal_title = Relaton::Bib::Title.new(
    content: @doc.journal_title,
    language: "en",
    script: "Latn",
  )
  series = Relaton::Bib::Series.new(title: [journal_title])
  [series]
end

#parse_source ⇒ Object



201
202
203
204
205
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 201

# Convert the document's DOI links into URI objects. Each link is splatted
# into Relaton::Bib::Uri.new as keyword arguments.
#
# @return [Array<Relaton::Bib::Uri>] source links
def parse_source
  uris = @doc.doi_links.map do |link_attrs|
    Relaton::Bib::Uri.new(**link_attrs)
  end
  # An already-truthy flag stays set only while no link was found.
  @errors[:article_source] &&= uris.empty?
  uris
end

#parse_title ⇒ Array<Relaton::Bib::Title>

Returns array of title strings.

Returns:

  • (Array<Relaton::Bib::Title>)

    array of title strings



62
63
64
65
66
67
68
69
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 62

# Build the article title, keeping inline markup by serializing the title
# element with #serialize_mixed_content.
#
# @return [Array<Relaton::Bib::Title>] one-element array of titles, or an
#   empty array when the article has no title element
def parse_title
  # The title is read from the article metadata inside <front>
  # (JATS: front/article-meta/title-group/article-title).
  title = @doc.front.article_meta.title_group&.article_title
  # Without a title there is nothing to serialize; the error flag is left as is.
  return [] unless title

  result = [Relaton::Bib::Title.new(
    content: serialize_mixed_content(title), language: title.lang, script: "Latn",
  )]
  # A title was produced, so an already-truthy flag is cleared.
  @errors[:article_title] &&= result.empty?
  result
end

#parse_type ⇒ Object



199
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 199

# Bibliographic item type of every parsed document.
#
# @return [String] always "article"
def parse_type
  "article"
end

#pubid ⇒ String

Returns primary BIPM publication identifier.

Returns:

  • (String)

    primary BIPM publication identifier



52
53
54
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 52

# Primary BIPM publication identifier: the journal title followed by a space
# and the value of #volume_issue_article. Computed once and cached in @pubid.
#
# @return [String] primary BIPM publication identifier
def pubid
  return @pubid if @pubid

  @pubid = "#{@doc.journal_title} #{volume_issue_article}"
end

#serialize_mixed_content(element) ⇒ Object

Reconstruct the marked-up string of a niso-jats mixed_content element (Title, Paragraph, …) by walking element_order in document order. Text nodes are emitted verbatim; recognised inline children are wrapped in their original XML tag so JATS markup like <italic> and <sub> survives into the relaton-bib payload instead of being flattened (paragraphs) or serialised as a stringified Array (titles).



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 143

# Rebuild the marked-up string of a mixed-content element by walking its
# element_order.
#
# Text nodes are appended as they are. An element node is emitted as
# <name>content</name> when its name (dashes turned into underscores) is one
# of INLINE_TYPES; the n-th occurrence of a name is paired with the n-th
# item returned by the accessor of that name. Other element nodes, and
# occurrences with no matching item, are skipped.
#
# @param element [Object] object exposing element_order and one accessor per
#   INLINE_TYPES entry
# @return [String] serialized content; "" when element has no element_order
def serialize_mixed_content(element)
  return "" unless element.respond_to?(:element_order) && element.element_order

  # One private queue per inline type; items are consumed front to back.
  queues = INLINE_TYPES.each_with_object({}) do |type, acc|
    acc[type] = element.send(type).to_a.dup
  end

  fragments = element.element_order.each_with_object([]) do |node, acc|
    case node.type
    when "Text"
      acc << node.text_content
    when "Element"
      key = node.name.tr("-", "_").to_sym
      next unless queues.key?(key)

      # shift yields nil once the queue is exhausted, which fails the
      # respond_to? check below.
      instance = queues[key].shift
      next unless instance.respond_to?(:content)

      body = instance.content
      body = body.join if body.is_a?(Array)
      acc << "<#{node.name}>#{body}</#{node.name}>"
    end
  end
  fragments.join
end

#volume_issue_article ⇒ String

Returns the journal, volume and article identifiers joined by single spaces.

Returns:

  • (String)

    journal, volume and article identifiers joined by single spaces



57
58
59
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/niso_jats_parser.rb', line 57

# Join the journal, volume and article identifiers with single spaces,
# leaving out any that are nil.
#
# @return [String] space-separated identifier parts
def volume_issue_article
  parts = [@journal, @volume, @article]
  parts.reject(&:nil?).join(" ")
end