Class: Relaton::Bipm::RawdataBipmMetrologia::ArticleParser
- Inherits:
-
Object
- Object
- Relaton::Bipm::RawdataBipmMetrologia::ArticleParser
- Defined in:
- lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb
Constant Summary collapse
- ATTRS =
%i[docidentifier title contributor date copyright abstract relation series extent type source ext].freeze
Class Method Summary collapse
-
.parse(path, errors = {}) ⇒ Relaton::Bipm::ItemData
Create new parser and parse document.
Instance Method Summary collapse
-
#affiliation(contrib) ⇒ Array<Relaton::Bib::Affiliation>
Parse affiliations.
-
#bibitem(date, type) ⇒ Relaton::Bipm::ItemData
Create bibitem.
-
#citation_bibitem(citation) ⇒ Relaton::Bipm::ItemData
Build bibitem from an element-citation.
-
#create_docidentifier(id, type, primary = nil) ⇒ Relaton::Bib::Docidentifier
Create document identifier.
- #create_organization(contrib) ⇒ Object
- #create_person(contrib) ⇒ Object
- #date_part(date, type) ⇒ Object
-
#dates {|date, type| ... } ⇒ Array<String, Object>
Parse date.
-
#fullname(name) ⇒ Relaton::Bib::FullName
Create full name.
-
#initialize(doc, journal, volume, article, errors = {}) ⇒ ArticleParser
constructor
Initialize parser.
-
#journal_title ⇒ String
Parse journal title.
-
#parse ⇒ Relaton::Bipm::ItemData
Create new document.
-
#parse_abstract ⇒ Array<Relaton::Bib::LocalizedMarkedUpString>
Parse abstract.
- #parse_address(aff) ⇒ Object
- #parse_affiliation(aff) ⇒ Object
-
#parse_contributor ⇒ Array<Relaton::Bib::Contributor>
Parse contributor.
-
#parse_copyright ⇒ Array<Relaton::Bib::Copyright>
Parse copyright.
-
#parse_date ⇒ Array<Relaton::Bib::Date>
Parse date.
- #parse_division(aff) ⇒ Object
-
#parse_docidentifier ⇒ Array<Relaton::Bib::Docidentifier>
Parse docid.
- #parse_doctype ⇒ Object
- #parse_ext ⇒ Object
-
#parse_extent ⇒ Array<Relaton::Bib::Extent>
Parse extent.
-
#parse_references ⇒ Array<Relaton::Bib::Relation>
Parse back/ref-list references as “cites” relations.
-
#parse_relation ⇒ Array<Relaton::Bib::Relation>
Parse relation.
-
#parse_series ⇒ Array<Relaton::Bib::Series>
Parse series.
- #parse_source ⇒ Object
-
#parse_title ⇒ Array<Relaton::Bib::TypedTitleString>
Parse title.
- #parse_type ⇒ Object
-
#pubid ⇒ String
Build primary publication identifier string (e.g. “Metrologia 55 1 125”).
-
#volume_issue_article ⇒ String
Parse volume, issue and page.
Constructor Details
#initialize(doc, journal, volume, article, errors = {}) ⇒ ArticleParser
Initialize parser
28 29 30 31 32 33 34 35 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 28
#
# Initialize parser.
#
# @param doc [#at] parsed XML document (`.parse` passes a Nokogiri document)
# @param journal [String, nil] first identifier component (see #volume_issue_article)
# @param volume [String, nil] second identifier component
# @param article [String, nil] third identifier component
# @param errors [Hash] error flags, updated in place by the parse_* methods
def initialize(doc, journal, volume, article, errors = {})
  @errors = errors
  @journal, @volume, @article = journal, volume, article
  # Both nodes are looked up with absolute paths on the document itself.
  @doc = doc.at("/article")
  @meta = doc.at("/article/front/article-meta")
end
Class Method Details
.parse(path, errors = {}) ⇒ Relaton::Bipm::ItemData
Create new parser and parse document
13 14 15 16 17 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 13
#
# Create new parser and parse document.
#
# The name of the directory containing the file is split on "_"; the first
# segment is dropped and the next three become journal, volume and article.
#
# @param path [String] path to the article XML file
# @param errors [Hash] error flags, updated in place while parsing
# @return [Relaton::Bipm::ItemData] result of #parse
def self.parse(path, errors = {})
  document = Nokogiri::XML(File.read(path, encoding: "UTF-8"))
  dirname = path.split("/")[-2]
  journal, volume, article = dirname.split("_").drop(1)
  parser = new(document, journal, volume, article, errors)
  parser.parse
end
Instance Method Details
#affiliation(contrib) ⇒ Array<Relaton::Bib::Affiliation>
Parse affiliations
163 164 165 166 167 168 169 170 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 163
#
# Parse affiliations.
#
# Resolves every `xref[@ref-type='aff']` of the contributor against the
# `aff` elements of the article meta and parses each one that is found.
#
# @param contrib [#xpath] contributor element
# @return [Array<Relaton::Bib::Affiliation>] parsed affiliations; empty when
#   nothing could be resolved or every affiliation was filtered out
def affiliation(contrib)
  aff = contrib.xpath("./xref[@ref-type='aff']").filter_map do |x|
    a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']") # /label/following-sibling::node()")
    # An xref may reference an id that has no matching <aff>; skip it rather
    # than passing nil to parse_affiliation, which would raise NoMethodError.
    parse_affiliation(a) if a
  end
  # The flag stays truthy only while no affiliation has been found.
  @errors[:article_affiliation] &&= aff.empty?
  aff
end
#bibitem(date, type) ⇒ Relaton::Bipm::ItemData
Create bibitem
399 400 401 402 403 404 405 406 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 399
#
# Create bibitem.
#
# @param date [String] date value stored in the item's single date
# @param type [String] date type; "epub" selects the "online" carrier,
#   anything else selects "print"
# @return [ItemData] item identified by #pubid
def bibitem(date, type)
  id = pubid
  ItemData.new(
    formattedref: Relaton::Bib::Formattedref.new(content: id),
    docidentifier: [create_docidentifier(id, "BIPM", true)],
    date: [Relaton::Bib::Date.new(type: type, at: date)],
    medium: Relaton::Bib::Medium.new(carrier: (type == "epub" ? "online" : "print")),
  )
end
#citation_bibitem(citation) ⇒ Relaton::Bipm::ItemData
Build bibitem from an element-citation
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 364
#
# Build bibitem from an element-citation.
#
# Only the DOI, the source (used as title) and the year are read. Each one
# has its own error flag, which is cleared when the value is present.
#
# @param citation [#at] element-citation node
# @return [ItemData] item holding whichever of the three values were present
def citation_bibitem(citation)
  # Returns the non-empty text at +xpath+ (or nil) and updates the flag +key+.
  fetch = lambda do |xpath, key|
    value = citation.at(xpath)&.text
    present = !(value.nil? || value.empty?)
    @errors[key] &&= !present
    value if present
  end

  attrs = {}
  if (doi = fetch.call("./pub-id[@pub-id-type='doi']", :article_citation_doi))
    attrs[:docidentifier] = [Relaton::Bib::Docidentifier.new(content: doi, type: "doi")]
    attrs[:source] = [Relaton::Bib::Uri.new(content: "https://doi.org/#{doi}", type: "doi")]
  end
  if (source = fetch.call("./source", :article_citation_title))
    attrs[:title] = [Relaton::Bib::Title.new(content: source)]
  end
  if (year = fetch.call("./year", :article_citation_date))
    attrs[:date] = [Relaton::Bib::Date.new(type: "published", at: year)]
  end
  ItemData.new(**attrs)
end
#create_docidentifier(id, type, primary = nil) ⇒ Relaton::Bib::Docidentifier
Create document identifier
105 106 107 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 105
#
# Create document identifier.
#
# @param id [String] identifier content
# @param type [String] identifier type
# @param primary [Boolean, nil] primary flag, passed through unchanged
# @return [Relaton::Bib::Docidentifier]
def create_docidentifier(id, type, primary = nil)
  attrs = { content: id, type: type, primary: primary }
  Relaton::Bib::Docidentifier.new(**attrs)
end
#create_organization(contrib) ⇒ Object
147 148 149 150 151 152 153 154 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 147
#
# Create an organization from the contributor's `collab` element.
#
# @param contrib [#at] contributor element
# @return [Relaton::Bib::Organization, nil] nil when `collab` is absent or
#   has empty text
def create_organization(contrib)
  collab = contrib.at("./collab")
  missing = collab.nil? || collab.text.empty?
  @errors[:article_contributor_organization] &&= missing
  return if missing

  Relaton::Bib::Organization.new(
    name: [Relaton::Bib::TypedLocalizedString.new(content: collab.text)],
  )
end
#create_person(contrib) ⇒ Object
139 140 141 142 143 144 145 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 139
#
# Create a person from the contributor's `name` element.
#
# @param contrib [#at] contributor element
# @return [Relaton::Bib::Person, nil] nil when `name` is absent or has empty text
def create_person(contrib)
  name_node = contrib.at("./name")
  absent = name_node.nil? || name_node.text.empty?
  @errors[:article_contributor_person] &&= absent
  return if absent

  Relaton::Bib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
end
#date_part(date, type) ⇒ Object
284 285 286 287 288 289 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 284
#
# Read one component of a date element as a two-character string.
#
# @param date [#at] date element
# @param type [String] child element name (callers pass "month" or "day")
# @return [String] child text left-padded with "0" to width 2, or "01" when
#   the child is missing or empty
def date_part(date, type)
  value = (date.at("./#{type}")&.text).to_s
  value.empty? ? "01" : value.rjust(2, "0")
end
#dates {|date, type| ... } ⇒ Array<String, Object>
Parse date
275 276 277 278 279 280 281 282 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 275
#
# Parse date.
#
# Builds a "YYYY-MM-DD" string for every `pub-date` of the article meta;
# a missing month or day defaults to "01" (see #date_part). Entries without
# a usable `year` are skipped: previously a missing year raised
# NoMethodError and an empty one produced a date such as "-01-01".
#
# @yield [date, type] optional; called once per date
# @yieldparam date [String] the formatted date
# @yieldparam type [String, nil] the element's `pub-type` attribute
# @return [Array<String, Object>] the dates, or the block results when a
#   block is given
def dates
  @meta.xpath("./pub-date").each_with_object([]) do |d, acc|
    year = d.at("./year")&.text
    next if year.nil? || year.empty?

    date = "#{year}-#{date_part(d, 'month')}-#{date_part(d, 'day')}"
    acc << (block_given? ? yield(date, d[:"pub-type"]) : date)
  end
end
#fullname(name) ⇒ Relaton::Bib::FullName
Create full name
224 225 226 227 228 229 230 231 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 224
#
# Create full name.
#
# Joins the `given-names` and `surname` texts (whichever exist) with a space.
#
# @param name [#at] name element
# @return [Relaton::Bib::FullName, nil] nil when the joined name is empty
def fullname(name)
  nodes = %w[given-names surname].map { |tag| name.at("./#{tag}") }
  cname = nodes.compact.map(&:text).join(" ")
  blank = cname.empty?
  @errors[:article_fullname] &&= blank
  return if blank

  Relaton::Bib::FullName.new(
    completename: Relaton::Bib::LocalizedString.new(content: cname, language: "en", script: "Latn"),
  )
end
#journal_title ⇒ String
Parse journal title
88 89 90 91 92 93 94 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 88
#
# Parse journal title.
#
# The lookup runs once; the result (including nil) is memoized, so the
# error flag is only updated on the first call.
#
# @return [String, nil] text of the journal-title element, nil when absent
def journal_title
  unless defined?(@journal_title)
    node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
    @journal_title = node&.text
    @errors[:journal_title] &&= @journal_title.to_s.empty?
  end
  @journal_title
end
#parse ⇒ Relaton::Bipm::ItemData
Create new document
42 43 44 45 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 42
#
# Create new document.
#
# Calls the `parse_<attr>` method for every entry of ATTRS and passes the
# results to ItemData as keyword arguments.
#
# @return [ItemData]
def parse
  attrs = ATTRS.each_with_object({}) do |attr, acc|
    acc[attr] = send("parse_#{attr}")
  end
  ItemData.new(**attrs)
end
#parse_abstract ⇒ Array<Relaton::Bib::LocalizedMarkedUpString>
Parse abstract
318 319 320 321 322 323 324 325 326 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 318
#
# Parse abstract.
#
# @return [Array<Relaton::Bib::Abstract>] one entry per `abstract` element,
#   carrying its inner HTML and `xml:lang` attribute
def parse_abstract
  abstracts = @meta.xpath("./abstract").each_with_object([]) do |node, acc|
    acc << Relaton::Bib::Abstract.new(
      content: node.inner_html, language: node[:"xml:lang"], script: "Latn",
    )
  end
  @errors[:article_abstract] &&= abstracts.empty?
  abstracts
end
#parse_address(aff) ⇒ Object
204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 204
#
# Build the formatted address of an affiliation.
#
# Combines the text nodes that follow the `institution` element (with
# leading and trailing non-word characters stripped) and the `country` text.
#
# @param aff [#xpath, #at] affiliation element
# @return [Array<Relaton::Bib::Address>] one address, or [] when both parts
#   are empty
def parse_address(aff)
  trailing = aff.xpath("text()[preceding-sibling::institution]").text.gsub(/^\W*|\W*$/, "")
  country = aff.at('country')&.text
  formatted = [trailing, country].reject { |part| part.nil? || part.empty? }.join(", ")
  blank = formatted.empty?
  @errors[:article_affiliation_address] &&= blank
  blank ? [] : [Relaton::Bib::Address.new(formatted_address: formatted)]
end
#parse_affiliation(aff) ⇒ Object
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 172
#
# Parse a single `aff` element into an affiliation.
#
# The element's text/sup/sub content is normalised into a comma-separated
# string. Entries that are notes rather than organizations (matched by the
# hard-coded texts below) are dropped.
#
# @param aff [#xpath, #at] affiliation element
# @return [Relaton::Bib::Affiliation, nil] nil for filtered-out entries
def parse_affiliation(aff)
  raw = aff.xpath("text()|sup|sub").to_xml
  text = CGI.unescapeHTML(raw.split(",").map(&:strip).reject(&:empty?).join(", "))
  skipped = text == "Germany" ||
            text.start_with?("Guest", "Deceased") ||
            ["Permanent address:",
             "Author to whom any correspondence should be addressed"].any? { |note| text.include?(note) }
  return if skipped

  org_attrs = {}
  institution = aff.at('institution')
  if institution.nil?
    # Without an institution element the whole normalised text is the name.
    name = text
  else
    name = institution.text
    return if name == "1005 Southover Lane"

    org_attrs[:subdivision] = parse_division(aff)
    org_attrs[:address] = parse_address(aff)
  end
  org_attrs[:name] = [Relaton::Bib::TypedLocalizedString.new(content: name)]
  Relaton::Bib::Affiliation.new(organization: Relaton::Bib::Organization.new(**org_attrs))
end
#parse_contributor ⇒ Array<Relaton::Bib::Contributor>
Parse contributor
129 130 131 132 133 134 135 136 137 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 129
#
# Parse contributor.
#
# @return [Array<Relaton::Bib::Contributor>] one contributor per `contrib`
#   element; its role type is the element's `contrib-type` attribute
def parse_contributor
  contributors = @meta.xpath("./contrib-group/contrib").map do |node|
    Relaton::Bib::Contributor.new(
      person: create_person(node),
      organization: create_organization(node),
      role: [Relaton::Bib::Contributor::Role.new(type: node[:"contrib-type"])],
    )
  end
  @errors[:article_contributor] &&= contributors.empty?
  contributors
end
#parse_copyright ⇒ Array<Relaton::Bib::Copyright>
Parse copyright
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 296
#
# Parse copyright.
#
# For every `permissions` element that has a `copyright-year`, the
# `copyright-statement` is split on " & " and the first run of
# letter-words in each fragment becomes an owner organization.
#
# A missing statement no longer raises NoMethodError, and fragments without
# any letters are dropped instead of producing an owner with a nil name.
# NOTE(review): a permissions element with a year but no usable statement
# yields a copyright with an empty owner list — confirm the model accepts it.
#
# @return [Array<Relaton::Bib::Copyright>]
def parse_copyright
  result = @meta.xpath("./permissions").each_with_object([]) do |l, m|
    from = l.at("./copyright-year")
    next unless from

    statement = (l.at("./copyright-statement")&.text).to_s
    owner = statement.split(" & ").filter_map do |c|
      name = c[/(?<name>\p{L}+(?:\s\p{L}+)*)/, "name"]
      next unless name

      org_name = Relaton::Bib::TypedLocalizedString.new(content: name, language: "en", script: "Latn")
      org = Relaton::Bib::Organization.new name: [org_name]
      Relaton::Bib::ContributionInfo.new(organization: org)
    end
    m << Relaton::Bib::Copyright.new(owner: owner, from: from.text)
  end
  @errors[:article_copyright] &&= result.empty?
  result
end
#parse_date ⇒ Array<Relaton::Bib::Date>
Parse date
260 261 262 263 264 265 266 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 260
#
# Parse date.
#
# @return [Array<Relaton::Bib::Date>] a single "published" date holding the
#   smallest value returned by #dates, or [] when there are no dates
def parse_date
  earliest = dates.min
  @errors[:article_date] &&= earliest.nil?
  earliest ? [Relaton::Bib::Date.new(type: "published", at: earliest)] : []
end
#parse_division(aff) ⇒ Object
195 196 197 198 199 200 201 202 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 195
#
# Build the subdivision of an affiliation from the text nodes that precede
# its `institution` element (leading and trailing non-word characters
# stripped).
#
# @param aff [#xpath] affiliation element
# @return [Array<Relaton::Bib::Subdivision>] one subdivision, or [] when the
#   text is empty
def parse_division(aff)
  leading = aff.xpath("text()[following-sibling::institution]").text
  div = leading.gsub(/^\W*|\W*$/, "")
  blank = div.empty?
  @errors[:article_affiliation_division] &&= blank
  return [] if blank

  [Relaton::Bib::Subdivision.new(
    name: [Relaton::Bib::TypedLocalizedString.new(content: div, language: "en", script: "Latn")],
  )]
end
#parse_docidentifier ⇒ Array<Relaton::Bib::Docidentifier>
Parse docid
52 53 54 55 56 57 58 59 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 52
#
# Parse docid.
#
# The primary "BIPM" identifier built from #pubid comes first, followed by
# one identifier per `article-id[@pub-id-type='doi']`.
#
# @return [Array<Relaton::Bib::Docidentifier>]
def parse_docidentifier
  ids = [create_docidentifier(pubid, "BIPM", true)]
  doi_nodes = @meta.xpath("./article-id[@pub-id-type='doi']")
  doi_nodes.each { |node| ids << create_docidentifier(node.text, node["pub-id-type"]) }
  # The list always contains the primary id, so this clears the flag whenever
  # it is set.
  @errors[:article_docidentifier] &&= ids.empty?
  ids
end
#parse_doctype ⇒ Object
453 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 453
#
# Build the document type.
#
# @return [Doctype] doctype whose content is "article"
def parse_doctype
  Doctype.new(content: "article")
end
#parse_ext ⇒ Object
451 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 451
#
# Build the ext element.
#
# @return [Ext] ext wrapping the result of #parse_doctype
def parse_ext
  doctype = parse_doctype
  Ext.new(doctype: doctype)
end
#parse_extent ⇒ Array<Relaton::Bib::Extent>
Parse extent
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 423
#
# Parse extent.
#
# Creates a locality for each `volume`, `issue` and `fpage` element. For
# `fpage` the locality type is "page" and the range end is the `lpage`
# text; the other localities have no range end.
#
# @return [Array<Relaton::Bib::Extent>] a single extent, or [] when no
#   locality was found
def parse_extent
  localities = @meta.xpath("./volume|./issue|./fpage").map do |node|
    page = node.name == "fpage"
    Relaton::Bib::Locality.new(
      type: page ? "page" : node.name,
      reference_from: node.text,
      reference_to: (page ? @meta.at("./lpage")&.text : nil),
    )
  end
  @errors[:article_extent] &&= localities.empty?
  localities.empty? ? [] : [Relaton::Bib::Extent.new(locality: localities)]
end
#parse_references ⇒ Array<Relaton::Bib::Relation>
Parse back/ref-list references as “cites” relations
346 347 348 349 350 351 352 353 354 355 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 346
#
# Parse back/ref-list references as "cites" relations.
#
# References without an `element-citation` child are ignored.
#
# @return [Array<Relaton::Bib::Relation>]
def parse_references
  citations = @doc.xpath("./back/ref-list/ref").map { |ref| ref.at("./element-citation") }.compact
  relations = citations.map do |citation|
    Relaton::Bib::Relation.new(type: "cites", bibitem: citation_bibitem(citation))
  end
  @errors[:article_references] &&= relations.empty?
  relations
end
#parse_relation ⇒ Array<Relaton::Bib::Relation>
Parse relation
333 334 335 336 337 338 339 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 333
#
# Parse relation.
#
# Produces one "hasManifestation" relation per publication date, followed
# by the "cites" relations from #parse_references. The error flag only
# reflects the manifestation relations.
#
# @return [Array<Relaton::Bib::Relation>]
def parse_relation
  manifestations = dates do |date, type|
    Relaton::Bib::Relation.new(type: "hasManifestation", bibitem: bibitem(date, type))
  end
  @errors[:article_relation] &&= manifestations.empty?
  [*manifestations, *parse_references]
end
#parse_series ⇒ Array<Relaton::Bib::Series>
Parse series
413 414 415 416 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 413
#
# Parse series.
#
# @return [Array<Relaton::Bib::Series>] a single series titled with
#   #journal_title
def parse_series
  [
    Relaton::Bib::Series.new(
      title: [Relaton::Bib::Title.new(content: journal_title, language: "en", script: "Latn")],
    ),
  ]
end
#parse_source ⇒ Object
441 442 443 444 445 446 447 448 449 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 441
#
# Build source links from the article's DOI identifiers.
#
# @return [Array<Relaton::Bib::Uri>] for each DOI, a "src" and a "doi" URI
#   pointing at the same https://doi.org/ URL
def parse_source
  uris = @meta.xpath("./article-id[@pub-id-type='doi']").flat_map do |node|
    url = "https://doi.org/#{node.text}"
    %w[src doi].map { |kind| Relaton::Bib::Uri.new(content: url, type: kind) }
  end
  @errors[:article_source] &&= uris.empty?
  uris
end
#parse_title ⇒ Array<Relaton::Bib::TypedTitleString>
Parse title
114 115 116 117 118 119 120 121 122 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 114
#
# Parse title.
#
# @return [Array<Relaton::Bib::Title>] one title per `article-title` element
#   with non-empty text, carrying its inner HTML and `xml:lang` attribute
def parse_title
  nodes = @meta.xpath("./title-group/article-title").reject { |node| node.text.empty? }
  titles = nodes.map do |node|
    Relaton::Bib::Title.new(content: node.inner_html, language: node[:"xml:lang"], script: "Latn")
  end
  @errors[:article_title] &&= titles.empty?
  titles
end
#parse_type ⇒ Object
439 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 439
#
# Item type of every parsed document.
#
# @return [String] always "article"
def parse_type
  "article"
end
#pubid ⇒ String
Build primary publication identifier string (e.g. “Metrologia 55 1 125”)
66 67 68 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 66
#
# Build primary publication identifier string (e.g. "Metrologia 55 1 125").
#
# The value is memoized after the first call.
#
# @return [String] #journal_title and #volume_issue_article joined by a space
def pubid
  @pubid ||= [journal_title, volume_issue_article].join(" ")
end
#volume_issue_article ⇒ String
Parse volume, issue and page
75 76 77 |
# File 'lib/relaton/bipm/rawdata_bipm_metrologia/article_parser.rb', line 75
#
# Join the identifier components given to the constructor.
#
# @return [String] the non-nil values of @journal, @volume and @article,
#   separated by single spaces
def volume_issue_article
  components = [@journal, @volume, @article].reject(&:nil?)
  components.join(" ")
end