Class: RelatonIec::DataParser
- Inherits:
-
Object
- Object
- RelatonIec::DataParser
- Defined in:
- lib/relaton_iec/data_parser.rb
Constant Summary collapse
- DOMAIN =
"https://webstore.iec.ch"
- ATTRS =
%i[ docid structuredidentifier language script title doctype ics date contributor editorialgroup abstract copyright link relation ].freeze
- ABBREVS =
{ "ISO" => ["International Organization for Standardization", "www.iso.org"], "IEC" => ["International Electrotechnical Commission", "www.iec.ch"], "IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"], "ASTM" => ["American Society of Testing Materials", "www.astm.org"], "CISPR" => ["International special committee on radio interference", "www.iec.ch"], }.freeze
- DOCTYPES =
{ "IS" => "international-standard", "TR" => "technical-report", "TS" => "technical-specification", "PAS" => "publicly-available-specification", "SRD" => "system-reference-deliverable", }
Instance Method Summary collapse
-
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
-
#contributor ⇒ Array<Hash>
Parse contributors.
- #copyright ⇒ Array<Hash>
-
#create_relations(doc) ⇒ Array<Hash>
Create relations.
-
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
-
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
-
#doctype ⇒ String
Parse document type.
-
#editorialgroup ⇒ Hash
Parse editorial group.
-
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetch ICS.
-
#initialize(pub) ⇒ DataParser
constructor
Initialize new instance.
-
#lang_to_script(lang) ⇒ String
Detect script.
-
#language ⇒ Array<String>
Parse languages.
-
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
-
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
-
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#script ⇒ Array<String>
Parse scripts.
-
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
-
#title ⇒ RelatonBib::TypedTitleStringCollection
Parse titles.
-
#urn_id ⇒ String
Extract URN ID from URN.
Constructor Details
#initialize(pub) ⇒ DataParser
Initialize new instance.
31 32 33 |
# File 'lib/relaton_iec/data_parser.rb', line 31
#
# Initialize new instance.
#
# @param pub [Hash] publication record; the parsing methods of this class
#   read its fields through string keys
def initialize(pub)
  @pub = pub
end
Instance Method Details
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
144 145 146 147 148 149 150 151 |
# File 'lib/relaton_iec/data_parser.rb', line 144
#
# Parse abstract.
#
# @return [Array<RelatonBib::FormattedString>, nil] one formatted string per
#   entry of the "abstract" field, or nil when that field is absent
def abstract
  entries = @pub["abstract"]
  return if entries.nil?

  entries.map do |entry|
    lang = entry["lang"]
    RelatonBib::FormattedString.new(
      content: entry["content"],
      language: lang,
      script: lang_to_script(lang),
      format: entry["format"],
    )
  end
end
#contributor ⇒ Array<Hash>
Parse contributors.
203 204 205 206 207 208 209 |
# File 'lib/relaton_iec/data_parser.rb', line 203
#
# Parse contributors.
#
# Everything from the first whitespace onwards is dropped from the
# "reference" field; what remains is split on "/" and each piece is treated
# as a publisher abbreviation. Name and URL come from ABBREVS and are nil
# for abbreviations that are not listed there.
#
# @return [Array<Hash>] one publisher hash per abbreviation
def contributor
  abbreviations = @pub["reference"].sub(/\s.*/, "").split("/")
  abbreviations.map do |abbrev|
    name, url = ABBREVS[abbrev]
    entity = { name: name, url: url, abbreviation: abbrev }
    { entity: entity, role: [{ type: "publisher" }] }
  end
end
#copyright ⇒ Array<Hash>
154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/relaton_iec/data_parser.rb', line 154
#
# Parse copyright.
#
# The year is the four digits that follow a colon in the "reference" field;
# if there are none, the first four-digit run of "releaseDate" is used.
# Owners are the "/"-separated abbreviations found before the first
# whitespace of the reference, looked up in ABBREVS.
#
# @return [Array<Hash>] a single copyright hash, or an empty array when no
#   year can be determined
def copyright # rubocop:disable Metrics/AbcSize
  reference = @pub["reference"]
  year = reference.match(/(?<=:)\d{4}/).to_s
  # `&.` short-circuits the whole chain, so year becomes nil (not "") when
  # "releaseDate" is absent.
  year = @pub["releaseDate"]&.match(/\d{4}/).to_s if year.empty?
  return [] if year.to_s.empty?

  prefix = reference.match(/.*?(?=\s)/).to_s
  owners = prefix.split("/").map do |abbrev|
    name, url = ABBREVS[abbrev]
    { name: name, abbreviation: abbrev, url: url }
  end
  [{ owner: owners, from: year }]
end
#create_relations(doc) ⇒ Array<Hash>
Create relations.
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 |
# File 'lib/relaton_iec/data_parser.rb', line 272
#
# Create relations.
#
# Selects every ROW whose STATUS is neither "PREPARING" nor "PUBLISHED" and
# turns it into a document relation. The lower-cased status is the relation
# type, except that "revised"/"replaced" become "updates" and "withdrawn"
# becomes "obsoletes".
#
# @param doc [Nokogiri::XML::Document] document answering #xpath
# @return [Array<RelatonBib::DocumentRelation>] one relation per selected row
def create_relations(doc) # rubocop:disable Metrics/MethodLength
  type_by_status = {
    "revised" => "updates",
    "replaced" => "updates",
    "withdrawn" => "obsoletes",
  }
  rows = doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
  rows.map do |row|
    status = row.at("STATUS").text.downcase
    type = type_by_status.fetch(status, status)
    ref = row.at("FULL_NAME").text
    fref = RelatonBib::FormattedRef.new content: ref, format: "text/plain"
    docid = RelatonBib::DocumentIdentifier.new(id: ref, type: "IEC", primary: true)
    bibitem = IecBibliographicItem.new(formattedref: fref, docid: [docid])
    RelatonBib::DocumentRelation.new type: type, bibitem: bibitem
  end
end
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/relaton_iec/data_parser.rb', line 185
#
# Parse dates.
#
# Each date type is produced only when its source field is present (truthy)
# in the publication record.
#
# @return [Array<RelatonBib::BibliographicDate>] dates in the order
#   published, stable-until, confirmed, obsoleted
def date
  field_by_type = {
    "published" => "publicationDate",
    "stable-until" => "stabilityDate",
    "confirmed" => "confirmationDate",
    "obsoleted" => "dateOfWithdrawal",
  }
  field_by_type.each_with_object([]) do |(type, field), dates|
    value = @pub[field]
    next unless value

    dates << RelatonBib::BibliographicDate.new(type: type, on: value)
  end
end
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
54 55 56 57 58 59 |
# File 'lib/relaton_iec/data_parser.rb', line 54
#
# Parse document identifiers.
#
# @return [Array<RelatonBib::DocumentIdentifier>] the primary IEC identifier
#   built from "reference", followed by a URN identifier built from the first
#   element of "urnAlt"
def docid
  primary = RelatonBib::DocumentIdentifier.new(
    id: @pub["reference"], type: "IEC", primary: true,
  )
  urn = RelatonBib::DocumentIdentifier.new(
    id: "urn:#{@pub['urnAlt'][0]}", type: "URN",
  )
  [primary, urn]
end
#doctype ⇒ String
Parse document type.
241 242 243 244 |
# File 'lib/relaton_iec/data_parser.rb', line 241
#
# Parse document type.
#
# The "stdType" field is translated through DOCTYPES; a value not listed
# there is used lower-cased as is.
#
# @return [DocumentType] document type object wrapping the resolved type
def doctype
  std_type = @pub["stdType"]
  type = DOCTYPES[std_type]
  type ||= std_type.downcase
  DocumentType.new type: type
end
#editorialgroup ⇒ Hash
Parse editorial group.
126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/relaton_iec/data_parser.rb', line 126
#
# Parse editorial group.
#
# @return [Hash, nil] hash with a single technical committee whose name is
#   the committee reference and whose number is the first run of digits in
#   that reference (nil if it has none); nil when the record has no
#   "committee" field
def editorialgroup
  committee = @pub["committee"]
  return unless committee

  reference = committee["reference"]
  digits = reference.match(/\d+/)
  number = digits && digits.to_s.to_i
  group = { name: reference, type: "technicalCommittee", number: number }
  { technical_committee: [group] }
end
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetche ics.
172 173 174 175 176 177 178 |
# File 'lib/relaton_iec/data_parser.rb', line 172
#
# Fetch ICS.
#
# @return [Array<RelatonIsoBib::Ics>] one Ics per classification entry whose
#   "type" is "ICS"; empty when the record has no "classifications" field
def ics
  classifications = @pub["classifications"]
  return [] unless classifications

  ics_entries = classifications.select { |entry| entry["type"] == "ICS" }
  ics_entries.map { |entry| RelatonIsoBib::Ics.new(entry["value"]) }
end
#lang_to_script(lang) ⇒ String
Detect script.
102 103 104 105 106 |
# File 'lib/relaton_iec/data_parser.rb', line 102
#
# Detect script.
#
# @param lang [String] language code
# @return [String, nil] "Latn" for "en", "fr" and "es"; nil for anything else
def lang_to_script(lang)
  "Latn" if %w[en fr es].include?(lang)
end
#language ⇒ Array<String>
Parse languages.
79 80 81 |
# File 'lib/relaton_iec/data_parser.rb', line 79
#
# Parse languages.
#
# @return [Array<String>] the distinct "lang" values of the title entries,
#   in order of first appearance
def language
  title_langs = @pub["title"].map { |entry| entry["lang"] }
  title_langs.uniq
end
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
216 217 218 219 220 221 222 223 224 225 |
# File 'lib/relaton_iec/data_parser.rb', line 216
#
# Parse links.
#
# @return [Array<RelatonBib::TypedUri>] the "src" link to the publication
#   page, followed by one "obp" link for every release item of type
#   "PREVIEW"
def link
  src = RelatonBib::TypedUri.new(
    content: "#{DOMAIN}/publication/#{urn_id}", type: "src",
  )
  links = [src]
  RelatonBib.array(@pub["releaseItems"]).each do |item|
    next unless item["type"] == "PREVIEW"

    preview = "#{DOMAIN}/preview/#{item['contentRef']['fileName']}"
    links << RelatonBib::TypedUri.new(content: preview, type: "obp")
  end
  links
end
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
40 41 42 43 44 45 46 47 |
# File 'lib/relaton_iec/data_parser.rb', line 40
#
# Parse document.
#
# Calls the parsing method named by each entry of ATTRS and passes the
# collected values, together with status, edition, price code and place, to
# IecBibliographicItem.
#
# @return [RelatonIec::IecBibliographicItem] bibliographic item
def parse # rubocop:disable Metrics/AbcSize
  args = ATTRS.each_with_object({}) { |a, h| h[a] = send a }
  args[:docstatus] = RelatonBib::DocumentStatus.new stage: @pub["status"]
  args[:edition] = @pub["edition"]
  # Hash#dig yields nil when "priceInfo" is missing, where chained #[] calls
  # would raise NoMethodError on nil.
  args[:price_code] = @pub.dig("priceInfo", "priceCode")
  args[:place] = ["Geneva"]
  IecBibliographicItem.new(**args)
end
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
251 252 253 254 255 256 257 258 259 260 261 262 263 |
# File 'lib/relaton_iec/data_parser.rb', line 251
#
# Parse relation.
#
# Requests the relation XML for this publication from the webstore and
# converts it with #create_relations. Any StandardError triggers another
# attempt; the error of the third failed attempt is re-raised.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
def relation # rubocop:disable Metrics/MethodLength
  attempts = 0
  # The counter lives outside the begin block so that `retry` (which restarts
  # only the block) does not reset it.
  begin
    endpoint = URI "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?" \
                   "Openagent&url=#{urn_id}"
    response = Net::HTTP.get_response endpoint
    xml = Nokogiri::XML response.body
    create_relations xml
  rescue StandardError => e
    attempts += 1
    raise e unless attempts < 3

    retry
  end
end
#script ⇒ Array<String>
Parse scripts.
88 89 90 91 92 93 |
# File 'lib/relaton_iec/data_parser.rb', line 88
#
# Parse scripts.
#
# @return [Array<String>] the distinct scripts of the document languages, in
#   order of first appearance; languages without a known script are skipped
def script
  scripts = language.map { |lang| lang_to_script lang }
  scripts.compact.uniq
end
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
66 67 68 69 70 71 72 |
# File 'lib/relaton_iec/data_parser.rb', line 66
#
# Parse structured identifier.
#
# @return [RelatonIsoBib::StructuredIdentifier, nil] identifier whose project
#   number is the last ":"-separated segment of the project URN; nil when the
#   record has no project URN
def structuredidentifier
  project_urn = @pub.dig("project", "urn")
  return unless project_urn

  RelatonIsoBib::StructuredIdentifier.new(
    project_number: project_urn.split(":").last, type: "IEC",
  )
end
#title ⇒ RelatonBib::TypedTitleStringCollection
Parse titles.
113 114 115 116 117 118 119 |
# File 'lib/relaton_iec/data_parser.rb', line 113
#
# Parse titles.
#
# @return [RelatonBib::TypedTitleStringCollection] collection accumulated by
#   adding the result of TypedTitleString.from_string for every title entry
def title
  collection = RelatonBib::TypedTitleStringCollection.new
  @pub["title"].each do |entry|
    lang = entry["lang"]
    collection += RelatonBib::TypedTitleString.from_string(
      entry["value"], lang, lang_to_script(lang)
    )
  end
  collection
end
#urn_id ⇒ String
Extract URN ID from URN.
232 233 234 |
# File 'lib/relaton_iec/data_parser.rb', line 232
#
# Extract URN ID from URN.
#
# @return [String] the last ":"-separated segment of the "urn" field
def urn_id
  segments = @pub["urn"].split(":")
  segments[-1]
end