Class: RelatonIec::DataParser
- Inherits:
-
Object
- Object
- RelatonIec::DataParser
- Defined in:
- lib/relaton_iec/data_parser.rb
Constant Summary collapse
# Base URL of the IEC webstore; #link and #relation build their URLs on it.
DOMAIN = "https://webstore.iec.ch"

# Names of the attributes collected by #parse. Each symbol is sent to the
# parser as a method name and the result is stored under the same key.
ATTRS = %i[
  docid structuredidentifier language script title doctype ics date
  contributor editorialgroup abstract copyright link relation
].freeze
# Maps a publisher abbreviation to a pair of [full name, website].
# Read by #contributor and #copyright to expand the abbreviations found at
# the start of a publication reference.
ABBREVS = {
  "ISO" => ["International Organization for Standardization", "www.iso.org"],
  "IEC" => ["International Electrotechnical Commission", "www.iec.ch"],
  "IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"],
  # Corrected: the organization's name is "for Testing and Materials".
  "ASTM" => ["American Society for Testing and Materials", "www.astm.org"],
  "CISPR" => ["International special committee on radio interference", "www.iec.ch"],
}.freeze
# Maps the "stdType" code of a publication record to a document type name.
# #doctype falls back to the downcased code for anything not listed here.
# Frozen like the other lookup constants so it cannot be mutated at runtime.
DOCTYPES = {
  "IS" => "international-standard",
  "TR" => "technical-report",
  "TS" => "technical-specification",
  "PAS" => "publicly-available-specification",
  "SRD" => "system-reference-deliverable",
}.freeze
Instance Method Summary collapse
-
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
-
#contributor ⇒ Array<Hash>
Parse contributors.
- #copyright ⇒ Array<Hash>
-
#create_relations(doc) ⇒ Array<RelatonBib::DocumentRelation>
Create relations.
-
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
-
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
-
#doctype ⇒ DocumentType
Parse document type.
-
#editorialgroup ⇒ Hash
Parse editorial group.
-
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetch ICS classifications.
-
#initialize(pub) ⇒ DataParser
constructor
Initialize new instance.
-
#lang_to_script(lang) ⇒ String
Detect script.
-
#language ⇒ Array<String>
Parse languages.
-
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
-
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
-
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#script ⇒ Array<String>
Parse scripts.
-
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
-
#title ⇒ RelatonBib::TypedTitleStringCollection
Parse titles.
-
#urn_id ⇒ String
Extract URN ID from URN.
Constructor Details
#initialize(pub) ⇒ DataParser
Initialize new instance.
31 32 33 |
# File 'lib/relaton_iec/data_parser.rb', line 31
#
# Initialize new instance.
#
# @param pub [Hash] publication record; the other methods of this class read
#   it through string keys such as "reference", "title" and "urn"
def initialize(pub)
  @pub = pub
end
Instance Method Details
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
149 150 151 152 153 154 155 156 |
# File 'lib/relaton_iec/data_parser.rb', line 149
#
# Parse abstract.
#
# Each entry of @pub["abstract"] becomes one formatted string; the script is
# derived from the entry's language via #lang_to_script.
#
# @return [Array<RelatonBib::FormattedString>, nil] nil when the record has
#   no "abstract" key
def abstract
  entries = @pub["abstract"]
  return if entries.nil?

  entries.map do |entry|
    lang = entry["lang"]
    RelatonBib::FormattedString.new(
      content: entry["content"],
      language: lang,
      script: lang_to_script(lang),
      format: entry["format"]
    )
  end
end
#contributor ⇒ Array<Hash>
Parse contributors.
208 209 210 211 212 213 214 |
# File 'lib/relaton_iec/data_parser.rb', line 208
#
# Parse contributors.
#
# The publisher abbreviations are the "/"-separated tokens at the start of
# @pub["reference"], i.e. everything before the first whitespace.
#
# @return [Array<Hash>] one publisher entry per abbreviation; name and url
#   are nil for abbreviations missing from ABBREVS
def contributor
  prefix = @pub["reference"].sub(/\s.*/, "")
  abbreviations = prefix.split("/")
  abbreviations.map do |abbrev|
    name, url = ABBREVS[abbrev]
    entity = { name: name, url: url, abbreviation: abbrev }
    { entity: entity, role: [type: "publisher"] }
  end
end
#copyright ⇒ Array<Hash>
159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/relaton_iec/data_parser.rb', line 159
#
# Parse copyright.
#
# The year is the four digits following ":" in @pub["reference"]; when the
# reference has none, the first four-digit run of @pub["releaseDate"] is
# used instead. The owners are the "/"-separated abbreviations that lead the
# reference, expanded through ABBREVS.
#
# @return [Array<Hash>] a single { owner:, from: } entry, or an empty array
#   when no year can be determined
def copyright
  reference = @pub["reference"]
  year = reference[/(?<=:)\d{4}/] || @pub["releaseDate"]&.slice(/\d{4}/)
  return [] unless year

  # Take the leading non-whitespace run so that a reference consisting of
  # the abbreviation alone still yields its owner, as #contributor does.
  owner = reference[/\A\S*/].split("/").map do |abbrev|
    name, url = ABBREVS[abbrev]
    { name: name, abbreviation: abbrev, url: url }
  end
  [{ owner: owner, from: year }]
end
#create_relations(doc) ⇒ Array<RelatonBib::DocumentRelation>
Create relations.
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 |
# File 'lib/relaton_iec/data_parser.rb', line 277
#
# Create relations.
#
# Every ROW whose STATUS is neither "PREPARING" nor "PUBLISHED" becomes one
# relation. "revised" and "replaced" map to "updates", "withdrawn" maps to
# "obsoletes", and any other status is used, downcased, as the type itself.
#
# @param doc [Nokogiri::XML::Document] parsed response (see #relation)
# @return [Array<RelatonBib::DocumentRelation>]
def create_relations(doc) # rubocop:disable Metrics/MethodLength
  type_by_status = {
    "revised" => "updates",
    "replaced" => "updates",
    "withdrawn" => "obsoletes",
  }
  rows = doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
  rows.map do |row|
    status = row.at("STATUS").text.downcase
    relation_type = type_by_status.fetch(status, status)
    reference = row.at("FULL_NAME").text
    bibitem = IecBibliographicItem.new(
      formattedref: RelatonBib::FormattedRef.new(content: reference, format: "text/plain"),
      docid: [RelatonBib::DocumentIdentifier.new(id: reference, type: "IEC", primary: true)]
    )
    RelatonBib::DocumentRelation.new(type: relation_type, bibitem: bibitem)
  end
end
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
190 191 192 193 194 195 196 197 198 199 200 201 |
# File 'lib/relaton_iec/data_parser.rb', line 190
#
# Parse dates.
#
# Looks up four date fields in the publication record and emits a
# bibliographic date for each one that is present.
#
# @return [Array<RelatonBib::BibliographicDate>]
def date
  field_by_type = {
    "published" => "publicationDate",
    "stable-until" => "stabilityDate",
    "confirmed" => "confirmationDate",
    "obsoleted" => "dateOfWithdrawal",
  }
  field_by_type.each_with_object([]) do |(type, field), dates|
    value = @pub[field]
    next unless value

    dates << RelatonBib::BibliographicDate.new(type: type, on: value)
  end
end
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/relaton_iec/data_parser.rb', line 54
#
# Parse document identifiers.
#
# The primary identifier is built from the parsed reference; if parsing (or
# building the identifier from the parsed value) raises, the raw reference
# string is used instead. A URN identifier built from the first "urnAlt"
# entry is always appended.
#
# @return [Array<RelatonBib::DocumentIdentifier>] primary id, then URN id
def docid
  reference = @pub["reference"]
  primary =
    begin
      parsed = Pubid::Iec::Identifier.parse(reference)
      DocumentIdentifier.new(id: parsed, type: "IEC", primary: true)
    rescue StandardError
      DocumentIdentifier.new(id: @pub["reference"], type: "IEC", primary: true)
    end
  urn = DocumentIdentifier.new(id: "urn:#{@pub['urnAlt'][0]}", type: "URN")
  [primary, urn]
end
#doctype ⇒ DocumentType
Parse document type.
246 247 248 249 |
# File 'lib/relaton_iec/data_parser.rb', line 246
#
# Parse document type.
#
# Known "stdType" codes are translated through DOCTYPES; any other code is
# used downcased.
#
# @return [DocumentType]
def doctype
  code = @pub["stdType"]
  name = DOCTYPES.fetch(code) { code.downcase }
  DocumentType.new(type: name)
end
#editorialgroup ⇒ Hash
Parse editorial group.
131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/relaton_iec/data_parser.rb', line 131
#
# Parse editorial group.
#
# @return [Hash, nil] a single technical committee named after the
#   committee reference, numbered with the first run of digits in that
#   reference (nil when it has none); nil when the record has no "committee"
def editorialgroup
  committee = @pub["committee"]
  return unless committee

  reference = committee["reference"]
  committee_number = reference[/\d+/]&.to_i
  {
    technical_committee: [
      { name: reference, type: "technicalCommittee", number: committee_number },
    ],
  }
end
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetche ics.
177 178 179 180 181 182 183 |
# File 'lib/relaton_iec/data_parser.rb', line 177
#
# Fetch ICS classifications.
#
# Only entries of @pub["classifications"] whose "type" is "ICS" are kept;
# each is built from the entry's "value".
#
# @return [Array<RelatonIsoBib::Ics>] empty when the record has no
#   "classifications"
def ics
  classifications = @pub["classifications"]
  return [] unless classifications

  classifications.each_with_object([]) do |entry, codes|
    next unless entry["type"] == "ICS"

    codes << RelatonIsoBib::Ics.new(entry["value"])
  end
end
#lang_to_script(lang) ⇒ String
Detect script.
107 108 109 110 111 |
# File 'lib/relaton_iec/data_parser.rb', line 107
#
# Detect script.
#
# @param lang [String] language code
# @return [String, nil] "Latn" for "en", "fr" and "es"; nil for anything else
def lang_to_script(lang)
  "Latn" if %w[en fr es].include?(lang)
end
#language ⇒ Array<String>
Parse languages.
84 85 86 |
# File 'lib/relaton_iec/data_parser.rb', line 84
#
# Parse languages.
#
# @return [Array<String>] the "lang" of every title entry, duplicates
#   removed, in order of first appearance
def language
  title_langs = @pub["title"].map { |entry| entry["lang"] }
  title_langs.uniq
end
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
221 222 223 224 225 226 227 228 229 230 |
# File 'lib/relaton_iec/data_parser.rb', line 221
#
# Parse links.
#
# Always yields the "src" link to the publication page, followed by one
# "obp" link for every release item whose "type" is "PREVIEW".
#
# @return [Array<RelatonBib::TypedUri>]
def link
  source = RelatonBib::TypedUri.new(content: "#{DOMAIN}/publication/#{urn_id}", type: "src")
  links = [source]
  RelatonBib.array(@pub["releaseItems"]).each do |item|
    next unless item["type"] == "PREVIEW"

    preview = "#{DOMAIN}/preview/#{item['contentRef']['fileName']}"
    links << RelatonBib::TypedUri.new(content: preview, type: "obp")
  end
  links
end
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
40 41 42 43 44 45 46 47 |
# File 'lib/relaton_iec/data_parser.rb', line 40
#
# Parse document.
#
# Collects every attribute named in ATTRS by calling the method of the same
# name, then adds status, edition, price code and place.
#
# @return [RelatonIec::IecBibliographicItem]
def parse # rubocop:disable Metrics/AbcSize
  args = ATTRS.each_with_object({}) { |attr, collected| collected[attr] = send(attr) }
  args[:docstatus] = RelatonBib::DocumentStatus.new(stage: @pub["status"])
  args[:edition] = @pub["edition"]
  # dig keeps a record without "priceInfo" from raising NoMethodError on nil;
  # the price code is then simply nil.
  args[:price_code] = @pub.dig("priceInfo", "priceCode")
  args[:place] = [RelatonBib::Place.new(city: "Geneva")]
  IecBibliographicItem.new(**args)
end
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
256 257 258 259 260 261 262 263 264 265 266 267 268 |
# File 'lib/relaton_iec/data_parser.rb', line 256
#
# Parse relation.
#
# Requests the relation XML for this publication from the webstore and
# hands the parsed document to #create_relations. Any StandardError
# triggers an immediate retry; the third failure is re-raised.
#
# @return [Array<RelatonBib::DocumentRelation>]
# @raise [StandardError] the error of the third failed attempt
def relation # rubocop:disable Metrics/MethodLength
  failures = 0
  begin
    endpoint = "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?" \
               "Openagent&url=#{urn_id}"
    response = Net::HTTP.get_response(URI(endpoint))
    create_relations(Nokogiri::XML(response.body))
  rescue StandardError => e
    failures += 1
    raise e unless failures < 3

    retry
  end
end
#script ⇒ Array<String>
Parse scripts.
93 94 95 96 97 98 |
# File 'lib/relaton_iec/data_parser.rb', line 93
#
# Parse scripts.
#
# @return [Array<String>] the distinct scripts of the languages returned by
#   #language; languages for which #lang_to_script yields nil are skipped
def script
  detected = language.map { |lang| lang_to_script(lang) }
  detected.compact.uniq
end
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
71 72 73 74 75 76 77 |
# File 'lib/relaton_iec/data_parser.rb', line 71
#
# Parse structured identifier.
#
# The project number is the last ":"-separated segment of the project URN.
#
# @return [RelatonIsoBib::StructuredIdentifier, nil] nil when the record has
#   no project URN
def structuredidentifier
  project_urn = @pub.dig("project", "urn")
  return unless project_urn

  project_number = project_urn.split(":").last
  RelatonIsoBib::StructuredIdentifier.new(project_number: project_number, type: "IEC")
end
#title ⇒ RelatonBib::TypedTitleStringCollection
Parse titles.
118 119 120 121 122 123 124 |
# File 'lib/relaton_iec/data_parser.rb', line 118
#
# Parse titles.
#
# Each entry of @pub["title"] is converted with
# RelatonBib::TypedTitleString.from_string and added to the collection.
#
# @return [RelatonBib::TypedTitleStringCollection]
def title
  collection = RelatonBib::TypedTitleStringCollection.new
  @pub["title"].each do |entry|
    lang = entry["lang"]
    collection += RelatonBib::TypedTitleString.from_string(
      entry["value"], lang, lang_to_script(lang)
    )
  end
  collection
end
#urn_id ⇒ String
Extract URN ID from URN.
237 238 239 |
# File 'lib/relaton_iec/data_parser.rb', line 237
#
# Extract URN ID from URN.
#
# @return [String, nil] the last ":"-separated segment of @pub["urn"]; nil
#   when the URN is an empty string
def urn_id
  *, id = @pub["urn"].split(":")
  id
end