Class: Relaton::Iso::Scraper
- Inherits:
-
Object
- Object
- Relaton::Iso::Scraper
- Defined in:
- lib/relaton/iso/scraper.rb
Overview
Scraper for ISO web pages.
Constant Summary collapse
- DOMAIN =
Base URL of the ISO website.
"https://www.iso.org"
- TYPES =
{ "TS" => "technical-specification", "DTS" => "technical-specification", "TR" => "technical-report", "DTR" => "technical-report", "PAS" => "publicly-available-specification", # "AWI" => "approvedWorkItem", # "CD" => "committeeDraft", # "FDIS" => "finalDraftInternationalStandard", # "NP" => "newProposal", # "DIS" => "draftInternationalStandard", # "WD" => "workingDraft", # "R" => "recommendation", "Guide" => "guide", "ISO" => "international-standard", "IEC" => "international-standard", "IWA" => "international-workshop-agreement", }.freeze
- STGABBR =
{ "00" => "NWIP", "10" => "AWI", "20" => "WD", "30" => "CD", "40" => "DIS", "50" => "FDIS", "60" => { "00" => "PRF", "60" => "FINAL" }, }.freeze
- PUBLISHERS =
{ "IEC" => { name: "International Electrotechnical Commission", uri: "www.iec.ch" }, "ISO" => { name: "International Organization for Standardization", uri: "www.iso.org" }, "IEEE" => { name: "Institute of Electrical and Electronics Engineers", uri: "www.ieee.org" }, "SAE" => { name: "SAE International", uri: "www.sae.org" }, "CIE" => { name: " International Commission on Illumination", uri: "cie.co.at" }, "ASME" => { name: "American Society of Mechanical Engineers", uri: "www.asme.org" }, }.freeze
Class Method Summary collapse
Instance Method Summary collapse
- #edition ⇒ Object
-
#fetch_relaton_docids ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
- #id ⇒ Object
-
#initialize(lang, errors) ⇒ Scraper
constructor
Stores the language and the errors hash used while parsing.
-
#isoref ⇒ String
Create ISO reference identifier with English language.
-
#parse(path) ⇒ Object
Fetch the page at the given path and build the bibliographic item from it.
-
#pubid ⇒ Object
Parse the scraped identifier into a Pubid identifier (memoized).
- #urn ⇒ Object
Constructor Details
#initialize(lang, errors) ⇒ Scraper
Stores the language and the errors hash used while parsing.
49 50 51 52 |
# File 'lib/relaton/iso/scraper.rb', line 49
#
# Builds a scraper and remembers the two values it was given.
#
# @param lang [Object] language setting (presumably a language code or nil — confirm against callers)
# @param errors [Hash] hash indexed by symbols such as :id and :edition by the scraping methods
def initialize(lang, errors)
  @lang, @errors = lang, errors
end
Class Method Details
.parse_page(path, lang: nil, errors: {}) ⇒ RelatonIsoBib::IsoBibliographicItem
Parse page.
59 60 61 |
# File 'lib/relaton/iso/scraper.rb', line 59
#
# Convenience entry point: builds a scraper instance and parses one page with it.
#
# @param path [Object] page location, handed unchanged to #parse
# @param lang [Object, nil] language setting forwarded to the constructor
# @param errors [Hash] errors hash forwarded to the constructor
# @return [Object] whatever #parse returns for the page
def self.parse_page(path, lang: nil, errors: {})
  scraper = new(lang, errors)
  scraper.parse(path)
end
Instance Method Details
#edition ⇒ Object
119 120 121 122 123 124 125 |
# File 'lib/relaton/iso/scraper.rb', line 119
#
# Reads the edition from the page and memoizes the result (including nil).
#
# The value is taken from the last text node of a div that has a child div
# whose text is exactly "Edition"; only the trailing run of digits is kept.
#
# @return [Bib::Edition, nil] nil when the page has no such node
def edition
  return @edition if defined?(@edition)

  node = @doc.at("//div[div[.='Edition']]/text()[last()]")
  # The flag is only rewritten while it is still truthy; it then stays truthy
  # only if the node is missing on this page as well.
  @errors[:edition] &&= node.nil?
  @edition =
    if node
      Bib::Edition.new(content: node.text[/\d+$/].to_s)
    else
      node
    end
end
#fetch_relaton_docids ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
132 133 134 135 136 137 138 |
# File 'lib/relaton/iso/scraper.rb', line 132
#
# Create document ids.
#
# Builds three identifiers, in this order: the primary "ISO" one from #pubid,
# the "iso-reference" one from #isoref, and the "URN" one from #urn.
#
# @return [Array<Docidentifier>]
def fetch_relaton_docids
  primary_id = Docidentifier.new(content: pubid, type: "ISO", primary: true)
  reference_id = Docidentifier.new(content: isoref, type: "iso-reference")
  urn_id = Docidentifier.new(content: urn, type: "URN")
  [primary_id, reference_id, urn_id]
end
#id ⇒ Object
95 96 97 98 99 100 101 |
# File 'lib/relaton/iso/scraper.rb', line 95
#
# Reads the document identifier text from the page heading and memoizes it
# (including nil).
#
# The text of the first span inside the h1 is split on " | " and the first
# part, stripped of surrounding whitespace, is used.
#
# @return [String, nil] nil when the heading span is not found
def id
  return @id if defined?(@id)

  heading = @doc.at("//h1/span[1]")
  # The flag is only rewritten while it is still truthy; it then stays truthy
  # only if the heading is missing on this page as well.
  @errors[:id] &&= heading.nil?
  @id = heading ? heading.text.split(" | ").first.strip : heading
end
#isoref ⇒ String
Create ISO reference identifier with English language.
145 146 147 148 |
# File 'lib/relaton/iso/scraper.rb', line 145
#
# Create ISO reference identifier with English language.
#
# The identifier is rebuilt from the attributes of #pubid, minus
# :typed_stage, with the language set to "en", and rendered in the
# :ref_num_short format.
#
# @return [String]
def isoref
  attributes = pubid.to_h.except(:typed_stage)
  reference = ::Pubid::Iso::Identifier.create(language: "en", **attributes)
  reference.to_s(format: :ref_num_short)
end
#parse(path) ⇒ Object
Fetch the page at the given path and build the bibliographic item from it.
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/relaton/iso/scraper.rb', line 63
#
# Fetches the page for +path+ and assembles an ItemData from the values
# scraped out of it.
#
# The fetch_* helpers are called in a fixed order; keep that order when
# editing, since they share the @doc / @errors state of this instance.
#
# @param path [Object] page location handed to +get_page+ (presumably a URL path — confirm)
# @return [ItemData]
def parse(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  @doc, @url = get_page path
  title_list, abstract_list, lang_list = fetch_titles_abstract
  all_contributors = fetch_contributors
  editorial_group = fetch_editorialgroup_contributor
  all_contributors << editorial_group if editorial_group

  ItemData.new(
    id: id.gsub(/[^\w]/, ""),
    # fetched: Date.today.to_s,
    type: "standard",
    docidentifier: fetch_relaton_docids,
    docnumber: fetch_docnumber,
    edition: edition,
    language: lang_list.map { |entry| entry[:lang] },
    script: lang_list.map { |entry| script(entry[:lang]) }.uniq,
    title: title_list,
    status: fetch_status,
    ics: fetch_ics,
    date: fetch_dates,
    contributor: all_contributors,
    abstract: abstract_list,
    copyright: fetch_copyright,
    source: fetch_source(@url),
    relation: fetch_relations,
    place: [Bib::Place.new(city: "Geneva")],
    structuredidentifier: fetch_structuredidentifier,
    ext: parse_ext,
  )
end
#pubid ⇒ Object
Parse the scraped identifier into a Pubid identifier (memoized).
103 104 105 106 107 108 109 110 111 |
# File 'lib/relaton/iso/scraper.rb', line 103
#
# Parses the scraped identifier text (#id) into a Pubid identifier and
# memoizes it.
#
# When the parsed identifier has a base, the scraped edition is copied onto
# the root identifier unless the root already carries an edition.
#
# @return [Pubid::Iso::Identifier, Object] the parsed identifier; if parsing
#   raises a StandardError the failure is logged through Util.error and the
#   return value of that call is returned instead
def pubid # rubocop:disable Metrics/AbcSize
  return @pubid if @pubid

  @pubid = ::Pubid::Iso::Identifier.parse(id)
  # #edition is nil when the page has no edition; safe navigation keeps that
  # case from raising NoMethodError and being misreported as a parse failure.
  @pubid.root.edition ||= edition&.content if @pubid.base
  @pubid
rescue StandardError => e
  Util.error "Failed to parse pubid from #{id}: #{e.message}"
end
#urn ⇒ Object
113 114 115 116 117 |
# File 'lib/relaton/iso/scraper.rb', line 113
#
# Returns a copy of #pubid whose stage is filled in from +stage_code+ when
# the identifier does not already have one. The memoized #pubid itself is
# not reassigned here; only its duplicate is modified.
#
# @return [Object] the duplicated identifier (not a String)
def urn
  pubid.dup.tap do |copy|
    copy.stage ||= ::Pubid::Iso::Identifier.parse_stage(stage_code)
  end
end