Class: Relaton::Iso::Scraper
- Inherits:
-
Object
- Object
- Relaton::Iso::Scraper
- Defined in:
- lib/relaton/iso/scraper.rb
Overview
Scraper for pages of the ISO website (https://www.iso.org).
Constant Summary collapse
- DOMAIN =
Base URL of the ISO website.
"https://www.iso.org"
- TYPES =
{ "TS" => "technical-specification", "DTS" => "technical-specification", "TR" => "technical-report", "DTR" => "technical-report", "PAS" => "publicly-available-specification", # "AWI" => "approvedWorkItem", # "CD" => "committeeDraft", # "FDIS" => "finalDraftInternationalStandard", # "NP" => "newProposal", # "DIS" => "draftInternationalStandard", # "WD" => "workingDraft", # "R" => "recommendation", "Guide" => "guide", "ISO" => "international-standard", "IEC" => "international-standard", "IWA" => "international-workshop-agreement", }.freeze
- STGABBR =
{ "00" => "NWIP", "10" => "AWI", "20" => "WD", "30" => "CD", "40" => "DIS", "50" => "FDIS", "60" => { "00" => "PRF", "60" => "FINAL" }, }.freeze
- PUBLISHERS =
{ "IEC" => { name: "International Electrotechnical Commission", uri: "www.iec.ch" }, "ISO" => { name: "International Organization for Standardization", uri: "www.iso.org" }, "IEEE" => { name: "Institute of Electrical and Electronics Engineers", uri: "www.ieee.org" }, "SAE" => { name: "SAE International", uri: "www.sae.org" }, "CIE" => { name: " International Commission on Illumination", uri: "cie.co.at" }, "ASME" => { name: "American Society of Mechanical Engineers", uri: "www.asme.org" }, }.freeze
Class Method Summary collapse
Instance Method Summary collapse
- #edition ⇒ Object
-
#fetch_relaton_docids ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
- #id ⇒ Object
-
#initialize(lang, errors) ⇒ Scraper
constructor
Stores the language and the error-tracking hash on the new scraper.
-
#isoref ⇒ String
Create ISO reference identifier with English language.
-
#parse(path) ⇒ Object
Fetch the page at path and build the bibliographic item from it.
-
#pubid ⇒ Object
Publication identifier parsed from the page id (memoized).
- #urn ⇒ Object
Constructor Details
#initialize(lang, errors) ⇒ Scraper
Stores the given language and error-tracking hash in instance variables.
49 50 51 52 |
# File 'lib/relaton/iso/scraper.rb', line 49

# Build a scraper bound to a language and an error-tracking hash.
#
# @param lang [Object] language value kept in @lang
# @param errors [Hash] hash of flags that the scraping methods update
def initialize(lang, errors)
  @errors = errors
  @lang = lang
end
Class Method Details
.parse_page(path, lang: nil, errors: {}) ⇒ RelatonIsoBib::IsoBibliographicItem
Parse page.
59 60 61 |
# File 'lib/relaton/iso/scraper.rb', line 59

# Convenience entry point: build a scraper and parse one page with it.
#
# @param path [Object] page location forwarded to #parse
# @param lang [Object, nil] language handed to the constructor
# @param errors [Hash] error-tracking hash handed to the constructor
# @return [Object] whatever #parse returns for the page
def self.parse_page(path, lang: nil, errors: {})
  scraper = new(lang, errors)
  scraper.parse(path)
end
Instance Method Details
#edition ⇒ Object
117 118 119 120 121 122 123 |
# File 'lib/relaton/iso/scraper.rb', line 117

# Edition scraped from the page (memoized, including a missing result).
#
# Looks up the last text node of the <div> that contains an "Edition"
# <div>; the trailing digits of that text become the edition content.
# An existing truthy @errors[:edition] flag is replaced by whether the
# node was missing.
#
# @return [Bib::Edition, nil] the edition, or nil when the node is absent
def edition
  return @edition if defined?(@edition)

  node = @doc.at("//div[div[.='Edition']]/text()[last()]")
  @errors[:edition] &&= node.nil?
  # Remember the miss so the lookup is not repeated.
  return @edition = node unless node

  number = node.text.match(/\d+$/).to_s
  @edition = Bib::Edition.new(content: number)
end
#fetch_relaton_docids ⇒ Array<RelatonBib::DocumentIdentifier>
Create document ids.
130 131 132 133 134 135 136 |
# File 'lib/relaton/iso/scraper.rb', line 130

# Create the document identifiers for the scraped item: the primary
# "ISO" id, the "iso-reference" id and the "URN" id, in that order.
#
# @return [Array<Docidentifier>] the three identifiers
def fetch_relaton_docids
  primary = Docidentifier.new(content: pubid, type: "ISO", primary: true)
  reference = Docidentifier.new(content: isoref, type: "iso-reference")
  urn_id = Docidentifier.new(content: urn, type: "URN")
  [primary, reference, urn_id]
end
#id ⇒ Object
95 96 97 98 99 100 101 |
# File 'lib/relaton/iso/scraper.rb', line 95

# Document id taken from the first <span> of the page's <h1> (memoized,
# including a missing result).
#
# Only the text before the first " | " separator is kept, stripped of
# surrounding whitespace. An existing truthy @errors[:id] flag is
# replaced by whether the heading node was missing.
#
# @return [String, nil] the id, or nil when the heading is absent or empty
def id
  return @id if defined?(@id)

  heading = @doc.at("//h1/span[1]")
  @errors[:id] &&= heading.nil?
  return @id = heading unless heading

  # split yields [] for an empty heading, so `first` may be nil here.
  @id = heading.text.split(" | ").first&.strip
end
#isoref ⇒ String
Create ISO reference identifier with English language.
143 144 145 146 147 |
# File 'lib/relaton/iso/scraper.rb', line 143

# Create the ISO reference identifier with the English language attached.
#
# Works on a copy of #pubid, so the memoized identifier keeps its own
# languages assignment.
#
# @return [String] string form of the copied identifier
def isoref
  reference = pubid.dup
  reference.languages = [::Pubid::Components::Language.new(code: "en", original_code: "E")]
  reference.to_s
end
#parse(path) ⇒ Object
Fetch the page at path and build the bibliographic item from it.
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/relaton/iso/scraper.rb', line 63

# Fetch the page at +path+ and assemble the bibliographic item from it.
#
# Stores the fetched document and its URL in @doc and @url, which the
# other scraping methods read.
#
# @param path [Object] page location handed to get_page
# @return [ItemData] item built from the scraped page
def parse(path) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  @doc, @url = get_page path
  titles, abstract, langs = fetch_titles_abstract
  contributors = fetch_contributors
  editorial_group = fetch_editorialgroup_contributor
  contributors << editorial_group if editorial_group

  # Values are evaluated top to bottom, in the same order as before.
  attrs = {
    id: id.gsub(/[^\w]/, ""),
    # fetched: Date.today.to_s,
    type: "standard",
    docidentifier: fetch_relaton_docids,
    docnumber: fetch_docnumber,
    edition: edition,
    language: langs.map { |entry| entry[:lang] },
    script: langs.map { |entry| script(entry[:lang]) }.uniq,
    title: titles,
    status: fetch_status,
    ics: fetch_ics,
    date: fetch_dates,
    contributor: contributors,
    abstract: abstract,
    copyright: fetch_copyright,
    source: fetch_source(@url),
    relation: fetch_relations,
    place: [Bib::Place.new(city: "Geneva")],
    structuredidentifier: fetch_structuredidentifier,
    ext: parse_ext,
  }
  ItemData.new(**attrs)
end
#pubid ⇒ Object
Publication identifier parsed from the page id (memoized).
103 104 105 106 107 108 109 110 111 |
# File 'lib/relaton/iso/scraper.rb', line 103

# Publication identifier parsed from the page id (memoized on success).
#
# When the parsed identifier has a base identifier, the root edition is
# filled in from the scraped edition unless one is already set. A page
# without an edition is not an error: the identifier is returned as
# parsed. The result is cached only after every step succeeded, so a
# failing call never leaves a half-prepared identifier behind.
#
# @return [Pubid::Iso::Identifier, Object] the identifier; on failure the
#   error is logged and the return value of Util.error is returned
def pubid # rubocop:disable Metrics/AbcSize
  return @pubid if @pubid

  parsed = ::Pubid::Iso::Identifier.parse(id)
  # edition is nil when the page carries no edition; skip it quietly.
  parsed.root.edition ||= edition&.content if parsed.base_identifier
  @pubid = parsed
rescue StandardError => e
  Util.error "Failed to parse pubid from #{id}: #{e.message}"
end
#urn ⇒ Object
113 114 115 |
# File 'lib/relaton/iso/scraper.rb', line 113

# Identifier used as content of the "URN" document id: #pubid with its
# stage harmonized to stage_code.
#
# @return [Object] result of with_harmonized_stage on the pubid
def urn
  identifier = pubid
  identifier.with_harmonized_stage(stage_code)
end