Class: RelatonIec::DataFetcher
- Inherits:
-
Object
- Object
- RelatonIec::DataFetcher
- Defined in:
- lib/relaton_iec/data_fetcher.rb
Constant Summary collapse
- ENTRYPOINT =
"https://api.iec.ch/harmonized/publications?size=100&sortBy=urn&page=".freeze
- CREDENTIAL =
"https://api.iec.ch/oauth/client_credential/accesstoken?grant_type=client_credentials".freeze
Instance Method Summary collapse
-
#access_token ⇒ String
Get access token.
-
#add_static_files_to_index ⇒ void
Add static files to index.
-
#fetch ⇒ Object
Fetch data from IEC.
-
#fetch_all ⇒ void
Fetch documents from IEC API.
-
#fetch_page(page) ⇒ Net::HTTP::Response
Fetch page from IEC API.
-
#fetch_page_token(page) ⇒ Net::HTTP::Response
Fetch page.
-
#fetch_pub(pub) ⇒ Object
Fetch publication and save it to file.
- #index_id(pub) ⇒ Object
-
#initialize(source = "iec-harmonised-latest", output: "data", format: "yaml") ⇒ DataFetcher
constructor
Initialize new instance.
Constructor Details
#initialize(source = "iec-harmonised-latest", output: "data", format: "yaml") ⇒ DataFetcher
Initialize new instance.
13 14 15 16 17 18 19 20 |
# File 'lib/relaton_iec/data_fetcher.rb', line 13
#
# Initialize new instance.
#
# @param source [String] data source name; "iec-harmonised-all" switches the
#   fetcher into "fetch everything" mode, any other value leaves it off
# @param output [String] directory the documents are written to
# @param format [String] output format; a leading "bib" is stripped to get
#   the file extension (e.g. "bibxml" gives "xml")
def initialize(source = "iec-harmonised-latest", output: "data", format: "yaml")
  @all = source == "iec-harmonised-all"
  @output = output
  @format = format
  @ext = format.sub(/^bib/, "")
  @files = []
  @index = Index.new "index.yaml"
end
Instance Method Details
#access_token ⇒ String
Get access token.
121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/relaton_iec/data_fetcher.rb', line 121
#
# Get access token. The token is requested from CREDENTIAL over HTTPS with
# basic auth and memoized in @access_token, so later calls reuse it until
# the instance variable is reset.
#
# @return [String] value of the "access_token" key of the JSON response
# @raise [KeyError] if IEC_HAPI_PROJ_PUBS_KEY or IEC_HAPI_PROJ_PUBS_SECRET
#   is not set in the environment
def access_token # rubocop:disable Metrics/AbcSize
  return @access_token if @access_token

  uri = URI(CREDENTIAL)
  request = Net::HTTP::Get.new(uri)
  request.basic_auth ENV.fetch("IEC_HAPI_PROJ_PUBS_KEY"), ENV.fetch("IEC_HAPI_PROJ_PUBS_SECRET")
  response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(request) }
  @access_token = JSON.parse(response.body)["access_token"]
end
#add_static_files_to_index ⇒ void
This method returns an undefined value.
Add static files to index.
51 52 53 54 55 56 57 |
# File 'lib/relaton_iec/data_fetcher.rb', line 51
#
# Add static files to index. Every YAML file under "static/" is parsed and
# registered in the index under the "id" of its primary "docid" entry.
#
# @return [void]
def add_static_files_to_index
  Dir["static/*.yaml"].each do |path|
    doc = RelatonBib.parse_yaml File.read(path, encoding: "UTF-8")
    primary = RelatonBib.array(doc["docid"]).detect { |docid| docid["primary"] }
    @index.add primary["id"], path
  end
end
#fetch ⇒ Object
Fetch data from IEC.
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/relaton_iec/data_fetcher.rb', line 25
#
# Fetch data from IEC.
#
# Creates the output directory, and in "all" mode removes previously written
# files with the current extension and clears the index. It then fetches the
# documents, adds the static files to the index and saves the index.
# Start/stop times and the elapsed seconds are printed to stdout.
#
# Any StandardError raised along the way is reported on stderr (message
# followed by backtrace) instead of being propagated to the caller.
def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  t1 = Time.now
  puts "Started at: #{t1}"
  FileUtils.mkdir_p @output
  if @all
    FileUtils.rm Dir[File.join(@output, "*.#{@ext}")]
    @index.clear
  end
  fetch_all
  add_static_files_to_index
  @index.save
  t2 = Time.now
  puts "Stopped at: #{t2}"
  puts "Done in: #{(t2 - t1).round} sec."
rescue StandardError => e
  # Report the message explicitly: a dangling `e.` would chain into the next
  # line and call the private Kernel#warn on the exception object.
  warn e.message
  warn e.backtrace.join("\n")
end
#fetch_all ⇒ void
This method returns an undefined value.
Fetch documents from IEC API.
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/relaton_iec/data_fetcher.rb', line 64
#
# Fetch documents from IEC API.
#
# Pages are requested starting from 0. Each entry of a page's "publication"
# list is passed to #fetch_pub. Paging continues while the response's "link"
# header contains rel="last"; a response with a code other than "200" is
# reported on stderr and stops the loop.
#
# @return [void]
def fetch_all # rubocop:disable Metrics/MethodLength
  page_number = 0
  loop do
    response = fetch_page_token page_number
    if response.code != "200"
      warn "[relaton-iec] #{response.body}"
      break
    end

    JSON.parse(response.body)["publication"].each { |pub| fetch_pub pub }
    page_number += 1
    break unless response["link"]&.include?("rel=\"last\"")
  end
end
#fetch_page(page) ⇒ Net::HTTP::Response
Fetch page from IEC API.
103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/relaton_iec/data_fetcher.rb', line 103
#
# Fetch page from IEC API.
#
# The request URL is ENTRYPOINT followed by the page number. Unless the
# fetcher runs in "all" mode, and only when the index reports a last change,
# that value is appended as the lastChangeTimestampFrom query parameter.
# The request carries the access token as a Bearer Authorization header.
#
# @param page [Integer] page number
# @return [Net::HTTP::Response] response
def fetch_page(page)
  incremental = !@all && @index.last_change
  since = incremental ? "&lastChangeTimestampFrom=#{@index.last_change}" : ""
  uri = URI("#{ENTRYPOINT}#{page}#{since}")
  request = Net::HTTP::Get.new(uri)
  request["Authorization"] = "Bearer #{access_token}"
  Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) { |http| http.request(request) }
end
#fetch_page_token(page) ⇒ Net::HTTP::Response
Fetch page. If response code is 401, then get new access token and try again.
87 88 89 90 91 92 93 94 |
# File 'lib/relaton_iec/data_fetcher.rb', line 87
#
# Fetch page. If the response code is "401", drop the memoized access token
# and request the page once more; the second response is returned as is.
#
# @param page [Integer] page number
# @return [Net::HTTP::Response] response
def fetch_page_token(page)
  response = fetch_page page
  return response unless response.code == "401"

  # Clearing the cached token makes the retry obtain a fresh one.
  @access_token = nil
  fetch_page page
end
#fetch_pub(pub) ⇒ Object
Fetch publication and save it to file.
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/relaton_iec/data_fetcher.rb', line 138
#
# Fetch publication and save it to file.
#
# The file name is the primary document identifier, downcased, with colons,
# whitespace and slashes replaced by underscores, plus the configured
# extension. The first time a path is seen it is recorded and added to the
# index; a repeated path only triggers a warning. The file is written in
# either case.
#
# @param pub [Hash] publication data as returned by the API
def fetch_pub(pub) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  bib = DataParser.new(pub).parse
  primary_id = bib.docidentifier.detect(&:primary)
  basename = primary_id.id.downcase.gsub(/[:\s\/]/, '_')
  path = File.join(@output, "#{basename}.#{@ext}")
  if @files.include?(path)
    warn "File #{path} exists."
  else
    @files << path
    @index.add index_id(pub), path, pub["lastChangeTimestamp"]
  end
  serialized =
    case @format
    when "xml" then bib.to_xml(bibdata: true)
    when "yaml", "yml" then bib.to_hash.to_yaml
    when "bibxml" then bib.to_bibxml
    end
  File.write path, serialized, encoding: "UTF-8"
end
#index_id(pub) ⇒ Object
155 156 157 158 159 160 161 162 163 164 |
# File 'lib/relaton_iec/data_fetcher.rb', line 155
#
# Build the index ID(s) for a publication.
#
# The first "-<digits>" group of the reference is taken as the part number.
# The first title value is then scanned for "- Part NNN:" numbers that begin
# with the same digit as that part. When two or more are found, one
# reference per number is returned, each with the part replaced by that
# number; in every other case the reference is returned unchanged.
#
# @param pub [Hash] publication data ("reference" and "title" keys are read)
# @return [String, Array<String>] a single reference or the expanded list
def index_id(pub)
  reference = pub["reference"]
  # The regexp literal must stay on the left-hand side so the named capture
  # is assigned to the local variable `part`.
  /-(?<part>\d+)/ =~ reference
  title = pub.dig("title", 0, "value")
  return reference unless part && title

  expanded = title.scan(/(?<=-\sPart\s)#{part[0]}\d+(?=:)/).map do |number|
    reference.sub(/-#{part}/, "-#{number}")
  end
  expanded.size > 1 ? expanded : reference
end