Class: Relaton::Ogc::DataFetcher
- Inherits:
-
Core::DataFetcher
- Object
- Core::DataFetcher
- Relaton::Ogc::DataFetcher
- Defined in:
- lib/relaton/ogc/data_fetcher.rb
Constant Summary collapse
- ENDPOINT =
"https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/definitions/docs/docs.json"
Instance Method Summary collapse
- #etag ⇒ String?
- #etag=(e_tag) ⇒ Object
-
#fetch(_source = nil) ⇒ Object
rubocop:disable Metrics/AbcSize.
- #fetch_doc(hit) ⇒ Object
- #file_name(bib) ⇒ Object
-
#get_data ⇒ Object
rubocop:disable Metrics/AbcSize.
- #index ⇒ Object
-
#initialize(output, format) ⇒ DataFetcher
constructor
A new instance of DataFetcher.
- #log_error(msg) ⇒ Object
- #to_bibxml(_bib) ⇒ Object
- #to_xml(bib) ⇒ Object
- #to_yaml(bib) ⇒ Object
-
#write_document(bib) ⇒ Object
rubocop:disable Metrics/AbcSize.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Returns a new instance of DataFetcher.
14 15 16 17 18 19 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 14
#
# Builds a fetcher and prepares its per-run bookkeeping.
#
# Both arguments are forwarded unchanged to the parent constructor via the
# bare `super`. The ETag cache file is placed at "etag.txt" under `output`.
#
# @param output [String] directory used to build the ETag file path
# @param format [Object] passed through to the parent constructor
def initialize(output, format)
  super
  # Identifiers already written in this run, and those seen more than once.
  @docids = []
  @dupids = Set.new
  @etagfile = File.join(output, "etag.txt")
end
Instance Method Details
#etag ⇒ String?
85 86 87 88 89 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 85
#
# Returns the cached ETag, reading it from the ETag file on first use.
#
# The value is memoised in @etag once a non-nil value has been read.
#
# @return [String, nil] file contents, or nil when the ETag file is absent
def etag
  return @etag if @etag

  @etag = File.read(@etagfile, encoding: "UTF-8") if File.exist?(@etagfile)
end
#etag=(e_tag) ⇒ Object
92 93 94 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 92
#
# Persists the given ETag to the ETag file, replacing any previous content.
# Only the file is written; the memoised @etag is left untouched.
#
# @param e_tag [String] value to store
def etag=(e_tag)
  File.open(@etagfile, "w", encoding: "UTF-8") { |io| io.write(e_tag) }
end
#fetch(_source = nil) ⇒ Object
rubocop:disable Metrics/AbcSize
29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 29
#
# Downloads the document list and processes every entry.
#
# Each value of the parsed JSON is handed to #fetch_doc. Every entry is
# processed even after a failure. Duplicated identifiers collected in
# @dupids are reported with a warning. The new ETag is stored only when no
# #fetch_doc call returned a falsy value; the index is saved and
# `report_errors` is called in either case.
#
# @param _source [Object, nil] unused
def fetch(_source = nil) # rubocop:disable Metrics/AbcSize
  get_data do |new_etag, docs|
    failed = false
    docs.each_value do |hit|
      failed = true unless fetch_doc(hit)
    end
    Util.warn "Duplicated documents: #{@dupids.to_a.join(', ')}" if @dupids.any?
    self.etag = new_etag unless failed
    index.save
    report_errors
  end
end
#fetch_doc(hit) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 42
#
# Parses one entry of the document list and writes the result.
#
# Entries whose "type" is "CC" are skipped on purpose. Skipping is reported
# as success (true) so that a caller treating a falsy result as a failure
# does not count a deliberate skip as an error.
#
# @param hit [Hash] one entry of the downloaded list; "type" and
#   "identifier" are the keys read here
# @return [Boolean] true when the entry was written or skipped, false when
#   a StandardError was raised while processing it
def fetch_doc(hit)
  return true if hit["type"] == "CC"

  bib = Scraper.parse_page hit, @errors
  write_document bib
  true
rescue StandardError => e
  # Log and carry on: one broken entry must not abort the whole run.
  Util.error "Fetching document: #{hit['identifier']}\n" \
             "#{e.class} #{e.message}\n#{e.backtrace}"
  false
end
#file_name(bib) ⇒ Object
67 68 69 70 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 67
#
# Builds the output path for a bibliographic item.
#
# The content of the first document identifier is upper-cased and every
# whitespace character, colon and dot is replaced with an underscore. The
# result is joined with @output and @ext.
#
# @param bib [#docidentifier] item whose first identifier names the file
# @return [String] path of the form "<output>/<NAME>.<ext>"
def file_name(bib)
  docid = bib.docidentifier[0].content
  basename = docid.upcase.gsub(/[\s:.]/, "_")
  "#{@output}/#{basename}.#{@ext}"
end
#get_data ⇒ Object
rubocop:disable Metrics/AbcSize
96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 96
#
# Requests the document list from ENDPOINT.
#
# When a cached ETag is available it is sent as "If-None-Match".
#
# @yield [etag, json] on HTTP 200, when a block is given
# @yieldparam etag [String, nil] value of the response's etag header
# @yieldparam json [Object] parsed response body
# @return [Object] on 200 the block's result, or the parsed JSON when no
#   block is given; an empty Array on 304
# @raise [Relaton::RequestError] for any other response status
def get_data # rubocop:disable Metrics/AbcSize
  headers = etag ? { "If-None-Match" => etag } : {}
  resp = Faraday.new(ENDPOINT, headers: headers).get
  case resp.status
  when 200
    json = JSON.parse(resp.body)
    return json unless block_given?

    yield(resp[:etag], json)
  when 304
    # Not modified: nothing to process and the block is not called.
    []
  else
    raise Relaton::RequestError, "Could not access #{ENDPOINT}"
  end
end
#index ⇒ Object
25 26 27 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 25
#
# Returns the index for :ogc, looking it up or creating it on first use and
# memoising it in @index. The index file name is "#{INDEXFILE}.yaml".
#
# @return [Object] whatever Relaton::Index.find_or_create returns
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(:ogc, file: "#{INDEXFILE}.yaml")
end
#log_error(msg) ⇒ Object
21 22 23 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 21
#
# Forwards an error message to Util.error.
#
# @param msg [String] message to log
def log_error(msg)
  Util.error(msg)
end
#to_bibxml(_bib) ⇒ Object
80 81 82 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 80
#
# BibXML serialisation is not available here; calling this always raises.
#
# @param _bib [Object] ignored
# @raise [NotImplementedError] always
def to_bibxml(_bib)
  raise(NotImplementedError, "OGC does not support bibxml format")
end
#to_xml(bib) ⇒ Object
76 77 78 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 76
#
# Serialises a bibliographic item by delegating to Bibdata.to_xml.
#
# @param bib [Object] item to serialise
# @return [Object] the result of Bibdata.to_xml
def to_xml(bib)
  Bibdata.to_xml(bib)
end
#to_yaml(bib) ⇒ Object
72 73 74 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 72
#
# Serialises a bibliographic item by delegating to Item.to_yaml.
#
# @param bib [Object] item to serialise
# @return [Object] the result of Item.to_yaml
def to_yaml(bib)
  Item.to_yaml(bib)
end
#write_document(bib) ⇒ Object
rubocop:disable Metrics/AbcSize
54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/relaton/ogc/data_fetcher.rb', line 54
#
# Writes one bibliographic item to disk and records it in the index.
#
# The content of the item's first document identifier is the key. An
# identifier already seen in @docids is added to @dupids and nothing is
# written. Otherwise the identifier is remembered, the index entry is added
# or updated, and the serialised item is written to the path from
# #file_name.
#
# @param bib [#docidentifier] item to write
# @return [Integer, nil] result of File.write, or nil for a duplicate
def write_document(bib) # rubocop:disable Metrics/AbcSize
  identifier = bib.docidentifier[0].content
  if @docids.include?(identifier)
    @dupids << identifier
    nil
  else
    @docids << identifier
    path = file_name(bib)
    index.add_or_update(identifier, path)
    File.write(path, serialize(bib), encoding: "UTF-8")
  end
end