Class: Relaton::Ogc::DataFetcher

Inherits:
Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/ogc/data_fetcher.rb

Constant Summary collapse

ENDPOINT =
"https://raw.githubusercontent.com/opengeospatial/NamingAuthority/master/definitions/docs/docs.json"

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Returns a new instance of DataFetcher.



14
15
16
17
18
19
# File 'lib/relaton/ogc/data_fetcher.rb', line 14

# Set up the fetcher state on top of the parent initializer.
#
# @param output [String] directory in which "etag.txt" is kept
# @param format [Object] passed through to the parent initializer unchanged
def initialize(output, format)
  super # bare super forwards both arguments to the parent class
  @dupids = Set.new
  @docids = []
  @etagfile = File.join(output, "etag.txt")
end

Instance Method Details

#etag ⇒ String?

Returns:

  • (String, nil)


85
86
87
88
89
# File 'lib/relaton/ogc/data_fetcher.rb', line 85

# Return the stored ETag, reading it from the etag file on first use.
#
# @return [String, nil] contents of the etag file, or nil when the file
#   does not exist
def etag
  return @etag if @etag

  @etag = File.exist?(@etagfile) ? File.read(@etagfile, encoding: "UTF-8") : nil
end

#etag=(e_tag) ⇒ Object

Parameters:

  • e_tag (String)


92
93
94
# File 'lib/relaton/ogc/data_fetcher.rb', line 92

# Persist the ETag to the etag file and refresh the memoized value.
#
# @param e_tag [String] value to store
def etag=(e_tag)
  File.write @etagfile, e_tag, encoding: "UTF-8"
  # The reader memoizes its result in @etag; update it only after the write
  # succeeded so a later read does not return the previous, stale value.
  @etag = e_tag
end

#fetch(_source = nil) ⇒ Object

rubocop:disable Metrics/AbcSize



29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/relaton/ogc/data_fetcher.rb', line 29

# Download the document list, store every entry and save the index.
#
# The body runs only when +get_data+ yields. The new ETag is persisted only
# when +fetch_doc+ returned a truthy value for every entry.
#
# @param _source [Object, nil] unused
# @return [Object] whatever +get_data+ returns
def fetch(_source = nil) # rubocop:disable Metrics/AbcSize
  get_data do |new_etag, docs|
    # +count+ visits every entry, so one failure does not stop the others.
    failures = docs.each_value.count { |hit| !fetch_doc(hit) }
    if @dupids.any?
      dups = @dupids.to_a.join(", ")
      Util.warn "Duplicated documents: #{dups}"
    end
    self.etag = new_etag if failures.zero?
    index.save
    report_errors
  end
end

#fetch_doc(hit) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
# File 'lib/relaton/ogc/data_fetcher.rb', line 42

# Parse a single entry of the document list and write it to disk.
#
# @param hit [Hash] one entry of the document list
# @return [Boolean] true when the entry was written or deliberately skipped,
#   false when an error was raised and logged
def fetch_doc(hit)
  # Entries of type "CC" are skipped on purpose. Report that as success:
  # the caller treats any falsy result as a failure and would then never
  # store the new ETag.
  return true if hit["type"] == "CC"

  bib = Scraper.parse_page hit, @errors
  write_document bib
  true
rescue StandardError => e
  Util.error "Fetching document: #{hit['identifier']}\n" \
             "#{e.class} #{e.message}\n#{e.backtrace}"
  false
end

#file_name(bib) ⇒ Object



67
68
69
70
# File 'lib/relaton/ogc/data_fetcher.rb', line 67

# Build the output path for a document from its first document identifier.
#
# The identifier is upcased and every whitespace character, colon and dot is
# replaced with an underscore.
#
# @param bib [Object] item responding to +docidentifier+
# @return [String] "<output>/<name>.<ext>"
def file_name(bib)
  docid = bib.docidentifier[0].content
  basename = docid.upcase.gsub(/[\s:.]/, "_")
  "#{@output}/#{basename}.#{@ext}"
end

#get_data ⇒ Object

rubocop:disable Metrics/AbcSize



96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/relaton/ogc/data_fetcher.rb', line 96

# Request the document list from ENDPOINT, sending the stored ETag (if any)
# in the "If-None-Match" header.
#
# @yield [etag, json] called on a 200 response when a block is given
# @yieldparam etag [Object] value of +response[:etag]+
# @yieldparam json [Object] parsed response body
# @return [Object] the block's result, or the parsed body when no block is
#   given; an empty Array on a 304 response
# @raise [Relaton::RequestError] for any other response status
def get_data # rubocop:disable Metrics/AbcSize
  headers = etag ? { "If-None-Match" => etag } : {}
  response = Faraday.new(ENDPOINT, headers: headers).get
  status = response.status
  return [] if status == 304
  raise Relaton::RequestError, "Could not access #{ENDPOINT}" unless status == 200

  parsed = JSON.parse(response.body)
  return parsed unless block_given?

  yield response[:etag], parsed
end

#index ⇒ Object



25
26
27
# File 'lib/relaton/ogc/data_fetcher.rb', line 25

# Return the memoized index, looking it up (or creating it) on first use.
#
# @return [Object] result of +Relaton::Index.find_or_create+ for :ogc
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(:ogc, file: "#{INDEXFILE}.yaml")
end

#log_error(msg) ⇒ Object



21
22
23
# File 'lib/relaton/ogc/data_fetcher.rb', line 21

# Forward an error message to +Util.error+.
#
# @param msg [String] message to log
# @return [Object] whatever +Util.error+ returns
def log_error(msg)
  Util.error(msg)
end

#to_bibxml(_bib) ⇒ Object

Raises:

  • (NotImplementedError)


80
81
82
# File 'lib/relaton/ogc/data_fetcher.rb', line 80

# BibXML output is not available for OGC documents.
#
# @param _bib [Object] ignored
# @raise [NotImplementedError] always
def to_bibxml(_bib)
  raise(NotImplementedError, "OGC does not support bibxml format")
end

#to_xml(bib) ⇒ Object



76
77
78
# File 'lib/relaton/ogc/data_fetcher.rb', line 76

# Serialize a document by delegating to +Bibdata.to_xml+.
#
# @param bib [Object] document to serialize
# @return [Object] whatever +Bibdata.to_xml+ returns
def to_xml(bib)
  Bibdata.to_xml(bib)
end

#to_yaml(bib) ⇒ Object



72
73
74
# File 'lib/relaton/ogc/data_fetcher.rb', line 72

# Serialize a document by delegating to +Item.to_yaml+.
#
# @param bib [Object] document to serialize
# @return [Object] whatever +Item.to_yaml+ returns
def to_yaml(bib)
  Item.to_yaml(bib)
end

#write_document(bib) ⇒ Object

rubocop:disable Metrics/AbcSize



54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/relaton/ogc/data_fetcher.rb', line 54

# Write a document to its output file and register it in the index.
#
# An identifier that was already written during this run is only recorded in
# @dupids; the document itself is not written again.
#
# @param bib [Object] item responding to +docidentifier+
# @return [Integer, nil] result of +File.write+, or nil for a duplicate
def write_document(bib) # rubocop:disable Metrics/AbcSize
  id = bib.docidentifier[0].content
  if @docids.include?(id)
    @dupids << id
    nil
  else
    @docids << id
    path = file_name(bib)
    index.add_or_update(id, path)
    File.write(path, serialize(bib), encoding: "UTF-8")
  end
end