Class: Relaton::Ecma::DataFetcher
- Inherits:
-
Core::DataFetcher
- Object
- Core::DataFetcher
- Relaton::Ecma::DataFetcher
- Defined in:
- lib/relaton/ecma/data_fetcher.rb
Constant Summary collapse
- URL =
"https://www.ecma-international.org/publications-and-standards/"
- SOURCES =
%w[standards technical-reports mementos].freeze
Instance Method Summary collapse
- #agent ⇒ Object
-
#fetch(_ = nil) ⇒ void
Fetch data from Ecma website.
- #filename(bib) ⇒ Object
- #html_index(type) ⇒ Object
- #index ⇒ Object
- #index_id(bib) ⇒ Object
- #locality_with_volume(bib) ⇒ Object
- #log_error(msg) ⇒ Object
- #parse_page(hit) ⇒ Object
- #to_bibxml(bib) ⇒ Object
- #to_xml(bib) ⇒ Object
- #to_yaml(bib) ⇒ Object
- #write_file(bib) ⇒ Object
Instance Method Details
#agent ⇒ Object
28 29 30 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 28

# Memoized Mechanize client. On first use a new client is created and its
# user-agent alias is set to a random key of Mechanize::AGENT_ALIASES.
#
# @return [Object] the cached Mechanize instance
def agent
  return @agent if @agent

  client = Mechanize.new
  client.user_agent_alias = Mechanize::AGENT_ALIASES.keys.sample
  @agent = client
end
#fetch(_ = nil) ⇒ void
This method returns an undefined value.
Fetch data from Ecma website.
96 97 98 99 100 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 96

# Fetch data from Ecma website.
#
# Runs html_index for every entry of SOURCES, saves the index, and then
# calls report_errors.
#
# @param _ [Object, nil] ignored
# @return [void]
def fetch(_ = nil)
  SOURCES.each do |source_type|
    html_index(source_type)
  end
  index.save
  report_errors
end
#filename(bib) ⇒ Object
44 45 46 47 48 49 50 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 44

# Builds the output file name for a bibliographic item.
#
# The name starts with the content of the first document identifier; the
# edition content and the volume locality's reference_from are appended
# (space-separated) when present. The result is passed to output_file.
#
# @param bib [Object] item responding to docidentifier, edition and extent
# @return [Object] whatever output_file returns for the composed name
def filename(bib)
  name = bib.docidentifier[0].content
  if bib.edition
    name += " #{bib.edition.content}"
  end
  volume = locality_with_volume(bib)
  if volume
    name += " #{volume.reference_from}"
  end
  output_file(name)
end
#html_index(type) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 78

# Downloads the listing page for one source type and parses every hit on it.
#
# The page at "#{URL}#{type}/" is fetched with the shared agent, the two
# XPath expressions below select the hits, and each hit is handed to
# parse_page. A StandardError raised while handling one hit is logged via
# Util.error (message plus backtrace) and does not stop the remaining hits.
#
# @param type [String] path segment appended to URL (an entry of SOURCES)
# @return [Object] the result of iterating the selected hits
def html_index(type) # rubocop:disable Metrics/MethodLength
  page = agent.get "#{URL}#{type}/"
  # @last_call_time = Time.now
  hits = page.xpath(
    "//li/span[1]/a",
    "//div[contains(@class, 'entry-content-wrapper')][.//a[.='Download']]",
  )
  hits.each do |hit|
    parse_page(hit)
  rescue StandardError => e
    # Log and continue so one broken entry does not abort the whole listing.
    Util.error { "#{e.message}\n#{e.backtrace}" }
  end
end
#index ⇒ Object
20 21 22 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 20

# Memoized index for the :ecma type, obtained from
# Relaton::Index.find_or_create with the file name "#{INDEXFILE}.yaml".
#
# @return [Object] the cached index object
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(:ecma, file: "#{INDEXFILE}.yaml")
end
#index_id(bib) ⇒ Object
52 53 54 55 56 57 58 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 52

# Builds the index key hash for a bibliographic item.
#
# @param bib [Object] item responding to docidentifier, edition and extent
# @return [Hash] always contains :id (content of the first document
#   identifier); :ed is added when the item has an edition and :vol when a
#   volume locality is found
def index_id(bib)
  key = { id: bib.docidentifier[0].content }
  key[:ed] = bib.edition.content if bib.edition
  volume = locality_with_volume(bib)
  key[:vol] = volume.reference_from if volume
  key
end
#locality_with_volume(bib) ⇒ Object
60 61 62 63 64 65 66 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 60

# Finds the first locality of type "volume" among the item's extents.
#
# Extents are scanned in order, and within each extent its localities are
# scanned in order; the first match wins.
#
# @param bib [Object] item whose extent entries each respond to locality
# @return [Object, nil] the matching locality, or nil when there is none
def locality_with_volume(bib)
  bib.extent.each do |extent|
    extent.locality.each do |candidate|
      return candidate if candidate.type == "volume"
    end
  end
  nil
end
#log_error(msg) ⇒ Object
24 25 26 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 24

# Forwards an error message to Util.error.
#
# @param msg [Object] message passed through unchanged
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end
#parse_page(hit) ⇒ Object
73 74 75 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 73

# Parses one hit with DataParser (sharing @errors with it) and writes every
# resulting item with write_file.
#
# @param hit [Object] a node selected by html_index
# @return [Object] the result of iterating the parsed items
def parse_page(hit) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  items = DataParser.new(hit, @errors).parse
  items.each do |item|
    write_file(item)
  end
end
#to_bibxml(bib) ⇒ Object
70 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 70

# Serializes the item by delegating to its to_rfcxml method.
#
# @param bib [Object] item responding to to_rfcxml
# @return [Object] the value returned by bib.to_rfcxml
def to_bibxml(bib)
  bib.to_rfcxml
end
#to_xml(bib) ⇒ Object
68 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 68

# Serializes the item by calling its to_xml method with bibdata: true.
#
# @param bib [Object] item responding to to_xml(bibdata:)
# @return [Object] the value returned by bib.to_xml
def to_xml(bib)
  bib.to_xml(bibdata: true)
end
#to_yaml(bib) ⇒ Object
69 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 69

# Serializes the item by delegating to its to_yaml method.
#
# @param bib [Object] item responding to to_yaml
# @return [Object] the value returned by bib.to_yaml
def to_yaml(bib)
  bib.to_yaml
end
#write_file(bib) ⇒ Object
33 34 35 36 37 38 39 40 41 42 |
# File 'lib/relaton/ecma/data_fetcher.rb', line 33

# Writes the serialized item to its file and records it in the index.
#
# When the computed file name is already present in @files, only a
# "Duplicate file" warning is emitted and nothing is written. Otherwise the
# name is added to @files, the serialized item is written as UTF-8, and the
# index entry for the item is added or updated.
#
# @param bib [Object] item to persist
# @return [Object] the result of Util.warn for a duplicate, otherwise the
#   result of index.add_or_update
def write_file(bib)
  path = filename(bib)
  return Util.warn("Duplicate file #{path}") if @files.include?(path)

  @files << path
  File.write(path, serialize(bib), encoding: "UTF-8")
  index.add_or_update(index_id(bib), path)
end