Class: Relaton::Etsi::DataFetcher
- Inherits:
- Core::DataFetcher
- Inheritance chain (root first):
- Object
- Core::DataFetcher
- Relaton::Etsi::DataFetcher
- Defined in:
- lib/relaton/etsi/data_fetcher.rb
Constant Summary
- PAGE_SIZE =
50
- SOURCEURL =
"https://www.etsi.org/custom/standardssearch/data.php?format=json&includeScope=1&" \
"page=%<page>s&search=&title=1&etsiNumber=1&content=1&version=0&onApproval=1&published=1&" \
"withdrawn=1&historical=1&isCurrent=1&superseded=1&startDate=1988-01-15&endDate=%<date>s&" \
"harmonized=0&keyword=&TB=&stdType=&frequency=&mandate=&collection=&sort=1&x=%<timestamp>s".freeze
- NETWORK_ERRORS =
[ Mechanize::Error, Net::OpenTimeout, Net::ReadTimeout, SocketError, Errno::ECONNRESET ].freeze
Instance Method Summary
- #derive_status(record) ⇒ Object
- #fetch(_source = nil) ⇒ Object
Fetch all ETSI documents from the ETSI website.
- #fetch_page(page) ⇒ Object
- #fetch_remaining_pages(first_page) ⇒ Object
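- #fetch_with_retry(url, retries: 3, delay: 2) ⇒ Object
Fetch the body of url, retrying up to retries times on the errors listed in NETWORK_ERRORS and sleeping delay * attempt seconds before each retry; the error is re-raised once the retries are exhausted.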
- #index ⇒ Object
- #log_error(msg) ⇒ Object
- #normalize(record) ⇒ Object
- #process_records(records) ⇒ Object
- #save(bib) ⇒ Object
- #to_bibxml(bib) ⇒ Object
- #to_xml(bib) ⇒ Object
- #to_yaml(bib) ⇒ Object
Instance Method Details
#derive_status(record) ⇒ Object
77 78 79 80 81 82 83 84 85 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 77 def derive_status(record) return "Withdrawn" if record["ACTION_TYPE"] == "WD" code = record["STATUS_CODE"].to_i return "On Approval" if code < 12 return "Historical" if code == 13 "Published" end |
#fetch(_source = nil) ⇒ Object
Fetch all ETSI documents from the ETSI website.
32 33 34 35 36 37 38 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 32 def fetch(_source = nil) first_page = fetch_page(1) process_records(first_page) fetch_remaining_pages(first_page) index.save report_errors end |
#fetch_page(page) ⇒ Object
51 52 53 54 55 56 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 51 def fetch_page(page) date = Time.now.to_date + 1 timestamp = (Time.now.to_f * 1000).to_i url = format(SOURCEURL, page: page, date: date, timestamp: timestamp) JSON.parse(fetch_with_retry(url)) end |
#fetch_remaining_pages(first_page) ⇒ Object
40 41 42 43 44 45 46 47 48 49 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 40 def fetch_remaining_pages(first_page) total = first_page.first ? first_page.first["total_count"].to_i : 0 total_pages = (total / PAGE_SIZE.to_f).ceil (2..total_pages).each do |page| records = fetch_page(page) break if records.empty? process_records(records) end end |
#fetch_with_retry(url, retries: 3, delay: 2) ⇒ Object
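Fetch the body of url, retrying up to retries times on the errors listed in NETWORK_ERRORS and sleeping delay * attempt seconds before each retry; the error is re-raised once the retries are exhausted.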
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 92 def fetch_with_retry(url, retries: 3, delay: 2) # rubocop:disable Metrics/MethodLength attempt = 0 begin Mechanize.new.get(url).body rescue *NETWORK_ERRORS => e attempt += 1 if attempt <= retries Util.info "Fetch failed (#{e.message}), " \ "retrying (#{attempt}/#{retries})..." sleep delay * attempt retry end raise end end |
#index ⇒ Object
19 20 21 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 19 def index @index ||= Relaton::Index.find_or_create :etsi, file: INDEX_FILE end |
#log_error(msg) ⇒ Object
23 24 25 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 23 def log_error(msg) Util.error msg end |
#normalize(record) ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 64 def normalize(record) { "ETSI deliverable" => record["ETSI_DELIVERABLE"], "title" => record["TITLE"], "Details link" => "https://webapp.etsi.org/workprogram/Report_WorkItem.asp?WKI_ID=#{record['wki_id']}", "PDF link" => "https://www.etsi.org/deliver/#{record['EDSpathname']}#{record['EDSPDFfilename']}", "Status" => derive_status(record), "Keywords" => record["Keywords"].to_s, "Technical body" => record["TB"], "Scope" => record["Scope"], } end |
#process_records(records) ⇒ Object
58 59 60 61 62 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 58 def process_records(records) records.each do |record| save DataParser.new(normalize(record), @errors).parse end end |
#save(bib) ⇒ Object
108 109 110 111 112 113 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 108 def save(bib) id = bib.docidentifier.first.content file = output_file id File.write file, serialize(bib), encoding: "UTF-8" index.add_or_update id, file end |
#to_bibxml(bib) ⇒ Object
123 124 125 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 123 def to_bibxml(bib) bib.to_rfcxml end |
#to_xml(bib) ⇒ Object
119 120 121 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 119 def to_xml(bib) bib.to_xml bibdata: true end |
#to_yaml(bib) ⇒ Object
115 116 117 |
# File 'lib/relaton/etsi/data_fetcher.rb', line 115 def to_yaml(bib) bib.to_yaml end |