Class: Relaton::Etsi::DataFetcher

Inherits:
Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/etsi/data_fetcher.rb

Constant Summary collapse

# Divisor used by #fetch_remaining_pages to turn the reported "total_count"
# into a number of result pages (presumably the server-side page size — confirm).
PAGE_SIZE =
50
# Search URL template. It is filled in with Kernel#format using the named
# references :page, :date (substituted into endDate) and :timestamp
# (substituted into x).
SOURCEURL =
"https://www.etsi.org/custom/standardssearch/data.php?format=json&includeScope=1&" \
"page=%<page>s&search=&title=1&etsiNumber=1&content=1&version=0&onApproval=1&published=1&" \
"withdrawn=1&historical=1&isCurrent=1&superseded=1&startDate=1988-01-15&endDate=%<date>s&" \
"harmonized=0&keyword=&TB=&stdType=&frequency=&mandate=&collection=&sort=1&x=%<timestamp>s".freeze
# Exception classes that #fetch_with_retry rescues and retries on.
NETWORK_ERRORS =
[
  Mechanize::Error, Net::OpenTimeout, Net::ReadTimeout,
  SocketError, Errno::ECONNRESET
].freeze

Instance Method Summary collapse

Instance Method Details

#derive_status(record) ⇒ Object



77
78
79
80
81
82
83
84
85
# File 'lib/relaton/etsi/data_fetcher.rb', line 77

# Derive a human-readable status label from a raw search record.
#
# A record whose "ACTION_TYPE" is "WD" is always "Withdrawn". Otherwise the
# label depends on "STATUS_CODE" converted with #to_i (a missing value
# therefore counts as 0).
#
# @param record [Hash] raw record; reads "ACTION_TYPE" and "STATUS_CODE"
# @return [String] "Withdrawn", "On Approval", "Historical" or "Published"
def derive_status(record)
  return "Withdrawn" if record["ACTION_TYPE"] == "WD"

  status_code = record["STATUS_CODE"].to_i
  if status_code == 13
    "Historical"
  elsif status_code >= 12
    "Published"
  else
    # Every code below 12 is treated as not yet published.
    "On Approval"
  end
end

#fetch(_source = nil) ⇒ Object

Fetch all ETSI documents from the ETSI website.

Parameters:

  • _source (Object) (defaults to: nil)

    unused, required by superclass interface



32
33
34
35
36
37
38
# File 'lib/relaton/etsi/data_fetcher.rb', line 32

# Fetch every result page, process each record, then persist the index.
#
# Page 1 is requested first because its content is also handed to
# #fetch_remaining_pages, which uses it to work out how many pages follow.
#
# @param _source [Object] unused, required by superclass interface
# @return [Object] whatever #report_errors returns
def fetch(_source = nil)
  fetch_page(1).tap do |initial_batch|
    process_records(initial_batch)
    fetch_remaining_pages(initial_batch)
  end
  index.save
  report_errors
end

#fetch_page(page) ⇒ Object



51
52
53
54
55
56
# File 'lib/relaton/etsi/data_fetcher.rb', line 51

# Download one page of search results and parse it as JSON.
#
# @param page [Integer] page number substituted into SOURCEURL
# @return [Object] the parsed JSON document returned for that page
def fetch_page(page)
  url_params = {
    page: page,
    # Tomorrow's date is used as the search end date.
    date: Time.now.to_date + 1,
    # Current time in milliseconds (presumably a cache-buster — confirm).
    timestamp: (Time.now.to_f * 1000).to_i,
  }
  JSON.parse(fetch_with_retry(format(SOURCEURL, url_params)))
end

#fetch_remaining_pages(first_page) ⇒ Object



40
41
42
43
44
45
46
47
48
49
# File 'lib/relaton/etsi/data_fetcher.rb', line 40

# Fetch and process every page after the first.
#
# The page count is derived from the "total_count" value of the first record
# on the first page; when that page has no first record the count is 0 and
# nothing is fetched.
#
# @param first_page [Array<Hash>] records already fetched for page 1
# @return [Range, nil] the iterated range, or nil when an empty page stopped the loop
def fetch_remaining_pages(first_page)
  head = first_page.first
  record_count = head ? head["total_count"].to_i : 0
  last_page = (record_count.to_f / PAGE_SIZE).ceil
  (2..last_page).each do |page_number|
    batch = fetch_page(page_number)
    # An empty page ends the run early, whatever total_count claimed.
    break if batch.empty?

    process_records(batch)
  end
end

#fetch_with_retry(url, retries: 3, delay: 2) ⇒ Object

rubocop:disable Metrics/MethodLength



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/relaton/etsi/data_fetcher.rb', line 92

# GET a URL and return the response body, retrying on network failures.
#
# Only the exception classes listed in NETWORK_ERRORS are retried; any other
# exception propagates immediately. A new Mechanize agent is created for each
# attempt.
#
# @param url [String] address to fetch
# @param retries [Integer] number of retries allowed after the first attempt
# @param delay [Numeric] base wait in seconds, multiplied by the attempt number
# @return [Object] the body of the fetched page
# @raise the last rescued network error once all retries are used up
def fetch_with_retry(url, retries: 3, delay: 2) # rubocop:disable Metrics/MethodLength
  failures = 0
  begin
    Mechanize.new.get(url).body
  rescue *NETWORK_ERRORS => e
    failures += 1
    # Out of retries: re-raise the error that was just rescued.
    raise if failures > retries

    Util.info "Fetch failed (#{e.message}), " \
              "retrying (#{failures}/#{retries})..."
    # The wait grows linearly with the number of failures so far.
    sleep delay * failures
    retry
  end
end

#index ⇒ Object



19
20
21
# File 'lib/relaton/etsi/data_fetcher.rb', line 19

# Memoised accessor for the :etsi index backed by INDEX_FILE.
#
# The index is looked up (or created) on first use and the same object is
# returned on later calls.
#
# @return [Object] the value returned by Relaton::Index.find_or_create
def index
  @index ||= Relaton::Index.find_or_create(:etsi, file: INDEX_FILE)
end

#log_error(msg) ⇒ Object



23
24
25
# File 'lib/relaton/etsi/data_fetcher.rb', line 23

# Report an error message through Util.error.
#
# @param msg [String] text to log
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end

#normalize(record) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/relaton/etsi/data_fetcher.rb', line 64

# Convert a raw search record into the hash that is passed to DataParser.
#
# Both links are built by interpolating record fields into fixed URL prefixes;
# a missing field simply interpolates as an empty string.
#
# @param record [Hash] raw record as returned by the search endpoint
# @return [Hash{String => Object}] record under the keys "ETSI deliverable",
#   "title", "Details link", "PDF link", "Status", "Keywords",
#   "Technical body" and "Scope"
def normalize(record)
  details_url = "https://webapp.etsi.org/workprogram/Report_WorkItem.asp?WKI_ID=#{record['wki_id']}"
  pdf_url = "https://www.etsi.org/deliver/#{record['EDSpathname']}#{record['EDSPDFfilename']}"

  {
    "ETSI deliverable" => record["ETSI_DELIVERABLE"],
    "title" => record["TITLE"],
    "Details link" => details_url,
    "PDF link" => pdf_url,
    "Status" => derive_status(record),
    # Keywords is always a String, even when the record has none.
    "Keywords" => record["Keywords"].to_s,
    "Technical body" => record["TB"],
    "Scope" => record["Scope"],
  }
end

#process_records(records) ⇒ Object



58
59
60
61
62
# File 'lib/relaton/etsi/data_fetcher.rb', line 58

# Normalise, parse and save every record of one result page.
#
# @param records [Array<Hash>] raw records of a single page
# @return [Array<Hash>] the records collection that was passed in
def process_records(records)
  records.each do |raw|
    # @errors is handed to the parser, presumably so it can record problems — confirm.
    document = DataParser.new(normalize(raw), @errors).parse
    save(document)
  end
end

#save(bib) ⇒ Object



108
109
110
111
112
113
# File 'lib/relaton/etsi/data_fetcher.rb', line 108

# Write one bibliographic item to disk and register it in the index.
#
# The identifier is the content of the item's first docidentifier; it selects
# the output path (via #output_file) and is the key stored in the index.
#
# @param bib [Object] item responding to #docidentifier
# @return [Object] whatever index.add_or_update returns
def save(bib)
  doc_id = bib.docidentifier.first.content
  path = output_file(doc_id)
  File.write(path, serialize(bib), encoding: "UTF-8")
  index.add_or_update(doc_id, path)
end

#to_bibxml(bib) ⇒ Object



123
124
125
# File 'lib/relaton/etsi/data_fetcher.rb', line 123

# Render an item by delegating to its #to_rfcxml method.
#
# @param bib [Object] item responding to #to_rfcxml
# @return [Object] the result of bib.to_rfcxml
def to_bibxml(bib)
  bib.to_rfcxml
end

#to_xml(bib) ⇒ Object



119
120
121
# File 'lib/relaton/etsi/data_fetcher.rb', line 119

# Render an item as XML, asking for the bibdata form.
#
# @param bib [Object] item responding to #to_xml
# @return [Object] the result of bib.to_xml called with bibdata: true
def to_xml(bib)
  bib.to_xml(bibdata: true)
end

#to_yaml(bib) ⇒ Object



115
116
117
# File 'lib/relaton/etsi/data_fetcher.rb', line 115

# Render an item by delegating to its #to_yaml method.
#
# @param bib [Object] item responding to #to_yaml
# @return [Object] the result of bib.to_yaml
def to_yaml(bib)
  bib.to_yaml
end