Class: Relaton::Iso::DataFetcher

Inherits:
Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/iso/data_fetcher.rb

Overview

Fetch ISO documents from the ISO Open Data programme bulk JSONL (see www.iso.org/open-data.html) and write each one as a YAML file under `@output`.

`source` modes (matching the `Relaton::Core::DataFetcher.fetch` arg):

  • `"iso-open-data"` (default) - skip the run if the upstream `Last-Modified` header matches the value recorded in `LAST_MODIFIED_FILE`.

  • `"iso-open-data-all"` - clear `@output` and re-emit every record.

Constant Summary collapse

# URL of the ISO deliverables metadata export (JSONL), assembled from its
# host, directory and file-name fragments.
OPEN_DATA_URL = [
  "https://isopublicstorageprod.blob.core.windows.net/",
  "opendata/_latest/iso_deliverables_metadata/json/",
  "iso_deliverables_metadata.jsonl",
].join.freeze

# URL of the ISO technical committees export (JSONL), assembled the same way.
TC_DATA_URL = [
  "https://isopublicstorageprod.blob.core.windows.net/",
  "opendata/_latest/iso_technical_committees/json/",
  "iso_technical_committees.jsonl",
].join.freeze

# File name used for the recorded upstream `Last-Modified` value.
LAST_MODIFIED_FILE = "last_modified.txt".freeze

# NOTE(review): presumably the cap on download attempts and the base delay of
# the retry back-off; the code using them (and the delay unit) is not visible
# here - confirm against the download helper.
MAX_DOWNLOAD_RETRIES = 4
RETRY_BACKOFF_BASE = 30

Instance Method Summary collapse

Instance Method Details

#fetch(source = nil) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/relaton/iso/data_fetcher.rb', line 42

# Runs one ISO Open Data import from start to finish.
#
# The run stops early (returning nil) when `up_to_date?` answers truthy for
# the value obtained from `fetch_last_modified`. Otherwise the helpers are
# invoked in a fixed order: prepare output, download, build the lookup
# indexes, ingest, merge static files, save the index, record the
# last-modified value, report errors.
#
# @param source [String, nil] run mode; nil falls back to "iso-open-data".
#   The value "iso-open-data-all" switches the @full_refresh flag on.
# @return [Object, nil] the result of `report_errors`, or nil on early exit
# @raise [StandardError] whatever the pipeline raised, after it was logged
def fetch(source = nil)
  @source = source || "iso-open-data"
  @full_refresh = (@source == "iso-open-data-all")
  Util.info "Fetching ISO Open Data (mode: #{@source})..."

  upstream_stamp = fetch_last_modified
  return if up_to_date?(upstream_stamp)

  prepare_output
  dataset_path = download_dataset
  refs, amendments, dates = build_ref_index(dataset_path)
  committees = build_tc_index
  ingest_records(dataset_path, refs, committees, amendments, dates)
  merge_static_files

  index.save
  # The stamp is recorded only after the index has been saved, so an error
  # earlier in the run never reaches this line.
  save_last_modified(upstream_stamp)
  report_errors
rescue StandardError => e
  # Log message plus backtrace, then let the caller see the same exception.
  trace = e.backtrace.join("\n")
  Util.error "#{e.message}\n#{trace}"
  raise
end

#index ⇒ Object



36
37
38
39
40
# File 'lib/relaton/iso/data_fetcher.rb', line 36

# Returns the ISO index, creating it on first use and caching it in @index.
#
# The index is requested from `Relaton::Index.find_or_create` for the `:iso`
# type, with a file name of "<INDEXFILE>.yaml" and `Pubid::Iso::Identifier`
# as the pubid class.
#
# @return [Object] whatever `Relaton::Index.find_or_create` returned
def index
  return @index if @index

  index_file = "#{INDEXFILE}.yaml"
  @index = Relaton::Index.find_or_create(
    :iso,
    file: index_file,
    pubid_class: ::Pubid::Iso::Identifier,
  )
end

#log_error(msg) ⇒ Object



32
33
34
# File 'lib/relaton/iso/data_fetcher.rb', line 32

# Forwards an error message to `Util.error`.
#
# @param msg [String] text to log
# @return [Object] whatever `Util.error` returns
def log_error(msg)
  Util.error(msg)
end