Class: Relaton::Nist::DataFetcher
- Inherits:
-
Core::DataFetcher
- Object
- Core::DataFetcher
- Relaton::Nist::DataFetcher
- Defined in:
- lib/relaton/nist/data_fetcher.rb
Constant Summary collapse
- URL =
"https://github.com/usnistgov/NIST-Tech-Pubs/releases/download/Nov2024/allrecords-MODS.xml"
Instance Method Summary collapse
- #fetch(_source = nil) ⇒ Object
- #fetch_tech_pubs ⇒ Object
- #index ⇒ Object
- #log_error(msg) ⇒ Object
-
#pubid(id) ⇒ Object
Parse a docidentifier string into a Pubid::Nist::Identifier; nil (with a warning) if pubid can’t parse it, so a single bad id never aborts the crawl or corrupts index-v2.
- #series ⇒ Object
- #to_bibxml(bib) ⇒ Object
- #to_xml(bib) ⇒ Object
-
#to_yaml(bib) ⇒ Object
def add_static_files Dir["./static/*.yaml"].each do |file| bib = Item.from_yaml(File.read(file, encoding: "UTF-8")) index.add_or_update bib.docidentifier[0].content, file end end.
- #write_file(bib) ⇒ Object
Instance Method Details
#fetch(_source = nil) ⇒ Object
14 15 16 17 18 19 20 |
# File 'lib/relaton/nist/data_fetcher.rb', line 14
#
# Run a full fetch: delete previously generated files, download and write
# the publications, persist the index, then report collected errors.
#
# @param _source [Object, nil] accepted but not used by this method
# @return [Object] whatever `report_errors` returns
def fetch(_source = nil)
  # Clear every "*.<ext>" file already present in the output directory.
  stale_files = Dir[File.join(@output, "*.#{@ext}")]
  FileUtils.rm stale_files
  fetch_tech_pubs
  # add_static_files
  index.save
  report_errors
end
#fetch_tech_pubs ⇒ Object
22 23 24 25 26 |
# File 'lib/relaton/nist/data_fetcher.rb', line 22
#
# Download the MODS XML found at URL, parse it into a LocMods collection,
# and write one file per record via `write_file`.
#
# @return [Object] the result of iterating the collection's `mods`
def fetch_tech_pubs
  body = Mechanize.new.get(URL).body
  collection = LocMods::Collection.from_xml(body)
  collection.mods.each do |record|
    write_file ModsParser.new(record, series, @errors).parse
  end
end
#index ⇒ Object
73 74 75 76 77 |
# File 'lib/relaton/nist/data_fetcher.rb', line 73
#
# Memoized accessor for the :nist Relaton index, stored in
# "<INDEXFILE>.yaml" and keyed with Pubid::Nist::Identifier.
#
# @return [Object] the index returned by Relaton::Index.find_or_create
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(
    :nist,
    file: "#{INDEXFILE}.yaml",
    pubid_class: ::Pubid::Nist::Identifier
  )
end
#log_error(msg) ⇒ Object
69 70 71 |
# File 'lib/relaton/nist/data_fetcher.rb', line 69
#
# Forward an error message to Util.error.
#
# @param msg [String] the message to log
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end
#pubid(id) ⇒ Object
Parse a docidentifier string into a Pubid::Nist::Identifier; nil (with a warning) if pubid can’t parse it, so a single bad id never aborts the crawl or corrupts index-v2.
43 44 45 46 47 48 |
# File 'lib/relaton/nist/data_fetcher.rb', line 43
#
# Parse a docidentifier string into a Pubid::Nist::Identifier.
#
# Any StandardError raised by the parser is turned into a warning so that
# one unparseable id does not abort the whole crawl.
#
# @param id [String] the docidentifier content to parse
# @return [Object, nil] the parsed identifier, or nil when parsing fails
def pubid(id)
  ::Pubid::Nist::Identifier.parse id
rescue StandardError => e
  # Include the parser's own message so the failing id can be diagnosed.
  Util.warn "Failed to parse `#{id}` with pubid: #{e.message}"
  nil
end
#series ⇒ Object
79 80 81 |
# File 'lib/relaton/nist/data_fetcher.rb', line 79
#
# Memoized contents of the "series.yaml" file that sits next to this
# source file.
#
# @return [Object] the data structure loaded from series.yaml
def series
  # Resolve relative to this file's directory, not the process's cwd.
  @series ||= YAML.load_file File.expand_path("series.yaml", __dir__)
end
#to_bibxml(bib) ⇒ Object
65 66 67 |
# File 'lib/relaton/nist/data_fetcher.rb', line 65
#
# Serialize a bibliographic item by calling its `to_rfcxml` method.
#
# @param bib [#to_rfcxml] the item to serialize
# @return [Object] the value returned by `bib.to_rfcxml`
def to_bibxml(bib)
  bib.to_rfcxml
end
#to_xml(bib) ⇒ Object
61 62 63 |
# File 'lib/relaton/nist/data_fetcher.rb', line 61
#
# Serialize a bibliographic item through Bibdata.to_xml.
#
# @param bib [Object] the item to serialize
# @return [Object] the value returned by Bibdata.to_xml
def to_xml(bib)
  Bibdata.to_xml bib
end
#to_yaml(bib) ⇒ Object
# Register every YAML file under ./static in the index, keyed by the
# content of the item's first docidentifier.
# NOTE(review): the only visible reference to this helper, in #fetch, is
# commented out, so it appears to be disabled. Confirm before relying on it.
def add_static_files
  Dir["./static/*.yaml"].each do |path|
    yaml = File.read(path, encoding: "UTF-8")
    item = Item.from_yaml(yaml)
    index.add_or_update item.docidentifier[0].content, path
  end
end
57 58 59 |
# File 'lib/relaton/nist/data_fetcher.rb', line 57
#
# Serialize a bibliographic item through Item.to_yaml.
#
# @param bib [Object] the item to serialize
# @return [Object] the value returned by Item.to_yaml
def to_yaml(bib)
  Item.to_yaml bib
end
#write_file(bib) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/relaton/nist/data_fetcher.rb', line 28
#
# Write one bibliographic item to its output file and register it in the
# index.
#
# The primary docidentifier is used when there is one, otherwise the first.
# A leading "NIST IR" is rewritten to "NISTIR" before the file name is
# derived. When the target file was already produced in this run a warning
# is logged, but the file is still (re)written.
#
# @param bib [Object] item exposing `docidentifier`
# @return [Object] the result of File.write
def write_file(bib)
  docid = bib.docidentifier.find(&:primary) || bib.docidentifier.first
  path = output_file(docid.content.sub(/^NIST IR/, "NISTIR"))

  if @files.include?(path)
    Util.warn "File #{path} exists. Docid: #{docid.content}"
  else
    @files << path
  end

  # Only ids that pubid can parse are added to the index.
  parsed = pubid(docid.content)
  index.add_or_update(parsed, path) if parsed

  File.write(path, serialize(bib), encoding: "UTF-8")
end