Class: Relaton3gpp::DataFetcher
- Inherits:
-
Object
- Object
- Relaton3gpp::DataFetcher
- Defined in:
- lib/relaton_3gpp/data_fetcher.rb
Constant Summary collapse
- CURRENT =
"current.yaml".freeze
Class Method Summary collapse
-
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
-
#fetch(renewal) ⇒ Object
Parse documents.
-
#fetch_doc(row, specs, specrels, releases, tstatus) ⇒ Relaton3gpp::BibliographicItem?
Fetch document.
-
#file_name(bib) ⇒ String
Generate file name.
-
#get_file(renewal) ⇒ String
Get file from FTP.
- #index ⇒ Object
-
#initialize(output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
-
#save_doc(bib) ⇒ Object
Save document to file.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Data fetcher initializer
10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 10
#
# Data fetcher initializer.
#
# The libraries used by the fetcher are loaded here, on first instantiation,
# rather than at file load time.
#
# @param output [String] directory the documents are written to
# @param format [String] output format; a leading "bib" is dropped to
#   obtain the file extension
def initialize(output, format)
  %w[fileutils net/ftp zip mdb].each { |library| require library }
  @output = output
  @format = format
  # File extension: the format name without a leading "bib".
  @ext = format.sub(/^bib/, "")
  # Paths already written during this run (used to report duplicates).
  @files = []
end
Class Method Details
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
33 34 35 36 37 38 39 40 41 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 33
#
# Initialize fetcher and run fetch.
#
# Prints the start time, the stop time and the elapsed seconds to stdout.
#
# @param source [String] data source name; "status-smg-3GPP-force" requests
#   a forced renewal, any other value a regular fetch
# @param output [String] output directory, created if it does not exist
# @param format [String] output format passed to the fetcher
def self.fetch(source, output: "data", format: "yaml")
  # FileUtils is needed before the instance initializer (which also requires
  # it) has had a chance to run.
  require "fileutils"

  t1 = Time.now
  puts "Started at: #{t1}"
  FileUtils.mkdir_p output
  new(output, format).fetch(source == "status-smg-3GPP-force")
  t2 = Time.now
  puts "Stopped at: #{t2}"
  puts "Done in: #{(t2 - t1).round} sec."
end
Instance Method Details
#fetch(renewal) ⇒ Object
Parse documents
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 48
#
# Parse documents.
#
# Obtains the archive via #get_file, extracts "status_smg_3GPP.mdb" from it
# into the current working directory, and passes every row of the
# "2001-04-25_schedule" table to #fetch_doc. Afterwards the download state
# is written to CURRENT and the index is saved.
#
# @param renewal [Boolean] when true and the schedule table is not empty,
#   the files in the output directory and all index entries are removed
#   before the documents are regenerated
def fetch(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file = get_file renewal
  # get_file returns nil when there is nothing to download.
  return unless file

  Zip::File.open(file) do |zip_file|
    entry = zip_file.glob("status_smg_3GPP.mdb").first
    File.open("status_smg_3GPP.mdb", "wb") do |f|
      f.write entry.get_input_stream.read
    end
  end
  dbs = Mdb.open "status_smg_3GPP.mdb"
  specs = dbs["Specs_GSM+3G"]
  specrels = dbs["Specs_GSM+3G_release-info"]
  releases = dbs["Releases"]
  tstatus = dbs["temp-status"]
  if renewal && dbs["2001-04-25_schedule"].any?
    # FileUtils.rm_f does not expand wildcards, so the pattern has to be
    # resolved with Dir.glob; otherwise nothing is removed.
    FileUtils.rm_f Dir.glob(File.join(@output, "*"))
    index.remove_all
  end
  dbs["2001-04-25_schedule"].each do |row|
    fetch_doc row, specs, specrels, releases, tstatus
  end
  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
  index.save
end
#fetch_doc(row, specs, specrels, releases, tstatus) ⇒ Relaton3gpp::BibliographicItem?
Fetch document
118 119 120 121 122 123 124 125 126 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 118
#
# Fetch document.
#
# Parses one row with Parser.parse and hands the result to #save_doc. Any
# StandardError raised on the way is reported on stderr (message, the
# publication identifier built from the row, and the first six backtrace
# lines) instead of being propagated, so one bad row does not abort the run.
#
# @param row [Hash] row of the schedule table
# @param specs [Object] "Specs_GSM+3G" table
# @param specrels [Object] "Specs_GSM+3G_release-info" table
# @param releases [Object] "Releases" table
# @param tstatus [Object] "temp-status" table
#
# @return [Object, nil] whatever #save_doc returns, or nil if an error was
#   rescued
def fetch_doc(row, specs, specrels, releases, tstatus)
  doc = Parser.parse row, specs, specrels, releases, tstatus
  save_doc doc
rescue StandardError => e
  warn "Error: #{e.message}"
  warn "PubID: #{row[:spec]}:#{row[:release]}/#{row[:MAJOR_VERSION_NB]}." \
       "#{row[:TECHNICAL_VERSION_NB]}.#{row[:EDITORIAL_VERSION_NB]}"
  warn e.backtrace[0..5].join("\n")
end
#file_name(bib) ⇒ String
Generate file name
158 159 160 161 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 158
#
# Generate file name.
#
# Whitespace, commas, colons and slashes in the document number become
# underscores, runs of underscores are collapsed to one, and the result is
# upcased before the extension is appended.
#
# @param bib [#docnumber] bibliographic item
#
# @return [String] path of the file inside the output directory
def file_name(bib)
  sanitized = bib.docnumber.gsub(%r{[\s,:/]}, "_")
  basename = sanitized.squeeze("_").upcase
  File.join(@output, "#{basename}.#{@ext}")
end
#get_file(renewal) ⇒ String
Get file from FTP
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 81
#
# Get file from FTP.
#
# Loads the previous download state from CURRENT (if that file exists),
# looks at the first "*.zip" entry listed in the Spec_Status directory of
# the 3GPP FTP server and downloads it into the current working directory.
# On success the archive name and its listing timestamp are recorded in
# @current.
#
# @param renewal [Boolean] when true, download even if the archive name and
#   timestamp match the recorded state
#
# @return [String, nil] name of the downloaded archive, or nil when the
#   recorded state shows it is unchanged and no renewal was requested
#
# @raise [Net::ReadTimeout] if the transfer timed out five times in a row
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  attempts = 0
  # Declared outside the block so the values survive it.
  file = dt = nil
  begin
    # The block form closes the connection on every exit path (normal
    # completion, early return and exceptions), including before a retry.
    Net::FTP.open("www.3gpp.org") do |ftp|
      ftp.resume = true
      ftp.login
      ftp.chdir "/Information/Databases/Spec_Status/"
      d, t, _, file = ftp.list("*.zip").first.split
      # Parsed for renewals as well, so that the recorded date is never
      # empty and the next regular run can detect an unchanged archive.
      dt = DateTime.strptime("#{d} #{t}", "%m-%d-%y %I:%M%p")
      unless renewal
        recorded = @current["date"].to_s
        return if file == @current["file"] && !recorded.empty? &&
                  dt == DateTime.parse(recorded)
      end

      ftp.getbinaryfile file
    end
  rescue Net::ReadTimeout
    attempts += 1
    retry if attempts < 5
    raise
  end
  @current["file"] = file
  @current["date"] = dt.to_s
  file
end
#index ⇒ Object
22 23 24 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 22
#
# Index of the fetched documents.
#
# The index is looked up (or created) once through Relaton::Index and the
# same object is returned on every later call.
#
# @return [Object] the "3gpp" index backed by "index-v1.yaml"
def index
  return @index if @index

  @index = Relaton::Index.find_or_create("3gpp", file: "index-v1.yaml")
end
#save_doc(bib) ⇒ Object
Save document to file
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 133
#
# Save document to file.
#
# Serializes the item according to the configured format, records the
# target path in the index and writes the file as UTF-8. If the same path
# was already written during this run a warning is printed; the file is
# written again regardless.
#
# @param bib [Object, nil] bibliographic item; nothing happens when it is
#   nil or false
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content =
    if "xml" == @format
      bib.to_xml(bibdata: true)
    elsif "yaml" == @format
      bib.to_hash.to_yaml
    else
      # Any other format is dispatched to the matching to_<format> method.
      bib.send("to_#{@format}")
    end
  path = file_name(bib)
  if @files.include?(path)
    warn "File #{path} already exists. Document: #{bib.docnumber}"
  else
    @files.push(path)
  end
  index.add_or_update(bib.docnumber, path)
  File.write(path, content, encoding: "UTF-8")
end