Class: Relaton3gpp::DataFetcher
- Inherits:
-
Object
- Object
- Relaton3gpp::DataFetcher
- Defined in:
- lib/relaton_3gpp/data_fetcher.rb
Constant Summary collapse
- CURRENT =
"current.yaml".freeze
Class Method Summary collapse
-
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
-
#fetch(renewal) ⇒ Object
Parse documents.
-
#file_name(bib) ⇒ String
Generate file name.
-
#get_file(renewal) ⇒ String?
Get file from FTP.
- #index ⇒ Object
-
#initialize(output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
-
#save_doc(bib) ⇒ Object
Save document to file.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Data fetcher initializer
10 11 12 13 14 15 16 17 18 19 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 10
#
# Data fetcher initializer.
#
# Loads the libraries used by the fetcher on first construction and stores
# the output settings.
#
# @param output [String] directory the generated files are placed in
# @param format [String] output format; a leading "bib" is stripped to form
#   the file extension
def initialize(output, format)
  %w[fileutils net/ftp csv].each { |lib| require lib }

  @ext = format.sub(/^bib/, "")
  @format = format
  @output = output
  # Paths written so far; consulted by #save_doc to report duplicates.
  @files = []
end
Class Method Details
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
32 33 34 35 36 37 38 39 40 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 32
#
# Initialize fetcher and run fetch.
#
# Prints the start and stop timestamps and the elapsed time (rounded to
# whole seconds) to stdout, and creates the output directory if needed.
#
# @param source [String] data source name; "status-smg-3GPP-force" requests
#   a renewal (passed as true to the instance-level #fetch)
# @param output [String] output directory
# @param format [String] output format handed to the constructor
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p output
  fetcher = new(output, format)
  renewal = source == "status-smg-3GPP-force"
  fetcher.fetch(renewal)
  stopped_at = Time.now
  puts "Stopped at: #{stopped_at}"
  elapsed = (stopped_at - started_at).round
  puts "Done in: #{elapsed} sec."
end
Instance Method Details
#fetch(renewal) ⇒ Object
Parse documents
47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 47
#
# Parse documents.
#
# Obtains the CSV file through #get_file and, when it exists and is larger
# than 20_000_000 bytes, parses every row and saves the resulting document.
# On renewal the files in the output directory and all index entries are
# removed first. Afterwards the state held in @current is written to
# CURRENT and the index is saved.
#
# @param renewal [Boolean] when true, previously saved files and index
#   entries are removed before parsing
def fetch(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file = get_file renewal
  return unless file && File.exist?(file) && File.size(file) > 20_000_000

  if renewal
    # FileUtils.rm_f does not expand wildcards, so the pattern must be
    # resolved with Dir.glob before the matches can be removed.
    FileUtils.rm_f Dir.glob(File.join(@output, "*"))
    index.remove_all
  end
  # The block form closes the CSV file once all rows have been processed.
  CSV.open(file, "r:bom|utf-8", headers: true, col_sep: ";") do |csv|
    csv.each { |row| save_doc Parser.parse(row) }
  end
  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
  index.save
end
#file_name(bib) ⇒ String
Generate file name
150 151 152 153 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 150
#
# Generate file name.
#
# Whitespace, commas, colons and slashes in the document number are replaced
# with underscores, runs of underscores are collapsed, and the result is
# upcased.
#
# @param bib [#docnumber] document whose number forms the base name
# @return [String] path inside the output directory with the configured
#   extension
def file_name(bib)
  sanitized = bib.docnumber.gsub(/[\s,:\/]/, "_")
  base = sanitized.squeeze("_").upcase
  File.join(@output, "#{base}.#{@ext}")
end
#get_file(renewal) ⇒ String?
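Get file from FTP. Returns nil if no CSV file is listed on the server or, unless a renewal is requested, if the listed file has not changed since the previous fetch.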
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 69
#
# Get file from FTP.
#
# Loads the state of the previous run from CURRENT (when that file exists),
# takes the first CSV entry listed in /Information/Databases/ on
# www.3gpp.org and downloads it into the temporary directory. The entry's
# name and timestamp are then recorded in @current.
#
# @param renewal [Boolean] when true, download even if the listed file
#   matches the one recorded by the previous run
# @return [String, nil] path of the downloaded file; nil when no CSV is
#   listed or, without renewal, when name and timestamp match the recorded
#   ones
# @raise [Net::ReadTimeout] when the fifth attempt times out as well
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  attempts = 0
  begin
    ftp = Net::FTP.new("www.3gpp.org")
    begin
      ftp.resume = true
      ftp.login
      ftp.chdir "/Information/Databases/"
      file_path = ftp.list("*.csv").first
      return unless file_path

      d, t, _, file = file_path.split
      dt = DateTime.strptime("#{d} #{t}", "%m-%d-%y %I:%M%p")
      # to_s keeps a state file without a "date" entry from raising on nil.
      recorded = @current["date"].to_s
      unchanged = file == @current["file"] && !recorded.empty? &&
                  dt == DateTime.parse(recorded)
      return if !renewal && unchanged

      tmp_file = File.join Dir.tmpdir, "3gpp.csv"
      ftp.get(file, tmp_file)
    ensure
      # Close the connection on every exit path (success, early return,
      # error) so that a retry does not leave the previous one open.
      ftp.close
    end
  rescue Net::ReadTimeout
    attempts += 1
    retry if attempts < 5
    raise
  end
  @current["file"] = file
  @current["date"] = dt.to_s
  tmp_file
end
#index ⇒ Object
21 22 23 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 21
#
# Index used to register saved documents; obtained on first use and cached
# in @index.
#
# @return [Object] the value returned by Relaton::Index.find_or_create for
#   "3gpp" with the file "index-v1.yaml"
def index
  return @index if @index

  @index = Relaton::Index.find_or_create "3gpp", file: "index-v1.yaml"
end
#save_doc(bib) ⇒ Object
Save document to file
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 125
#
# Save document to file.
#
# Serializes the document according to @format, warns when the target path
# was already written during this run, registers the document number in the
# index and writes the file as UTF-8.
#
# @param bib [Object, nil] document to save; nothing happens when nil/false
# @return [Object, nil] result of File.write, or nil when bib is nil/false
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content =
    if @format == "xml" then bib.to_xml(bibdata: true)
    elsif @format == "yaml" then bib.to_hash.to_yaml
    else bib.send("to_#{@format}")
    end
  path = file_name(bib)
  if @files.include?(path)
    Util.warn "File #{path} already exists. Document: #{bib.docnumber}"
  else
    @files << path
  end
  index.add_or_update bib.docnumber, path
  File.write path, content, encoding: "UTF-8"
end