Class: Relaton3gpp::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_3gpp/data_fetcher.rb

Constant Summary collapse

CURRENT =
"current.yaml".freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Data fetcher initializer

Parameters:

  • output (String)

    directory to save files

  • format (String)

    format of output files (xml, yaml, bibxml)



10
11
12
13
14
15
16
17
18
19
20
# File 'lib/relaton_3gpp/data_fetcher.rb', line 10

# Set up a fetcher that writes documents into +output+ using +format+.
#
# The libraries needed for fetching are loaded here, on first instantiation,
# rather than at file load time.
#
# @param output [String] directory to save files
# @param format [String] format of output files (xml, yaml, bibxml)
def initialize(output, format)
  %w[fileutils net/ftp zip mdb].each { |library| require library }

  @files = []
  @output = output
  @format = format
  # File extension is the format name without a leading "bib" (bibxml -> xml).
  @ext = format.sub(/^bib/, "")
end

Class Method Details

.fetch(source, output: "data", format: "yaml") ⇒ Object

Initialize fetcher and run fetch

Parameters:

  • source (String)

    source name

  • output (String) (defaults to: "data")

    directory to save files, default: “data”

  • format (String) (defaults to: "yaml")

    format of output files (xml, yaml, bibxml), default: yaml



29
30
31
32
33
34
35
36
37
# File 'lib/relaton_3gpp/data_fetcher.rb', line 29

# Create the output directory, build a fetcher and run it, printing the
# start time, stop time and elapsed seconds to standard output.
#
# @param source [String] source name; "status-smg-3GPP-force" requests a
#   forced renewal of all documents
# @param output [String] directory to save files, default: "data"
# @param format [String] format of output files (xml, yaml, bibxml),
#   default: yaml
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"

  FileUtils.mkdir_p(output)
  force_renewal = source == "status-smg-3GPP-force"
  new(output, format).fetch(force_renewal)

  stopped_at = Time.now
  puts "Stopped at: #{stopped_at}"
  elapsed = (stopped_at - started_at).round
  puts "Done in: #{elapsed} sec."
end

Instance Method Details

#fetch(renewal) ⇒ Object

Parse documents

Parameters:

  • renewal (Boolean)

    force to update all documents



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/relaton_3gpp/data_fetcher.rb', line 44

# Download the spec-status archive, extract the Access database from it and
# parse/save every row of the schedule table, then persist the download
# state to the CURRENT file.
#
# Does nothing when #get_file returns nil (no file to process).
#
# @param renewal [Boolean] force to update all documents; when true and the
#   schedule table has rows, existing files in the output directory are
#   removed first
def fetch(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file = get_file renewal
  return unless file

  # Extract the database from the archive into the working directory.
  Zip::File.open(file) do |zip_file|
    entry = zip_file.glob("status_smg_3GPP.mdb").first
    File.open("status_smg_3GPP.mdb", "wb") do |f|
      f.write entry.get_input_stream.read
    end
  end
  dbs = Mdb.open "status_smg_3GPP.mdb"
  specs = dbs["Specs_GSM+3G"]
  specrels = dbs["Specs_GSM+3G_release-info"]
  releases = dbs["Releases"]
  tstatus = dbs["temp-status"]
  # Read the schedule table once and reuse it for the emptiness check and
  # the iteration below.
  schedule = dbs["2001-04-25_schedule"]
  # FileUtils.rm_f does not expand wildcards, so the pattern has to be
  # expanded with Dir.glob; passing "dir/*" directly removes nothing.
  FileUtils.rm_f Dir.glob(File.join(@output, "*")) if renewal && schedule.any?
  schedule.each do |row|
    fetch_doc row, specs, specrels, releases, tstatus
  end
  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
end

#fetch_doc(row, specs, specrels, releases, tstatus) ⇒ Relaton3gpp::BibliographicItem?

Fetch document

Parameters:

  • row (Hash)

    row from mdb

  • specs (Array<Hash>)

    specs

  • specrels (Array<Hash>)

    specrels

  • releases (Array<Hash>)

    releases

  • tstatus (Array<Hash>)

    tstatus

Returns:



110
111
112
113
114
115
116
117
118
# File 'lib/relaton_3gpp/data_fetcher.rb', line 110

# Parse one schedule row into a document and save it.
#
# Any StandardError raised while parsing or saving is reported with `warn`
# (message, the row's publication identifier and the first six backtrace
# lines) instead of being propagated, so one bad row does not stop the run.
#
# @param row [Hash] row from mdb
# @param specs [Array<Hash>] specs
# @param specrels [Array<Hash>] specrels
# @param releases [Array<Hash>] releases
# @param tstatus [Array<Hash>] tstatus
def fetch_doc(row, specs, specrels, releases, tstatus)
  save_doc(Parser.parse(row, specs, specrels, releases, tstatus))
rescue StandardError => e
  version = "#{row[:MAJOR_VERSION_NB]}.#{row[:TECHNICAL_VERSION_NB]}.#{row[:EDITORIAL_VERSION_NB]}"
  warn "Error: #{e.message}"
  warn "PubID: #{row[:spec]}:#{row[:release]}/#{version}"
  warn e.backtrace[0..5].join("\n")
end

#file_name(bib) ⇒ String

Generate file name

Parameters:

  • bib (Relaton3gpp::BibliographicItem)

    bibliographic item

Returns:

  • (String)

    file name



149
150
151
152
# File 'lib/relaton_3gpp/data_fetcher.rb', line 149

# Build the output path for a bibliographic item.
#
# Whitespace, commas, colons and slashes in the document number are replaced
# with underscores, runs of underscores are collapsed, and the result is
# upcased and given the configured extension.
#
# @param bib [#docnumber] bibliographic item
# @return [String] file name inside the output directory
def file_name(bib)
  sanitized = bib.docnumber.gsub(/[\s,:\/]/, "_")
  stem = sanitized.squeeze("_").upcase
  File.join(@output, "#{stem}.#{@ext}")
end

#get_file(renewal) ⇒ String

Get file from FTP

Parameters:

  • renewal (Boolean)

    force to update all documents

Returns:

  • (String)

    file name



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/relaton_3gpp/data_fetcher.rb', line 73

# Download the first "*.zip" archive listed in the spec-status directory of
# the 3GPP FTP server into the working directory.
#
# The name and timestamp of the last downloaded archive are kept in
# @current (loaded from the CURRENT file when it exists). Unless +renewal+
# is set, nothing is downloaded when the listed archive has the same name
# and timestamp as the recorded one.
#
# A Net::ReadTimeout is retried with a fresh connection; after five failed
# attempts it is re-raised.
#
# @param renewal [Boolean] force to update all documents
# @return [String, nil] name of the downloaded file, or nil when the
#   recorded archive is still current
# @raise [Net::ReadTimeout] when all attempts time out
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  attempts = 0
  file = dt = nil
  begin
    # The block form closes the connection on every exit path (normal end,
    # early return, exception), so retries do not leak sockets.
    Net::FTP.open("www.3gpp.org") do |ftp|
      ftp.resume = true
      ftp.login # anonymous login; required before any directory command
      ftp.chdir "/Information/Databases/Spec_Status/"
      d, t, _, file = ftp.list("*.zip").first.split
      # Always parse the listing timestamp, so that a forced renewal also
      # records a real date instead of an empty string.
      dt = DateTime.strptime("#{d} #{t}", "%m-%d-%y %I:%M%p")
      # The recorded date is written with DateTime#to_s below, so comparing
      # strings is sufficient and tolerates a missing or empty recorded date.
      return if !renewal && file == @current["file"] && dt.to_s == @current["date"]

      ftp.getbinaryfile file
    end
  rescue Net::ReadTimeout
    attempts += 1
    retry if attempts < 5
    raise
  end
  @current["file"] = file
  @current["date"] = dt.to_s
  file
end

#save_doc(bib) ⇒ Object

Save document to file

Parameters:

  • bib (Relaton3gpp::BibliographicItem, nil)

    bibliographic item



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/relaton_3gpp/data_fetcher.rb', line 125

# Serialize a bibliographic item in the configured format and write it to
# the path given by #file_name.
#
# A warning is printed when the same path was already written during this
# run; the file is written (overwritten) regardless.
#
# @param bib [Object, nil] bibliographic item; nothing is done when it is
#   nil or false
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content =
    if @format == "xml"
      bib.to_xml(bibdata: true)
    elsif @format == "yaml"
      bib.to_hash.to_yaml
    else
      # Any other format maps to a serializer of the same name (e.g. to_bibxml).
      bib.send("to_#{@format}")
    end

  path = file_name(bib)
  if @files.include?(path)
    warn "File #{path} already exists. Document: #{bib.docnumber}"
  else
    @files << path
  end
  File.write(path, content, encoding: "UTF-8")
end