Class: Relaton3gpp::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_3gpp/data_fetcher.rb

Constant Summary collapse

CURRENT =
"current.yaml".freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Data fetcher initializer

Parameters:

  • output (String)

    directory to save files

  • format (String)

    format of output files (xml, yaml, bibxml)



15
16
17
18
19
20
# File 'lib/relaton_3gpp/data_fetcher.rb', line 15

# Data fetcher initializer
#
# @param output [String] directory to save files
# @param format [String] format of output files (xml, yaml, bibxml)
def initialize(output, format)
  @output, @format = output, format
  # The file extension is the format name with a leading "bib" removed.
  @ext = @format.sub(/^bib/, "")
  @files = []
end

Class Method Details

.fetch(output: "data", format: "yaml") ⇒ Object

Initialize fetcher and run fetch

Parameters:

  • output (String) (defaults to: "data")

    directory to save files, default: “data”

  • format (String) (defaults to: "yaml")

    format of output files (xml, yaml, bibxml), default: yaml



28
29
30
31
32
33
34
35
36
# File 'lib/relaton_3gpp/data_fetcher.rb', line 28

# Initialize fetcher and run fetch, printing start/stop times and the
# elapsed whole seconds to standard output.
#
# @param output [String] directory to save files, default: "data"
# @param format [String] format of output files (xml, yaml, bibxml), default: yaml
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  # Create the target directory only when it is not there yet.
  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  new(output, format).fetch
  finished_at = Time.now
  puts "Stopped at: #{finished_at}"
  elapsed = (finished_at - started_at).round
  puts "Done in: #{elapsed} sec."
end

Instance Method Details

#fetch ⇒ Object

Parse documents



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/relaton_3gpp/data_fetcher.rb', line 41

# Parse documents
#
# Obtains the archive via #get_file, extracts status_smg_3GPP.mdb from it
# into the current working directory, passes every row of the
# "2001-04-25_schedule" table to #fetch_doc and finally writes @current
# to CURRENT as YAML. Returns early (nil) when #get_file returns nil.
#
# @raise [RuntimeError] if the archive has no status_smg_3GPP.mdb entry
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file = get_file
  return unless file

  mdb = "status_smg_3GPP.mdb"
  Zip::File.open(file) do |zip_file|
    entry = zip_file.glob(mdb).first
    # Fail with a descriptive message instead of a NoMethodError on nil.
    raise "#{mdb} not found in #{file}" unless entry

    # Block form closes the entry's input stream once it has been read.
    File.binwrite mdb, entry.get_input_stream(&:read)
  end
  dbs = Mdb.open mdb
  specs = dbs["Specs_GSM+3G"]
  specrels = dbs["Specs_GSM+3G_release-info"]
  releases = dbs["Releases"]
  tstatus = dbs["temp-status"]
  dbs["2001-04-25_schedule"].each do |row|
    fetch_doc row, specs, specrels, releases, tstatus
  end
  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
end

#fetch_doc(row, specs, specrels, releases, tstatus) ⇒ Relaton3gpp::BibliographicItem?

Fetch document

Parameters:

  • row (Hash)

    row from mdb

  • specs (Array<Hash>)

    specs

  • specrels (Array<Hash>)

    specrels

  • releases (Array<Hash>)

    releases

  • tstatus (Array<Hash>)

    tstatus

Returns:



102
103
104
105
106
107
108
109
110
# File 'lib/relaton_3gpp/data_fetcher.rb', line 102

# Fetch document
#
# Parses the row with Parser.parse and hands the result to #save_doc.
# Any StandardError is reported through warn (message, a PubID built from
# the row, and the first six backtrace lines) instead of being propagated.
#
# @param row [Hash] row from mdb
# @param specs [Array<Hash>] specs
# @param specrels [Array<Hash>] specrels
# @param releases [Array<Hash>] releases
# @param tstatus [Array<Hash>] tstatus
def fetch_doc(row, specs, specrels, releases, tstatus)
  save_doc(Parser.parse(row, specs, specrels, releases, tstatus))
rescue StandardError => e
  warn "Error: #{e.message}"
  release_part = "#{row[:spec]}:#{row[:release]}"
  version_part = "#{row[:MAJOR_VERSION_NB]}.#{row[:TECHNICAL_VERSION_NB]}.#{row[:EDITORIAL_VERSION_NB]}"
  warn "PubID: #{release_part}/#{version_part}"
  warn e.backtrace.first(6).join("\n")
end

#file_name(bib) ⇒ String

Generate file name

Parameters:

  • bib (Relaton3gpp::BibliographicItem)

    bibliographic item

Returns:

  • (String)

    file name



141
142
143
144
# File 'lib/relaton_3gpp/data_fetcher.rb', line 141

# Generate file name
#
# Whitespace, commas, colons and slashes in the document number become
# underscores, runs of underscores are collapsed, and the result is upcased
# and placed under @output with the @ext extension.
#
# @param bib [#docnumber] bibliographic item
# @return [String] file name
def file_name(bib)
  sanitized = bib.docnumber.gsub(/[\s,:\/]/, "_")
  basename = sanitized.squeeze("_").upcase
  File.join(@output, "#{basename}.#{@ext}")
end

#get_file ⇒ String

Get file from FTP

Returns:

  • (String)

    file name



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/relaton_3gpp/data_fetcher.rb', line 67

# Get file from FTP
#
# Loads the previously recorded state from CURRENT (if that file exists),
# looks up the first "*.zip" entry in the Spec_Status directory and
# downloads it unless its name and timestamp match the recorded ones.
# A Net::ReadTimeout is retried; the fifth timeout is re-raised.
# The FTP connection is closed on every exit path.
#
# @return [String, nil] downloaded file name, or nil when the remote file
#   matches the one recorded in CURRENT
# @raise [Net::ReadTimeout] when all five attempts time out
def get_file # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  attempts = 0
  begin
    ftp = Net::FTP.new("www.3gpp.org")
    ftp.resume = true
    # Log in (anonymously) before issuing any directory commands.
    ftp.login
    ftp.chdir "/Information/Databases/Spec_Status/"
    d, t, _, file = ftp.list("*.zip").first.split
    dt = DateTime.strptime("#{d} #{t}", "%m-%d-%y %I:%M%p")
    return if file == @current["file"] && dt == DateTime.parse(@current["date"])

    ftp.getbinaryfile file
  rescue Net::ReadTimeout
    attempts += 1
    raise if attempts >= 5

    # The ensure clause only runs when the begin block is finally left,
    # so release the timed-out connection before opening a new one.
    ftp&.close
    retry
  ensure
    ftp&.close
  end
  @current["file"] = file
  @current["date"] = dt.to_s
  file
end

#save_doc(bib) ⇒ Object

Save document to file

Parameters:

  • bib (Relaton3gpp::BibliographicItem, nil)

    bibliographic item



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/relaton_3gpp/data_fetcher.rb', line 117

# Save document to file
#
# Serializes the item according to @format ("xml", "yaml", or any other
# value, which is dispatched to the item's to_<format> method) and writes
# it to the path given by #file_name. A warning is printed when the same
# path was already written during this run; the file is written regardless.
#
# @param bib [Object, nil] bibliographic item; nil is ignored
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content =
    if @format == "xml"
      bib.to_xml(bibdata: true)
    elsif @format == "yaml"
      bib.to_hash.to_yaml
    else
      bib.send("to_#{@format}")
    end
  path = file_name(bib)
  if @files.include?(path)
    warn "File #{path} already exists. Document: #{bib.docnumber}"
  else
    @files.push(path)
  end
  File.write(path, content, encoding: "UTF-8")
end