Class: Relaton::ThreeGpp::DataFetcher

Inherits:
Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/3gpp/data_fetcher.rb

Constant Summary collapse

CURRENT =
"current.yaml".freeze
CSV_URL =
"https://www.3gpp.org/ftp/Information/Databases/3GPPBibliography.csv".freeze

Instance Method Summary collapse

Instance Method Details

#add_affiliation(contrib, affiliation) ⇒ Object



229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/relaton/3gpp/data_fetcher.rb', line 229

# Append to the contributor's person every affiliation it does not hold yet.
#
# @param contrib [Object] contributor whose `person.affiliation` list is extended
# @param affiliation [Enumerable] candidate affiliations
# @return [Boolean] true if at least one affiliation was appended
def add_affiliation(contrib, affiliation)
  appended = false

  affiliation.each do |candidate|
    # Already present: nothing to add for this entry.
    next if contrib.person.affiliation.include?(candidate)

    contrib.person.affiliation << candidate
    appended = true
  end

  appended
end

#add_contributor(bib1, bib2) ⇒ Object

rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/relaton/3gpp/data_fetcher.rb', line 210

# Merge the person contributors of +bib2+ into +bib1+.
#
# Contributors of +bib2+ that have no person are skipped. A contributor whose
# person name equals the name of a person contributor already in +bib1+ only
# donates its affiliations (via #add_affiliation); any other contributor is
# appended to +bib1.contributor+.
#
# @param bib1 [Object] item that receives contributors and affiliations
# @param bib2 [Object] item whose contributors are merged into +bib1+
# @return [Boolean] true if +bib1+ has been changed
def add_contributor(bib1, bib2) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  changed = false

  bib2.contributor.each do |bc|
    next unless bc.person

    # Require a person on the existing contributor. With `ic.person&.name`
    # a contributor without a person compares as nil and would "match" a
    # person whose name is nil, after which add_affiliation dereferences
    # the missing person.
    existed = bib1.contributor.find do |ic|
      ic.person && ic.person.name == bc.person.name
    end

    if existed
      changed = true if add_affiliation(existed, bc.person.affiliation)
    else
      bib1.contributor << bc
      changed = true
    end
  end

  changed
end

#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem

Add transposed relation

Parameters:

  • bib1 (Relaton3gpp::BibliographicItem)

    the main bibliographic item

  • bib2 (Relaton3gpp::BibliographicItem)

    the transposed bibliographic item

Returns:

  • (Relaton3gpp::BibliographicItem)


202
203
204
205
206
207
208
# File 'lib/relaton/3gpp/data_fetcher.rb', line 202

# Move every relation of +bib2+ onto +bib1+, then attach +bib2+ itself to
# +bib1+ as an "adoptedAs" relation described as "equivalent".
#
# @param bib1 [Object] the main bibliographic item
# @param bib2 [Object] the transposed bibliographic item
# @return [Object] the result of appending the new relation to +bib1.relation+
def add_transposed_relation(bib1, bib2)
  bib2.relation.each do |inherited|
    bib1.relation << inherited
  end
  # bib2 is about to be nested inside bib1, so it must not keep its own relations.
  bib2.relation.clear

  bib1.relation << Bib::Relation.new(
    type: "adoptedAs",
    bibitem: bib2,
    description: Bib::LocalizedMarkedUpString.new(content: "equivalent"),
  )
end

#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>

Check if date of one bibliographic item is transposed to another

Parameters:

  • bib (Relaton3gpp::BibliographicItem)

    new bibliographic item

  • existed (Relaton3gpp::BibliographicItem)

    existing bibliographic item

Returns:

  • (Array<Relaton3gpp::BibliographicItem, Boolean>)

    main bibliographic item, related bibliographic item, true if relation has been added



183
184
185
186
187
188
189
190
191
192
# File 'lib/relaton/3gpp/data_fetcher.rb', line 183

# Compare the first dates of both items; the item with the earlier date
# becomes the main one and receives the other as a transposed relation.
#
# @param bib [Object] new bibliographic item
# @param existed [Object] existing bibliographic item
# @return [Array] main item, related item, and true if a relation was added
def check_transposed_date(bib, existed)
  new_at = bib.date[0].at
  old_at = existed.date[0].at

  if new_at < old_at
    add_transposed_relation bib, existed
    return [bib, existed, true]
  end

  if new_at > old_at
    add_transposed_relation existed, bib
    return [existed, bib, true]
  end

  # Equal dates: neither item is a transposition of the other.
  [bib, existed, false]
end

#download(uri, tmp_file) ⇒ Object



87
88
89
90
91
92
93
94
95
96
# File 'lib/relaton/3gpp/data_fetcher.rb', line 87

# Issue a GET request for +uri+ and stream the response body into +tmp_file+.
#
# The body is written chunk by chunk as it is read. The connection uses a
# 30 second open timeout and a 120 second read timeout, with TLS when the
# URI scheme is "https".
#
# @param uri [URI] location to download
# @param tmp_file [String] path of the file to write
# @raise [RuntimeError] if the response is not a Net::HTTPSuccess
def download(uri, tmp_file)
  connection = { use_ssl: uri.scheme == "https", open_timeout: 30, read_timeout: 120 }

  Net::HTTP.start(uri.host, uri.port, connection) do |http|
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |resp|
      raise "HTTP #{resp.code} from #{uri}" unless resp.is_a?(Net::HTTPSuccess)

      File.open(tmp_file, "wb") do |out|
        resp.read_body { |chunk| out.write(chunk) }
      end
    end
  end
end

#fetch(source) ⇒ Object

Parse documents

Parameters:

  • source (String)

    source of documents, status-smg-3GPP for update or status-smg-3GPP-force for renewal



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/relaton/3gpp/data_fetcher.rb', line 27

# Parse documents
#
# Obtains the CSV file through #get_file and saves every parsed row with
# #save_doc. Nothing is done when no file is returned, when the file does not
# exist, or when it is not larger than 20_000_000 bytes. On renewal all files
# under +@output+ and all index entries are removed first. Afterwards
# +@current+ is written to CURRENT, the index is saved and errors are reported.
#
# @param source [String] source of documents; "status-smg-3GPP-force"
#   requests a renewal, any other value an update
def fetch(source) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  renewal = source == "status-smg-3GPP-force"
  file = get_file renewal
  return unless file && File.exist?(file) && File.size(file) > 20_000_000

  if renewal
    FileUtils.rm_f Dir.glob(File.join(@output, "/*"))
    index.remove_all
  end

  # Block form: the CSV file handle is closed when the block finishes or
  # raises. A bare CSV.open(...).each leaves the handle open.
  CSV.open(file, "r:bom|utf-8", headers: true, col_sep: ";") do |csv|
    csv.each { |row| save_doc Parser.parse(row, @errors) }
  end

  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
  index.save
  report_errors
end

#get_file(renewal) ⇒ String?

Get file via HTTPS. If file has not changed, return nil

Parameters:

  • renewal (Boolean)

    force to update all documents

Returns:

  • (String, nil)

    file name



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/relaton/3gpp/data_fetcher.rb', line 51

# Get file via HTTPS. If file has not changed, return nil
#
# Loads the state stored in CURRENT (when that file exists), asks the server
# for the Last-Modified value of CSV_URL and downloads the CSV to the system
# temporary directory unless the stored date equals the server date.
# Timeouts and socket errors are retried; the fifth failure is re-raised.
#
# @param renewal [Boolean] force to update all documents
# @return [String, nil] file name, or nil when there is no Last-Modified
#   value or the file has not changed
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  @current = YAML.load_file(CURRENT) if File.exist?(CURRENT)
  @current ||= {}
  uri = URI(CSV_URL)
  attempts = 0

  begin
    last_modified = head_last_modified(uri)
    return unless last_modified

    dt = DateTime.parse(last_modified)
    saved = @current["date"]
    # The stored date is only parsed for a non-forced run with a stored value.
    unless renewal || saved.to_s.empty?
      return if dt == DateTime.parse(saved)
    end

    tmp_file = File.join(Dir.tmpdir, "3gpp.csv")
    download(uri, tmp_file)
  rescue Net::OpenTimeout, Net::ReadTimeout, SocketError => e
    attempts += 1
    raise e if attempts >= 5

    retry
  end

  @current["date"] = dt.to_s
  tmp_file
end

#head_last_modified(uri) ⇒ Object



77
78
79
80
81
82
83
84
85
# File 'lib/relaton/3gpp/data_fetcher.rb', line 77

# Issue a HEAD request for +uri+ and read its Last-Modified header.
#
# The connection uses 30 second open and read timeouts, with TLS when the
# URI scheme is "https".
#
# @param uri [URI] location to query
# @return [String, nil] value of the "last-modified" response header
# @raise [RuntimeError] if the response is not a Net::HTTPSuccess
def head_last_modified(uri)
  connection = { use_ssl: uri.scheme == "https", open_timeout: 30, read_timeout: 30 }

  Net::HTTP.start(uri.host, uri.port, connection) do |http|
    request = Net::HTTP::Head.new(uri.request_uri)
    resp = http.request(request)
    raise "HTTP #{resp.code} from #{uri}" unless resp.is_a?(Net::HTTPSuccess)

    resp["last-modified"]
  end
end

#index ⇒ Object



17
18
19
# File 'lib/relaton/3gpp/data_fetcher.rb', line 17

# Memoized "3gpp" index, found or created on first use with the file name
# built from INDEXFILE.
#
# @return [Object] whatever Relaton::Index.find_or_create returns
def index
  return @index if @index

  @index = Relaton::Index.find_or_create("3gpp", file: "#{INDEXFILE}.yaml")
end

#log_error(msg) ⇒ Object



13
14
15
# File 'lib/relaton/3gpp/data_fetcher.rb', line 13

# Report an error by forwarding the message to Util.error.
#
# @param msg [Object] message passed unchanged to Util.error
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error msg
end

#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?

Merge duplication

Parameters:

  • bib (Relaton3gpp::BibliographicItem)

    new bibliographic item

  • file (String)

    file name of existing bibliographic item

Returns:

  • (Relaton3gpp::BibliographicItem, nil)

    merged bibliographic item or nil if no merge has been done



126
127
128
129
130
131
132
133
134
135
# File 'lib/relaton/3gpp/data_fetcher.rb', line 126

# Merge duplication
#
# Loads the existing item from +file+, then always runs all three merge
# steps: source update, transposed relation and contributor merge.
#
# @param bib [Object] new bibliographic item
# @param file [String] file name of existing bibliographic item
# @return [Object, nil] merged bibliographic item or nil if no merge has been done
def merge_duplication(bib, file)
  existed = Item.from_hash(YAML.load_file(file))

  source_updated = update_source?(bib, existed)
  main_item, related, relation_added = transposed_relation(bib, existed)
  contributors_added = add_contributor(main_item, related)

  main_item if source_updated || relation_added || contributors_added
end

#save_doc(bib) ⇒ Object

Save document to file

Parameters:

  • bib (Relaton3gpp::BibliographicItem, nil)

    bibliographic item



103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/relaton/3gpp/data_fetcher.rb', line 103

# Save document to file
#
# The first document for an output file is registered in +@files+ and in the
# index, then written. A later document for the same file is merged with the
# stored one; if the merge yields nothing, a warning is issued and the file
# is left untouched.
#
# @param bib [Object, nil] bibliographic item; nothing is done when it is nil
def save_doc(bib) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  return unless bib

  file = output_file(bib.docnumber)

  if @files.include?(file)
    to_write = merge_duplication(bib, file)
    if to_write.nil?
      Util.warn "File #{file} already exists. Document: #{bib.docnumber}"
      return
    end
  else
    @files << file
    index.add_or_update bib.docnumber, file
    to_write = bib
  end

  File.write file, serialize(to_write), encoding: "UTF-8"
end

#to_bibxml(bib) ⇒ Object



250
251
252
# File 'lib/relaton/3gpp/data_fetcher.rb', line 250

# Serialize +bib+ by delegating to its #to_rfcxml method.
#
# @param bib [Object] item responding to #to_rfcxml
# @return [Object] whatever +bib.to_rfcxml+ returns
def to_bibxml(bib)
  bib.to_rfcxml
end

#to_xml(bib) ⇒ Object



242
243
244
# File 'lib/relaton/3gpp/data_fetcher.rb', line 242

# Serialize +bib+ by calling its #to_xml method with +bibdata: true+.
#
# @param bib [Object] item responding to #to_xml
# @return [Object] whatever +bib.to_xml+ returns
def to_xml(bib)
  bib.to_xml(bibdata: true)
end

#to_yaml(bib) ⇒ Object



246
247
248
# File 'lib/relaton/3gpp/data_fetcher.rb', line 246

# Serialize +bib+ by delegating to its #to_yaml method.
#
# @param bib [Object] item responding to #to_yaml
# @return [Object] whatever +bib.to_yaml+ returns
def to_yaml(bib)
  bib.to_yaml
end

#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>

If one of the bibliographic items has a date greater than the other's, make it a relation

Parameters:

  • bib (Relaton3gpp::BibliographicItem)

    new bibliographic item

  • existed (Relaton3gpp::BibliographicItem)

    existing bibliographic item

Returns:

  • (Array<Relaton3gpp::BibliographicItem, Boolean>)

    main bibliographic item, related bibliographic item, true if relation has been added



165
166
167
168
169
170
171
172
# File 'lib/relaton/3gpp/data_fetcher.rb', line 165

# Decide which of two duplicate items is the main one, based on their dates.
#
# When the existing item has no date, the new item stays main and nothing is
# flagged. When only the existing item has a date, it becomes main and the
# result is flagged as changed. When both have dates, the decision is left
# to #check_transposed_date.
#
# @param bib [Object] new bibliographic item
# @param existed [Object] existing bibliographic item
# @return [Array] main item, related item, and the changed flag
def transposed_relation(bib, existed) # rubocop:disable Metrics/CyclomaticComplexity
  bib_undated = bib.date.none?
  existed_undated = existed.date.none?

  return [bib, existed, false] if existed_undated
  return [existed, bib, true] if bib_undated

  check_transposed_date bib, existed
end

#update_source?(bib1, bib2) ⇒ Boolean

Update link in case one of bibliographic items has no link

Parameters:

  • bib1 (Relaton3gpp::BibliographicItem)
  • bib2 (Relaton3gpp::BibliographicItem)

Returns:

  • (Boolean)

    true if link has been updated



145
146
147
148
149
150
151
152
153
154
# File 'lib/relaton/3gpp/data_fetcher.rb', line 145

# Update link in case one of bibliographic items has no link
#
# When exactly one of the items has a source, that source is assigned to the
# other item.
#
# @param bib1 [Object] first bibliographic item
# @param bib2 [Object] second bibliographic item
# @return [Boolean] true if link has been updated
def update_source?(bib1, bib2)
  donor, receiver =
    if bib1.source.any? && bib2.source.empty? then [bib1, bib2]
    elsif bib1.source.empty? && bib2.source.any? then [bib2, bib1]
    end
  return false unless donor

  receiver.source = donor.source
  true
end