Class: Relaton::ThreeGpp::DataFetcher
- Inherits:
-
Core::DataFetcher
- Object
- Core::DataFetcher
- Relaton::ThreeGpp::DataFetcher
- Defined in:
- lib/relaton/3gpp/data_fetcher.rb
Constant Summary collapse
- CURRENT =
"current.yaml".freeze
- CSV_URL =
"https://www.3gpp.org/ftp/Information/Databases/3GPPBibliography.csv".freeze
- CSV_FILE =
"3GPPBibliography.csv".freeze
Instance Method Summary collapse
- #add_affiliation(contrib, affiliation) ⇒ Object
-
#add_contributor(bib1, bib2) ⇒ Object
rubocop:disable Metrics/MethodLength,Metrics/AbcSize.
-
#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem
Add transposed relation.
-
#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
Check if date of one bibliographic item is transposed to another.
- #download(uri, tmp_file) ⇒ Object
-
#fetch(source) ⇒ Object
Parse documents.
-
#get_file(renewal) ⇒ String?
Get file via HTTPS.
- #head_last_modified(uri) ⇒ Object
- #index ⇒ Object
- #log_error(msg) ⇒ Object
-
#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?
Merge duplication.
-
#save_doc(bib) ⇒ Object
Save document to file.
- #to_bibxml(bib) ⇒ Object
- #to_xml(bib) ⇒ Object
- #to_yaml(bib) ⇒ Object
-
#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
If one of the bibliographic items has a date greater than the other, make it a relation.
-
#update_source(bib1, bib2) ⇒ Boolean
Update link in case one of bibliographic items has no link.
Instance Method Details
#add_affiliation(contrib, affiliation) ⇒ Object
230 231 232 233 234 235 236 237 238 239 240 241 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 230

# Append to the contributor's person every affiliation it does not hold yet.
#
# @param contrib [Object] contributor; its `person.affiliation` collection is extended in place
# @param affiliation [Enumerable] candidate affiliations
# @return [Boolean] true if at least one affiliation was appended
def add_affiliation(contrib, affiliation)
  added = false
  affiliation.each do |candidate|
    # Skip anything already present, including items appended earlier in this loop.
    next if contrib.person.affiliation.include?(candidate)

    contrib.person.affiliation << candidate
    added = true
  end
  added
end
#add_contributor(bib1, bib2) ⇒ Object
rubocop:disable Metrics/MethodLength,Metrics/AbcSize
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 211

# Merge the person contributors of bib2 into bib1.
#
# A contributor of bib2 whose person name matches a contributor of bib1 only
# donates its affiliations (via #add_affiliation); otherwise the contributor
# is appended to bib1. Contributors without a person are ignored.
#
# @param bib1 [Object] item whose `contributor` collection is extended in place
# @param bib2 [Object] item whose contributors are merged in
# @return [Boolean] true if bib1 was modified
def add_contributor(bib1, bib2) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  changed = false
  bib2.contributor.each do |incoming|
    person = incoming.person
    next unless person

    match = bib1.contributor.find { |known| known.person&.name == person.name }
    if match
      merged = add_affiliation(match, person.affiliation)
      changed ||= merged
    else
      bib1.contributor << incoming
      changed = true
    end
  end
  changed
end
#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem
Add transposed relation
203 204 205 206 207 208 209 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 203

# Add transposed relation.
#
# Moves every relation of bib2 onto bib1, empties bib2's relations, then
# appends to bib1 an "adoptedAs" relation that points at bib2 and carries the
# description "equivalent".
#
# @param bib1 [Relaton3gpp::BibliographicItem] item that receives the relations
# @param bib2 [Relaton3gpp::BibliographicItem] item that becomes the relation target
# @return [Object] the result of the final `bib1.relation <<` call
def add_transposed_relation(bib1, bib2)
  bib2.relation.each do |moved|
    bib1.relation << moved
  end
  bib2.relation.clear
  bib1.relation << Bib::Relation.new(
    type: "adoptedAs",
    bibitem: bib2,
    description: Bib::LocalizedMarkedUpString.new(content: "equivalent")
  )
end
#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
Check if date of one bibliographic item is transposed to another
184 185 186 187 188 189 190 191 192 193 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 184

# Check if date of one bibliographic item is transposed to another.
#
# Compares the `at` value of the first date of each item. The item with the
# smaller value receives a transposed relation to the other one (via
# #add_transposed_relation) and is returned first. When neither value is
# smaller, nothing is modified.
#
# @param bib [Relaton3gpp::BibliographicItem] newly parsed item
# @param existed [Relaton3gpp::BibliographicItem] previously stored item
# @return [Array<Relaton3gpp::BibliographicItem, Boolean>] the two items
#   (relation holder first) followed by a flag telling whether a relation was added
def check_transposed_date(bib, existed)
  bib_at = bib.date[0].at
  existed_at = existed.date[0].at

  if bib_at < existed_at
    holder = bib
    target = existed
  elsif bib_at > existed_at
    holder = existed
    target = bib
  else
    return [bib, existed, false]
  end

  add_transposed_relation holder, target
  [holder, target, true]
end
#download(uri, tmp_file) ⇒ Object
89 90 91 92 93 94 95 96 97 98 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 89

# Stream the resource at +uri+ into +tmp_file+ with an HTTP GET.
#
# TLS is used when the URI scheme is "https". The connection is opened with a
# 30 second open timeout and a 120 second read timeout. The body is written
# chunk by chunk in binary mode.
#
# @param uri [URI] location of the resource
# @param tmp_file [String] path of the file to write
# @raise [RuntimeError] when the response is not a Net::HTTPSuccess
def download(uri, tmp_file)
  Net::HTTP.start(uri.host, uri.port,
                  use_ssl: uri.scheme == "https",
                  open_timeout: 30,
                  read_timeout: 120) do |http|
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |resp|
      raise "HTTP #{resp.code} from #{uri}" unless resp.is_a?(Net::HTTPSuccess)

      File.open(tmp_file, "wb") do |out|
        resp.read_body { |chunk| out.write(chunk) }
      end
    end
  end
end
#fetch(source) ⇒ Object
Parse documents
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 27

# Parse documents.
#
# Obtains the bibliography CSV through #get_file, parses every row and saves
# the resulting documents, then writes the CURRENT state file, saves the index
# and reports collected errors. Returns early without doing anything when
# #get_file yields no file, the file does not exist, or it is not larger than
# 20_000_000 bytes.
# NOTE(review): the size threshold presumably guards against truncated
# downloads — confirm.
#
# @param source [String] data source name; "status-smg-3GPP-force" forces a
#   renewal, which first removes the files under the output directory and
#   clears the index
# @return [Object, nil] the result of `report_errors`, or nil on early return
def fetch(source) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  renewal = source == "status-smg-3GPP-force"
  file = get_file renewal
  return unless file && File.exist?(file) && File.size(file) > 20_000_000

  if renewal
    FileUtils.rm_f Dir.glob(File.join(@output, "/*"))
    index.remove_all
  end

  # Block form so the CSV file handle is closed once all rows are processed
  # (or if parsing/saving raises) instead of being left open until GC.
  CSV.open(file, "r:bom|utf-8", headers: true, col_sep: ";") do |csv|
    csv.each do |row|
      save_doc Parser.parse(row, @errors)
    end
  end

  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
  index.save
  report_errors
end
#get_file(renewal) ⇒ String?
Get file via HTTPS. If file has not changed, return nil
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 51

# Get file via HTTPS. If file has not changed, return nil.
#
# Loads the state stored in CURRENT (when that file exists) into @current,
# asks the server for the Last-Modified header of CSV_URL and downloads the
# CSV into the system temp directory unless the recorded file name and date
# already match. Net::OpenTimeout, Net::ReadTimeout and SocketError are
# retried; the fifth consecutive failure is re-raised. On success @current is
# updated with the file name and the Last-Modified date.
#
# @param renewal [Boolean] when true, download even if the recorded date matches
# @return [String, nil] path of the downloaded file, or nil when there is no
#   Last-Modified header or the file is unchanged
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  uri = URI(CSV_URL)
  attempts = 0
  begin
    last_modified = head_last_modified(uri)
    return unless last_modified

    dt = DateTime.parse(last_modified)
    unless renewal
      same_file = CSV_FILE == @current["file"]
      return if same_file && !@current["date"].to_s.empty? && dt == DateTime.parse(@current["date"])
    end

    tmp_file = File.join Dir.tmpdir, "3gpp.csv"
    download(uri, tmp_file)
  rescue Net::OpenTimeout, Net::ReadTimeout, SocketError => e
    attempts += 1
    raise e unless attempts < 5

    retry
  end
  @current["file"] = CSV_FILE
  @current["date"] = dt.to_s
  tmp_file
end
#head_last_modified(uri) ⇒ Object
79 80 81 82 83 84 85 86 87 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 79

# Issue an HTTP HEAD request for +uri+ and read its Last-Modified header.
#
# TLS is used when the URI scheme is "https"; open and read timeouts are both
# 30 seconds.
#
# @param uri [URI] location of the resource
# @return [String, nil] value of the "last-modified" response header
# @raise [RuntimeError] when the response is not a Net::HTTPSuccess
def head_last_modified(uri)
  Net::HTTP.start(uri.host, uri.port,
                  use_ssl: uri.scheme == "https",
                  open_timeout: 30,
                  read_timeout: 30) do |http|
    head = Net::HTTP::Head.new(uri.request_uri)
    resp = http.request(head)
    raise "HTTP #{resp.code} from #{uri}" unless resp.is_a?(Net::HTTPSuccess)

    resp["last-modified"]
  end
end
#index ⇒ Object
18 19 20 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 18

# Memoized "3gpp" index, found or created on first use and backed by the file
# "#{INDEXFILE}.yaml".
#
# @return [Object] whatever Relaton::Index.find_or_create returns
def index
  return @index if @index

  @index = Relaton::Index.find_or_create("3gpp", file: "#{INDEXFILE}.yaml")
end
#log_error(msg) ⇒ Object
14 15 16 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 14

# Forward an error message to Util.error.
#
# @param msg [String] message to report
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end
#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?
Merge duplication
128 129 130 131 132 133 134 135 136 137 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 128

# Merge duplication.
#
# Loads the previously stored item from the YAML +file+ and merges it with
# +bib+ in three steps that always run in this order: source update,
# transposed relation, contributor merge.
#
# @param bib [Relaton3gpp::BibliographicItem] newly parsed item
# @param file [String] path of the YAML file holding the stored item
# @return [Relaton3gpp::BibliographicItem, nil] the first item returned by
#   #transposed_relation when any step reported a change, otherwise nil
def merge_duplication(bib, file)
  existed = Item.from_hash(YAML.load_file(file))
  source_changed = update_source(bib, existed)
  bib1, bib2, relation_changed = transposed_relation(bib, existed)
  contributor_changed = add_contributor(bib1, bib2)
  bib1 if source_changed || relation_changed || contributor_changed
end
#save_doc(bib) ⇒ Object
Save document to file
105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 105

# Save document to file.
#
# The output path is derived from the document number. When that path was
# already produced during this run, the document is merged with the stored
# one through #merge_duplication; a nil merge result is reported with a
# warning and nothing is written. Otherwise the path is remembered in @files
# and registered in the index. Does nothing when +bib+ is nil.
#
# @param bib [Relaton3gpp::BibliographicItem, nil] document to save
# @return [Integer, nil] result of File.write, or nil when nothing was written
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  file = output_file(bib.docnumber)
  doc = bib
  if @files.include?(file)
    doc = merge_duplication(bib, file)
    Util.warn "File #{file} already exists. Document: #{bib.docnumber}" if doc.nil?
  else
    @files << file
    index.add_or_update bib.docnumber, file
  end
  return if doc.nil?

  File.write file, serialize(doc), encoding: "UTF-8"
end
#to_bibxml(bib) ⇒ Object
251 252 253 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 251

# Serialize a document by calling its `to_rfcxml` method.
#
# @param bib [#to_rfcxml] document to serialize
# @return [Object] whatever `bib.to_rfcxml` returns
def to_bibxml(bib)
  rfcxml = bib.to_rfcxml
  rfcxml
end
#to_xml(bib) ⇒ Object
243 244 245 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 243

# Serialize a document by calling its `to_xml` method with `bibdata: true`.
#
# @param bib [#to_xml] document to serialize
# @return [Object] whatever `bib.to_xml(bibdata: true)` returns
def to_xml(bib)
  xml = bib.to_xml(bibdata: true)
  xml
end
#to_yaml(bib) ⇒ Object
247 248 249 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 247

# Serialize a document by calling its `to_yaml` method.
#
# @param bib [#to_yaml] document to serialize
# @return [Object] whatever `bib.to_yaml` returns
def to_yaml(bib)
  yaml = bib.to_yaml
  yaml
end
#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
If one of the bibliographic items has a date greater than the other, make it a relation
167 168 169 170 171 172 173 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 167

# Decide which of two duplicate items comes first, based on their dates.
#
# - stored item has no date: nothing changes, order is kept;
# - only the stored item has a date: the stored item comes first and the pair
#   is flagged as changed;
# - both have dates: delegated to #check_transposed_date.
#
# @param bib [Relaton3gpp::BibliographicItem] newly parsed item
# @param existed [Relaton3gpp::BibliographicItem] previously stored item
# @return [Array<Relaton3gpp::BibliographicItem, Boolean>] the two items
#   followed by a flag telling whether anything changed
def transposed_relation(bib, existed)
  # Whether or not bib has a date, an undated stored item means "no change".
  return [bib, existed, false] if existed.date.none?
  # From here on the stored item is known to have a date.
  return [existed, bib, true] if bib.date.none?

  check_transposed_date bib, existed
end
#update_source(bib1, bib2) ⇒ Boolean
Update link in case one of bibliographic items has no link
147 148 149 150 151 152 153 154 155 156 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 147

# Update link in case one of bibliographic items has no link.
#
# When exactly one of the two items has sources, they are assigned to the
# other item.
#
# @param bib1 [Relaton3gpp::BibliographicItem] first item
# @param bib2 [Relaton3gpp::BibliographicItem] second item
# @return [Boolean] true if one of the items received the other's sources
def update_source(bib1, bib2)
  if bib1.source.any? && bib2.source.empty?
    bib2.source = bib1.source
  elsif bib1.source.empty? && bib2.source.any?
    bib1.source = bib2.source
  else
    return false
  end
  true
end