Class: Relaton::ThreeGpp::DataFetcher
- Inherits:
-
Core::DataFetcher
- Object
- Core::DataFetcher
- Relaton::ThreeGpp::DataFetcher
- Defined in:
- lib/relaton/3gpp/data_fetcher.rb
Constant Summary collapse
- CURRENT =
"current.yaml".freeze
- CSV_URL =
"https://www.3gpp.org/ftp/Information/Databases/3GPPBibliography.csv".freeze
Instance Method Summary collapse
- #add_affiliation(contrib, affiliation) ⇒ Object
-
#add_contributor(bib1, bib2) ⇒ Object
rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity.
-
#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem
Add transposed relation.
-
#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
Check if date of one bibliographic item is transposed to another.
- #download(uri, tmp_file) ⇒ Object
-
#fetch(source) ⇒ Object
Parse documents.
-
#get_file(renewal) ⇒ String?
Get file via HTTPS.
- #head_last_modified(uri) ⇒ Object
- #index ⇒ Object
- #log_error(msg) ⇒ Object
-
#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?
Merge duplication.
-
#save_doc(bib) ⇒ Object
Save document to file.
- #to_bibxml(bib) ⇒ Object
- #to_xml(bib) ⇒ Object
- #to_yaml(bib) ⇒ Object
-
#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
If one bibliographic item has a later date than the other, make it a relation of the earlier one.
-
#update_source?(bib1, bib2) ⇒ Boolean
Update link in case one of bibliographic items has no link.
Instance Method Details
#add_affiliation(contrib, affiliation) ⇒ Object
229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 229
#
# Append to the contributor's person every affiliation that it does not
# already include.
#
# @param contrib [#person] contributor whose `person.affiliation` list is extended
# @param affiliation [Enumerable] affiliations to merge in
#
# @return [Boolean] true if at least one affiliation was appended
def add_affiliation(contrib, affiliation)
  affiliation.inject(false) do |changed, entry|
    known = contrib.person.affiliation
    next changed if known.include?(entry)

    known << entry
    true
  end
end
#add_contributor(bib1, bib2) ⇒ Object
rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 210
#
# Merge the person contributors of bib2 into bib1.
#
# A contributor of bib2 without a person is skipped. If bib1 already has a
# person contributor with the same name, only the missing affiliations are
# added to it; otherwise the contributor is appended to bib1.
#
# @param bib1 [#contributor] item that receives contributors
# @param bib2 [#contributor] item whose contributors are merged in
#
# @return [Boolean] true if bib1 was modified
def add_contributor(bib1, bib2) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity
  changed = false
  bib2.contributor.each do |bc|
    next unless bc.person

    # Only person contributors may match. With `ic.person&.name` a contributor
    # without a person compared equal to a person whose name is nil.
    existed = bib1.contributor.find do |ic|
      ic.person && ic.person.name == bc.person.name
    end
    if existed
      changed = true if add_affiliation(existed, bc.person.affiliation)
    else
      bib1.contributor << bc
      changed = true
    end
  end
  changed
end
#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem
Add transposed relation
202 203 204 205 206 207 208 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 202
#
# Add transposed relation.
#
# Moves every relation of bib2 onto bib1, empties bib2's relation list, and
# then appends to bib1 an "adoptedAs" relation (described as "equivalent")
# that points at bib2.
#
# @param bib1 [#relation] item that receives the relations
# @param bib2 [#relation] item that becomes the related item
#
# @return [Object] the result of the final `bib1.relation <<` call
def add_transposed_relation(bib1, bib2)
  bib2.relation.each { |inherited| bib1.relation << inherited }
  bib2.relation.clear
  bib1.relation << Bib::Relation.new(
    type: "adoptedAs",
    bibitem: bib2,
    description: Bib::LocalizedMarkedUpString.new(content: "equivalent"),
  )
end
#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
Check if date of one bibliographic item is transposed to another
183 184 185 186 187 188 189 190 191 192 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 183
#
# Check if date of one bibliographic item is transposed to another.
#
# Compares the `at` value of the first date of each item. The item with the
# earlier date becomes the primary one and receives the other as a transposed
# relation. When neither date is earlier nothing is changed.
#
# @param bib [#date] newly parsed item
# @param existed [#date] previously stored item
#
# @return [Array] `[primary, secondary, changed]`
def check_transposed_date(bib, existed)
  bib_at = bib.date[0].at
  existed_at = existed.date[0].at

  if bib_at < existed_at
    primary = bib
    secondary = existed
  elsif bib_at > existed_at
    primary = existed
    secondary = bib
  else
    return [bib, existed, false]
  end

  add_transposed_relation primary, secondary
  [primary, secondary, true]
end
#download(uri, tmp_file) ⇒ Object
87 88 89 90 91 92 93 94 95 96 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 87
#
# Download the resource at uri into tmp_file, writing the response body
# chunk by chunk as it is read.
#
# @param uri [URI] location to fetch; SSL is used when the scheme is "https"
# @param tmp_file [String] path of the file to write (opened in binary mode)
#
# @raise [RuntimeError] when the response is not a Net::HTTPSuccess
def download(uri, tmp_file)
  options = { use_ssl: uri.scheme == "https", open_timeout: 30, read_timeout: 120 }
  Net::HTTP.start(uri.host, uri.port, **options) do |http|
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |resp|
      raise "HTTP #{resp.code} from #{uri}" unless resp.is_a?(Net::HTTPSuccess)

      File.open(tmp_file, "wb") do |out|
        resp.read_body { |chunk| out.write(chunk) }
      end
    end
  end
end
#fetch(source) ⇒ Object
Parse documents
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 27
#
# Parse documents.
#
# Obtains the bibliography CSV through #get_file, passes every row to
# Parser.parse and #save_doc, then writes CURRENT, saves the index and
# reports the collected errors. Returns early without doing anything when no
# file is returned, the file does not exist, or it is not larger than
# 20_000_000 bytes.
#
# @param source [String] "status-smg-3GPP-force" requests a renewal: the
#   files under @output are removed and the index is emptied before parsing
def fetch(source) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  renewal = source == "status-smg-3GPP-force"
  file = get_file renewal
  return unless file && File.exist?(file) && File.size(file) > 20_000_000

  if renewal
    FileUtils.rm_f Dir.glob(File.join(@output, "/*"))
    index.remove_all
  end

  # Block form so the CSV file handle is closed once all rows are processed;
  # the block-less CSV.open left it open.
  CSV.open(file, "r:bom|utf-8", headers: true, col_sep: ";") do |csv|
    csv.each { |row| save_doc Parser.parse(row, @errors) }
  end

  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
  index.save
  report_errors
end
#get_file(renewal) ⇒ String?
Get file via HTTPS. If file has not changed, return nil
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 51
#
# Get file via HTTPS. If file has not changed, return nil.
#
# Loads CURRENT (when it exists) into @current, asks the server for the
# Last-Modified value of CSV_URL and, unless the stored date equals it,
# downloads the CSV into the system temp directory. Timeouts and socket
# errors are retried; the fifth failure is re-raised.
#
# @param renewal [Boolean] download even if the stored date is unchanged
#
# @return [String, nil] path of the downloaded file, or nil when there is no
#   Last-Modified value or the file has not changed
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  @current = YAML.load_file(CURRENT) if File.exist?(CURRENT)
  @current ||= {}
  uri = URI(CSV_URL)
  attempts = 0
  begin
    last_modified = head_last_modified(uri)
    return unless last_modified

    dt = DateTime.parse(last_modified)
    stored = @current["date"].to_s
    return if !(renewal || stored.empty?) && dt == DateTime.parse(@current["date"])

    tmp_file = File.join(Dir.tmpdir, "3gpp.csv")
    download(uri, tmp_file)
  rescue Net::OpenTimeout, Net::ReadTimeout, SocketError
    attempts += 1
    retry if attempts < 5
    raise
  end
  @current["date"] = dt.to_s
  tmp_file
end
#head_last_modified(uri) ⇒ Object
77 78 79 80 81 82 83 84 85 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 77
#
# Issue a HEAD request for uri and read the "last-modified" response header.
#
# @param uri [URI] location to query; SSL is used when the scheme is "https"
#
# @return [String, nil] value of the "last-modified" header
#
# @raise [RuntimeError] when the response is not a Net::HTTPSuccess
def head_last_modified(uri)
  options = { use_ssl: uri.scheme == "https", open_timeout: 30, read_timeout: 30 }
  Net::HTTP.start(uri.host, uri.port, **options) do |http|
    resp = http.request(Net::HTTP::Head.new(uri.request_uri))
    unless resp.is_a?(Net::HTTPSuccess)
      raise "HTTP #{resp.code} from #{uri}"
    end

    resp["last-modified"]
  end
end
#index ⇒ Object
17 18 19 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 17
#
# Memoized "3gpp" index, found or created via Relaton::Index with the file
# name "#{INDEXFILE}.yaml".
#
# @return [Object] whatever Relaton::Index.find_or_create returns
def index
  return @index if @index

  @index = Relaton::Index.find_or_create("3gpp", file: "#{INDEXFILE}.yaml")
end
#log_error(msg) ⇒ Object
13 14 15 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 13
#
# Forward an error message to Util.error.
#
# @param msg [String] message to report
#
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end
#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?
Merge duplication
126 127 128 129 130 131 132 133 134 135 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 126
#
# Merge duplication.
#
# Loads the stored item from file and merges it with bib in three steps:
# source, transposed relation, contributors.
#
# NOTE(review): the stored file is always read with YAML.load_file; confirm
# that this path is only reached when documents are serialized as YAML.
#
# @param bib [Object] newly parsed item
# @param file [String] path of the already stored item
#
# @return [Object, nil] the merged primary item, or nil when none of the
#   three steps reported a change
def merge_duplication(bib, file)
  existed = Item.from_hash(YAML.load_file(file))

  source_changed = update_source?(bib, existed)
  primary, secondary, relation_changed = transposed_relation(bib, existed)
  contributor_changed = add_contributor(primary, secondary)

  primary if source_changed || relation_changed || contributor_changed
end
#save_doc(bib) ⇒ Object
Save document to file
103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 103
#
# Save document to file.
#
# The first time an output file is seen it is recorded in @files, added to
# the index, and bib is written as-is. If the file was already produced, bib
# is merged with the stored document; when the merge reports no change a
# warning is logged and nothing is written.
#
# @param bib [Object, nil] item to store; nil is ignored
def save_doc(bib) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  return unless bib

  file = output_file(bib.docnumber)
  if @files.include?(file)
    doc = merge_duplication(bib, file)
    if doc.nil?
      Util.warn "File #{file} already exists. Document: #{bib.docnumber}"
      return
    end
  else
    @files << file
    index.add_or_update bib.docnumber, file
    doc = bib
  end

  File.write file, serialize(doc), encoding: "UTF-8"
end
#to_bibxml(bib) ⇒ Object
250 251 252 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 250
#
# Serialize an item by calling its #to_rfcxml.
#
# @param bib [#to_rfcxml] item to serialize
#
# @return [Object] result of bib.to_rfcxml
def to_bibxml(bib)
  bib.to_rfcxml
end
#to_xml(bib) ⇒ Object
242 243 244 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 242
#
# Serialize an item by calling its #to_xml with `bibdata: true`.
#
# @param bib [#to_xml] item to serialize
#
# @return [Object] result of bib.to_xml(bibdata: true)
def to_xml(bib)
  bib.to_xml(bibdata: true)
end
#to_yaml(bib) ⇒ Object
246 247 248 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 246
#
# Serialize an item by calling its #to_yaml.
#
# @param bib [#to_yaml] item to serialize
#
# @return [Object] result of bib.to_yaml
def to_yaml(bib)
  bib.to_yaml
end
#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
If one bibliographic item has a later date than the other, make it a relation of the earlier one
165 166 167 168 169 170 171 172 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 165
#
# Decide which of two items is the primary one based on their dates.
#
# - stored item has no date: keep the order, nothing changed;
# - only the stored item has a date: the stored item becomes primary and the
#   result is flagged as changed;
# - both have dates: delegate to #check_transposed_date.
#
# @param bib [#date] newly parsed item
# @param existed [#date] previously stored item
#
# @return [Array] `[primary, secondary, changed]`
def transposed_relation(bib, existed)
  # Covers both "neither has a date" and "only bib has a date".
  return [bib, existed, false] if existed.date.none?
  return [existed, bib, true] if bib.date.none?

  check_transposed_date bib, existed
end
#update_source?(bib1, bib2) ⇒ Boolean
Update link in case one of bibliographic items has no link
145 146 147 148 149 150 151 152 153 154 |
# File 'lib/relaton/3gpp/data_fetcher.rb', line 145
#
# Update link in case one of bibliographic items has no link.
#
# When exactly one of the items has a non-empty source with at least one
# truthy entry and the other's source is empty, the empty one is assigned the
# other's source object.
#
# @param bib1 [#source, #source=] first item
# @param bib2 [#source, #source=] second item
#
# @return [Boolean] true if a source was copied
def update_source?(bib1, bib2)
  donor, receiver =
    if bib1.source.any? && bib2.source.empty?
      [bib1, bib2]
    elsif bib1.source.empty? && bib2.source.any?
      [bib2, bib1]
    end
  return false unless donor

  receiver.source = donor.source
  true
end