Class: Relaton::Ccsds::DataFetcher
- Inherits:
-
Relaton::Core::DataFetcher
- Object
- Relaton::Core::DataFetcher
- Relaton::Ccsds::DataFetcher
- Defined in:
- lib/relaton/ccsds/data/fetcher.rb
Constant Summary collapse
- TRRGX =
/\s-\s\w+\sTranslated$/
Instance Method Summary collapse
- #agent ⇒ Object
-
#create_instance_relation(bib, file) ⇒ void
Create instance relation and save to file.
-
#create_relation(bib, type) {|Relaton::Bib::Relation.new(type: type, bibitem: rel)| ... } ⇒ Relaton::Bib::Relation
Create relation.
-
#create_relations(bib, file) ⇒ void
Create translation or instance relation and save to file.
- #fetch(_source = nil) ⇒ Object
-
#fetch_docs(url) ⇒ void
Fetch documents from url.
-
#index ⇒ Object
Pubid index (index-v2): `:id` is the lean pubid hash.
-
#merge_links(bib, file) ⇒ void
Merge identical documents with different links (updates the given bibitem).
-
#parse_and_save(doc, data) ⇒ void
Parse document and save to file.
- #parse_file(file) ⇒ Object
-
#save_bib(bib) ⇒ void
Save bibitem to file.
-
#search_instance_translation(bib) ⇒ void
Search translation and instance relation.
-
#search_relations(bibid, bib) ⇒ void
Search instance or translation relation.
- #search_translations(bibid, bib) ⇒ Object
- #to_bibxml(bib) ⇒ Object
- #to_xml(bib) ⇒ Object
- #to_yaml(bib) ⇒ Object
-
#translation_relation_types(bib) ⇒ Array<String>
Translation or instance relation types.
Instance Method Details
#agent ⇒ Object
13 14 15 16 17 18 19 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 13
# Memoized Mechanize agent used for HTTP requests.
#
# The agent is created on first use and configured to send an
# "Accept: application/json;odata=verbose" request header.
#
# @return [Mechanize] the shared agent instance
def agent
  @agent ||= Mechanize.new.tap do |client|
    client.request_headers = { "Accept" => "application/json;odata=verbose" }
  end
end
#create_instance_relation(bib, file) ⇒ void
This method returns an undefined value.
Create instance relation and save to file
192 193 194 195 196 197 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 192
# Create instance relations between +bib+ and the item stored in +file+,
# then write the stored item back to +file+.
#
# The stored item is added to +bib+ as "hasInstance", and +bib+ is added to
# the stored item as "instanceOf". Only the stored item is rewritten here;
# +bib+ is modified in memory.
#
# @param bib [Object] bibitem being processed
# @param file [String] path of the already saved related item
# @return [void]
def create_instance_relation(bib, file)
  instance = parse_file(file)
  create_relation(instance, "hasInstance") do |relation|
    bib.relation << relation
  end
  create_relation(bib, "instanceOf") do |relation|
    instance.relation << relation
  end
  File.write(file, serialize(instance), encoding: "UTF-8")
end
#create_relation(bib, type) {|Relaton::Bib::Relation.new(type: type, bibitem: rel)| ... } ⇒ Relaton::Bib::Relation
Create relation
207 208 209 210 211 212 213 214 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 207
# Build a relation of the given +type+ that points at +bib+ and yield it.
#
# The related item carries only a document identifier and a formatted
# reference, both derived from the first docidentifier of +bib+. Nothing is
# yielded when +bib+ has no docidentifier.
#
# @param bib [Object] bibitem the relation should point at
# @param type [String] relation type, e.g. "hasInstance"
# @yieldparam relation [Relaton::Bib::Relation] the newly built relation
# @return [Object, nil] the block's result, or nil when +bib+ has no docidentifier
def create_relation(bib, type)
  return unless (source_id = bib.docidentifier.first)

  # Round trip through YAML; presumably to obtain an independent copy of
  # the identifier - confirm against Bib::Docidentifier.
  copied_id = Bib::Docidentifier.from_yaml(source_id.to_yaml)
  reference = Relaton::Bib::Formattedref.new(content: source_id.content.dup)
  related_item = Relaton::Bib::ItemData.new(
    docidentifier: [copied_id], formattedref: reference
  )
  yield Relaton::Bib::Relation.new(type: type, bibitem: related_item)
end
#create_relations(bib, file) ⇒ void
This method returns an undefined value.
Create translation or instance relation and save to file
152 153 154 155 156 157 158 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 152
# Create a pair of translation or instance relations between +bib+ and the
# item stored in +file+, then write the stored item back to +file+.
#
# The relation types come from #translation_relation_types, evaluated on
# the stored item: the first type is used for the relation added to +bib+,
# the second for the relation added to the stored item.
#
# @param bib [Object] bibitem being processed
# @param file [String] path of the already saved related item
# @return [void]
def create_relations(bib, file)
  stored = parse_file(file)
  type_for_bib, type_for_stored = translation_relation_types(stored)
  create_relation(stored, type_for_bib) do |relation|
    bib.relation << relation
  end
  create_relation(bib, type_for_stored) do |relation|
    stored.relation << relation
  end
  File.write(file, serialize(stored), encoding: "UTF-8")
end
#fetch(_source = nil) ⇒ Object
32 33 34 35 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 32
# Fetch all documents from the CCSDS publications page and save the index.
#
# @param _source [Object, nil] ignored
# @return [Object] whatever `index.save` returns
def fetch(_source = nil)
  publications_url = "https://ccsds.org/publications/ccsdsallpubs/"
  fetch_docs(publications_url)
  index.save
end
#fetch_docs(url) ⇒ void
This method returns an undefined value.
Fetch documents from url
44 45 46 47 48 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 44
# Fetch documents from url.
#
# Downloads the page, extracts the JSON assigned to `const config` in the
# page body and passes every entry of its "data" array to #parse_and_save
# together with the whole array.
#
# @param url [String] page to download
# @return [void]
# @raise [RuntimeError] when the page body contains no `const config = ...;`
# @raise [KeyError] when the extracted JSON has no "data" key
def fetch_docs(url)
  body = agent.get(url).body
  config = body[/const config = (.*);/, 1]
  # Fail with an explicit message instead of a NoMethodError on nil when the
  # page layout changes and the embedded config can no longer be found.
  raise "No `const config = ...;` JSON found in response from #{url}" unless config

  docs = JSON.parse(config).fetch("data")
  @array = docs.map { |doc| parse_and_save doc, docs }
end
#index ⇒ Object
Pubid index (index-v2): `:id` is the lean pubid hash. index-v1 (the pubid-v1 hash index for the released gem line) is rebuilt separately by the data repo’s build_index_v1.rb, in its own process with a pubid-v1 bundle, because pubid v1 and v2 both define Pubid::Ccsds::Identifier and cannot coexist here.
26 27 28 29 30 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 26
# Memoized pubid index for CCSDS documents.
#
# Looked up (or created) through Relaton::Index under the :ccsds key, backed
# by "#{INDEXFILE}.yaml" and using Pubid::Ccsds::Identifier as the pubid class.
#
# @return [Object] the index returned by Relaton::Index.find_or_create
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(
    :ccsds,
    file: "#{INDEXFILE}.yaml",
    pubid_class: Pubid::Ccsds::Identifier
  )
end
#merge_links(bib, file) ⇒ void
This method returns an undefined value.
Merge identical documents with different links (updaes given bibitem)
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 224
# Merge links of an already written document into +bib+.
#
# The first time a file name is seen it is only recorded in @files. On later
# calls with the same name, the stored item is parsed and every source whose
# type is not yet present in +bib+ is appended to +bib.source+.
#
# @param bib [Object] bibitem to update in place
# @param file [String] path of the output file
# @return [void]
def merge_links(bib, file) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  already_written = @files.include?(file)
  unless already_written
    # New file: remember it, nothing to merge yet.
    @files << file
    return
  end

  puts "(#{file}) file already exists. Trying to merge links ..."
  stored = parse_file(file)
  stored.source.each do |link|
    bib.source << link if bib.source.none? { |known| known.type == link.type }
  end
  Util.info "links are merged.", key: file
end
#parse_and_save(doc, data) ⇒ void
This method returns an undefined value.
Parse document and save to file
10 - Patent Licensing. Some docs have this field. The content is the same and does not look useful.
11 - Extra Information. Does not look useful.
70 71 72 73 74 75 76 77 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 70
# Parse document and save to file.
#
# When the fifth field of +doc+ is "Silver Book", a second item is parsed
# with the first one passed to DataParser as a third argument, and it is
# saved before the main item.
#
# @param doc [Array] raw document record
# @param data [Array] all raw document records
# @return [void]
def parse_and_save(doc, data)
  bibitem = DataParser.new(doc, data).parse
  save_bib(DataParser.new(doc, data, bibitem).parse) if doc[4] == "Silver Book"
  save_bib(bibitem)
end
#parse_file(file) ⇒ Object
160 161 162 163 164 165 166 167 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 160
# Read +file+ as UTF-8 and parse it into an Item according to @format.
#
# @param file [String] path of the file to read
# @return [Object] item built by Item.from_yaml or Item.from_xml
# @raise [RuntimeError] when @format is neither "yaml" nor "xml"
def parse_file(file)
  if @format == "yaml"
    Item.from_yaml(File.read(file, encoding: "UTF-8"))
  elsif @format == "xml"
    Item.from_xml(File.read(file, encoding: "UTF-8"))
  else
    raise "Unknown format #{@format}"
  end
end
#save_bib(bib) ⇒ void
This method returns an undefined value.
Save bibitem to file
86 87 88 89 90 91 92 93 94 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 86
# Save bibitem to file.
#
# Searches translation/instance relations for +bib+, merges links from a
# file previously written under the same name, writes the serialized item
# and registers the file in the index under the parsed identifier.
#
# Any StandardError raised along the way is reported on stdout (message plus
# the first six backtrace lines) and not re-raised.
#
# @param bib [Object] bibitem to save
# @return [void]
def save_bib(bib) # rubocop:disable Metrics/AbcSize
  search_instance_translation bib
  docid = bib.docidentifier.first.content
  file = output_file(docid)
  merge_links bib, file
  File.write file, serialize(bib), encoding: "UTF-8"
  index.add_or_update Pubid::Ccsds::Identifier.parse(docid), file
rescue StandardError => e
  # Safe navigation and Array() keep the handler from raising on its own
  # when the item has no identifier or the error carries no backtrace.
  failed_id = bib.docidentifier.first&.content
  puts "Failed to save #{failed_id}: #{e.message}\n#{Array(e.backtrace).first(6).join("\n")}"
end
#search_instance_translation(bib) ⇒ void
This method returns an undefined value.
Search translation and instance relation
103 104 105 106 107 108 109 110 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 103
# Search translation and instance relations for +bib+.
#
# When the first docidentifier matches TRRGX, the matching suffix is
# removed and #search_relations is called with the shortened identifier.
# Otherwise #search_translations is called with a copy of the identifier.
#
# @param bib [Object] bibitem being processed
# @return [void]
def search_instance_translation(bib)
  full_id = bib.docidentifier.first.content
  if full_id.match?(TRRGX)
    search_relations full_id.sub(TRRGX, ""), bib
  else
    search_translations full_id.dup, bib
  end
end
#search_relations(bibid, bib) ⇒ void
This method returns an undefined value.
Search instance or translation relation
120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 120
# Search instance or translation relations for +bib+ in the index.
#
# For every index row whose identifier, with the language part excluded,
# equals the parsed +bibid+, #create_relations is called with the row's file.
#
# @param bibid [String] identifier of +bib+ with the translation suffix removed
# @param bib [Object] bibitem being processed
# @return [void]
def search_relations(bibid, bib)
  wanted = ::Pubid::Ccsds::Identifier.parse(bibid)
  # search(wanted) narrows candidates by number via binary search first.
  index.search(wanted) do |row|
    next unless row[:id].exclude(:language) == wanted
    # NOTE(review): this compares the row's pubid with a String identifier;
    # confirm it can ever be true, or simplify the check.
    next if row[:id] == bib.docidentifier.first.content

    create_relations bib, row[:file]
  end
end
#search_translations(bibid, bib) ⇒ Object
132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 132
# Search the index for translated variants of +bib+.
#
# Calls #create_instance_relation for every index row that has a language
# and whose identifier, with the language part excluded, equals the parsed
# +bibid+ (i.e. the same identifier but with the word "Translated").
#
# @param bibid [String] identifier of +bib+
# @param bib [Object] bibitem being processed
# @return [Object] whatever `index.search` returns
def search_translations(bibid, bib)
  wanted = ::Pubid::Ccsds::Identifier.parse(bibid)
  # search(wanted) narrows candidates by number via binary search first.
  index.search(wanted) do |row|
    translated = row[:id].language && row[:id].exclude(:language) == wanted
    create_instance_relation(bib, row[:file]) if translated
  end
end
#to_bibxml(bib) ⇒ Object
244 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 244
# Serialize +bib+ for the "bibxml" output by delegating to `bib.to_rfcxml`.
#
# @param bib [Object] bibitem to serialize
# @return [Object] result of `bib.to_rfcxml`
def to_bibxml(bib)
  bib.to_rfcxml
end
#to_xml(bib) ⇒ Object
243 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 243
# Serialize +bib+ to XML by delegating to `bib.to_xml` with `bibdata: true`.
#
# @param bib [Object] bibitem to serialize
# @return [Object] result of `bib.to_xml(bibdata: true)`
def to_xml(bib)
  bib.to_xml(bibdata: true)
end
#to_yaml(bib) ⇒ Object
242 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 242
# Serialize +bib+ to YAML by delegating to `bib.to_yaml`.
#
# @param bib [Object] bibitem to serialize
# @return [Object] result of `bib.to_yaml`
def to_yaml(bib)
  bib.to_yaml
end
#translation_relation_types(bib) ⇒ Array<String>
Translation or instance relation types
176 177 178 179 180 181 182 |
# File 'lib/relaton/ccsds/data/fetcher.rb', line 176
# Translation or instance relation types for a pair of related items.
#
# @param bib [Object] bibitem whose first docidentifier is tested against TRRGX
# @return [Array<String>] ["hasTranslation", "hasTranslation"] when the
#   identifier matches TRRGX, otherwise ["instanceOf", "hasInstance"]
def translation_relation_types(bib)
  translated = bib.docidentifier.first.content.match?(TRRGX)
  return ["instanceOf", "hasInstance"] unless translated

  Array.new(2, "hasTranslation")
end