Class: Relaton::Ccsds::DataFetcher

Inherits:
Relaton::Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/ccsds/data/fetcher.rb

Constant Summary collapse

# Matches a document-identifier suffix of the form " - <word> Translated"
# at the end of a line: whitespace, a hyphen, whitespace, one or more word
# characters, whitespace and the literal "Translated".
# The methods below use it to detect such identifiers and to strip the suffix.
TRRGX =
/\s-\s\w+\sTranslated$/

Instance Method Summary collapse

Instance Method Details

#agent ⇒ Object



13
14
15
16
17
18
19
# File 'lib/relaton/ccsds/data/fetcher.rb', line 13

# Lazily build and memoize the HTTP agent used for fetching.
#
# On first use a new Mechanize instance is created and its request headers
# are set to the single "Accept" header below; later calls return the same
# object.
#
# @return [Mechanize] the memoized agent
def agent
  @agent ||= Mechanize.new.tap do |client|
    client.request_headers = { "Accept" => "application/json;odata=verbose" }
  end
end

#create_instance_relation(bib, file) ⇒ void

This method returns an undefined value.

Create instance relation and save to file

Parameters:



183
184
185
186
187
188
# File 'lib/relaton/ccsds/data/fetcher.rb', line 183

# Link a document with an already stored document as instance/original.
#
# The stored document is parsed from +file+. +bib+ receives a "hasInstance"
# relation pointing at the stored document, the stored document receives an
# "instanceOf" relation pointing at +bib+, and the stored document is then
# written back to +file+.
#
# @param bib [Object] bibliographic item being processed
# @param file [String] path of the stored document
# @return [void]
def create_instance_relation(bib, file)
  stored = parse_file(file)
  create_relation(stored, "hasInstance") { |relation| bib.relation << relation }
  create_relation(bib, "instanceOf") { |relation| stored.relation << relation }
  File.write(file, serialize(stored), encoding: "UTF-8")
end

#create_relation(bib, type) {|Relaton::Bib::Relation.new(type: type, bibitem: rel)| ... } ⇒ Relaton::Bib::Relation

Create relation

Parameters:

Yields:

  • (Relaton::Bib::Relation.new(type: type, bibitem: rel))

Returns:

  • (Relaton::Bib::Relation)

    relation



198
199
200
201
202
203
204
205
# File 'lib/relaton/ccsds/data/fetcher.rb', line 198

# Build a relation of the given type that points at +bib+ and yield it.
#
# The relation target is a minimal item holding a copy of the first document
# identifier of +bib+ (round-tripped through YAML) and a formatted reference
# made from a duplicate of that identifier's content. Nothing is yielded when
# +bib+ has no document identifier.
#
# @param bib [Object] item the relation should point at
# @param type [String] relation type
# @yield [Relaton::Bib::Relation] the newly built relation
# @return [Object, nil] the block's result, or nil when +bib+ has no identifier
def create_relation(bib, type)
  source_id = bib.docidentifier.first
  return unless source_id

  copied_id = Bib::Docidentifier.from_yaml(source_id.to_yaml)
  reference = Relaton::Bib::Formattedref.new(content: source_id.content.dup)
  target = Relaton::Bib::ItemData.new(docidentifier: [copied_id], formattedref: reference)
  yield Relaton::Bib::Relation.new(type: type, bibitem: target)
end

#create_relations(bib, file) ⇒ void

This method returns an undefined value.

Create translation or instance relation and save to file

Parameters:



143
144
145
146
147
148
149
# File 'lib/relaton/ccsds/data/fetcher.rb', line 143

# Cross-link a document with an already stored document.
#
# The stored document is parsed from +file+ and the pair of relation types
# is taken from translation_relation_types for it. +bib+ receives a relation
# of the first type pointing at the stored document, the stored document
# receives a relation of the second type pointing at +bib+, and the stored
# document is then written back to +file+.
#
# @param bib [Object] bibliographic item being processed
# @param file [String] path of the stored document
# @return [void]
def create_relations(bib, file)
  stored = parse_file(file)
  type_for_bib, type_for_stored = translation_relation_types(stored)
  create_relation(stored, type_for_bib) { |relation| bib.relation << relation }
  create_relation(bib, type_for_stored) { |relation| stored.relation << relation }
  File.write(file, serialize(stored), encoding: "UTF-8")
end

#fetch(_source = nil) ⇒ Object



27
28
29
30
# File 'lib/relaton/ccsds/data/fetcher.rb', line 27

# Fetch all documents from the publications page and save the index.
#
# @param _source [Object, nil] accepted but not used by this method
# @return [Object] whatever +index.save+ returns
def fetch(_source = nil)
  publications_url = "https://ccsds.org/publications/ccsdsallpubs/"
  fetch_docs(publications_url)
  index.save
end

#fetch_docs(url) ⇒ void

This method returns an undefined value.

Fetch documents from url

Parameters:

  • url (String)


39
40
41
42
43
# File 'lib/relaton/ccsds/data/fetcher.rb', line 39

# Fetch documents from url
#
# Downloads the page, extracts the JSON assigned to `const config` in its
# body and parses and saves every entry of its "data" array. The results of
# parse_and_save are kept in @array.
#
# @param url [String]
# @return [void]
# @raise [RuntimeError] when the page body has no `const config = ...;` payload
def fetch_docs(url)
  body = agent.get(url).body
  # String#[] with a capture index returns nil instead of failing when the
  # pattern is absent, so the missing payload can be reported clearly.
  config = body[/const config = (.*);/, 1]
  raise "Unable to find `const config = ...;` in the response from #{url}" unless config

  documents = JSON.parse(config)["data"]
  @array = documents.map { |doc| parse_and_save doc, documents }
end

#index ⇒ Object



21
22
23
24
25
# File 'lib/relaton/ccsds/data/fetcher.rb', line 21

# Lazily find or create the :ccsds index and memoize it.
#
# The index file name is built from INDEXFILE with a ".yaml" extension and
# identifiers are handled through Pubid::Ccsds::Identifier.
#
# @return [Object] the memoized index
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(
    :ccsds, file: "#{INDEXFILE}.yaml", pubid_class: Pubid::Ccsds::Identifier
  )
end

#merge_links(bib, file) ⇒ void

This method returns an undefined value.

Merge identical documents with different links (updates the given bibitem)

Parameters:



215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/relaton/ccsds/data/fetcher.rb', line 215

# Merge the sources (links) of an already written document into +bib+.
#
# When +file+ is not yet listed in @files it is recorded there and nothing
# else happens. Otherwise the document stored in +file+ is parsed and each
# of its sources whose type is not yet present among the sources of +bib+ is
# appended to +bib+.
#
# @param bib [Object] bibliographic item that receives the missing sources
# @param file [String] path of the output file for +bib+
# @return [void]
def merge_links(bib, file) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  seen_before = @files.include?(file)
  @files << file unless seen_before
  # nothing to merge for a file seen for the first time
  return unless seen_before

  puts "(#{file}) file already exists. Trying to merge links ..."

  parse_file(file).source.each do |candidate|
    # sources appended earlier in this loop count as present, too
    type_present = bib.source.any? { |existing| existing.type == candidate.type }
    bib.source << candidate unless type_present
  end
  Util.info "links are merged.", key: file
end

#parse_and_save(doc, data) ⇒ void

This method returns an undefined value.

Parse document and save to file

10 - Patent Licensing. Some docs have this field; its content is the same everywhere and does not look useful.
11 - Extra Information. Does not look useful.

Parameters:

  • doc (Hash)

    document data

  • data (Array<Array<String>>)

    collection of documents; each document is an array of fields: 0 - empty; 1 - center/a HTML element with href to PDF; 2 - a HTML element with href to HTML and document ID content (e.g. "CCSDS 123.0-B-1"); 3 - document title; 4 - document series (e.g. "Blue Book", "Silver Book", etc.); 5 - issue number; 6 - publication date (e.g. "August 2020"); 7 - abstract; 8 - Working Group as `name <a href="path" …`; 9 - ISO Equivalent as `id <a href="uri" …`



65
66
67
68
69
70
71
72
# File 'lib/relaton/ccsds/data/fetcher.rb', line 65

# Parse one document entry and save the resulting item(s).
#
# When the entry's series field (index 4) is "Silver Book", the entry is
# parsed a second time with the first result passed as the third parser
# argument; that second item is saved before the item itself.
#
# @param doc [Array, Hash] document data (only +doc[4]+ is read here)
# @param data [Array] collection of all documents, handed to the parser
# @return [void]
def parse_and_save(doc, data)
  current = DataParser.new(doc, data).parse
  save_bib DataParser.new(doc, data, current).parse if doc[4] == "Silver Book"
  save_bib current
end

#parse_file(file) ⇒ Object



151
152
153
154
155
156
157
158
# File 'lib/relaton/ccsds/data/fetcher.rb', line 151

# Read a stored document and parse it according to @format.
#
# @param file [String] path of the file to read (as UTF-8)
# @return [Object] the item built by Item.from_yaml or Item.from_xml
# @raise [RuntimeError] when @format is neither "yaml" nor "xml"
def parse_file(file)
  # reject unsupported formats before touching the file system
  raise "Unknown format #{@format}" unless %w[yaml xml].include?(@format)

  content = File.read(file, encoding: "UTF-8")
  @format == "yaml" ? Item.from_yaml(content) : Item.from_xml(content)
end

#save_bib(bib) ⇒ void

This method returns an undefined value.

Save bibitem to file

Parameters:



81
82
83
84
85
86
87
88
89
# File 'lib/relaton/ccsds/data/fetcher.rb', line 81

# Save bibitem to file
#
# Relations to already indexed documents are searched first, then links of a
# previously written file are merged, the item is serialized to its output
# file and the index is updated. Any StandardError raised on the way is
# reported on standard output instead of being propagated.
#
# @param bib [Object] bibliographic item to save
# @return [void]
def save_bib(bib) # rubocop:disable Metrics/AbcSize
  search_instance_translation bib
  file = output_file(bib.docidentifier.first.content)
  merge_links bib, file
  File.write file, serialize(bib), encoding: "UTF-8"
  index.add_or_update Pubid::Ccsds::Identifier.parse(bib.docidentifier.first.content), file
rescue StandardError => e
  # The failure being reported may itself be a missing docidentifier, so the
  # handler must not dereference it unconditionally; otherwise it would raise
  # again and the error would escape this method.
  docid = bib.docidentifier&.first&.content || "(unknown docidentifier)"
  trace = Array(e.backtrace).first(6).join("\n")
  puts "Failed to save #{docid}: #{e.message}\n#{trace}"
end

#search_instance_translation(bib) ⇒ void

This method returns an undefined value.

Search translation and instance relation

Parameters:



98
99
100
101
102
103
104
105
# File 'lib/relaton/ccsds/data/fetcher.rb', line 98

# Dispatch the relation search depending on the identifier of +bib+.
#
# Works on a copy of the first document identifier's content. When the
# TRRGX suffix can be stripped from it, search_relations is called with the
# stripped identifier; otherwise search_translations is called with the
# unchanged copy.
#
# @param bib [Object] bibliographic item being processed
# @return [void]
def search_instance_translation(bib)
  bibid = bib.docidentifier.first.content.dup
  # sub! returns nil when nothing was replaced, i.e. there is no such suffix
  return search_translations(bibid, bib) unless bibid.sub!(TRRGX, "")

  search_relations bibid, bib
end

#search_relations(bibid, bib) ⇒ void

This method returns an undefined value.

Search instance or translation relation

Parameters:



115
116
117
118
119
120
121
122
123
# File 'lib/relaton/ccsds/data/fetcher.rb', line 115

# Search the index for documents related to +bib+ and cross-link them.
#
# A row is used when its identifier without the language part equals +bibid+
# and the row's identifier is not equal to the first document identifier
# content of +bib+; create_relations is called for each such row.
#
# @param bibid [String] identifier of +bib+ with the TRRGX suffix removed
# @param bib [Object] bibliographic item being processed
# @return [void]
def search_relations(bibid, bib)
  index.search do |row|
    # TODO: simplify these conditions?
    next if row[:id].exclude(:language) != bibid
    # skip the row that matches the item's own identifier
    next if row[:id] == bib.docidentifier.first.content

    create_relations bib, row[:file]
  end
end

#search_translations(bibid, bib) ⇒ Object



125
126
127
128
129
130
131
132
133
# File 'lib/relaton/ccsds/data/fetcher.rb', line 125

# Search the index for language variants of +bib+ and link them as instances.
#
# create_instance_relation is called for every index row whose identifier
# has a language and, with the language part excluded, equals +bibid+.
#
# @param bibid [String] identifier of +bib+
# @param bib [Object] bibliographic item being processed
# @return [Object] whatever +index.search+ returns
def search_translations(bibid, bib)
  index.search do |row|
    row_id = row[:id]
    # only identifiers that carry a language are candidates
    next unless row_id.language
    next unless row_id.exclude(:language) == bibid

    create_instance_relation bib, row[:file]
  end
end

#to_bibxml(bib) ⇒ Object



235
# File 'lib/relaton/ccsds/data/fetcher.rb', line 235

# Serialize an item for the "bibxml" output by delegating to its
# +to_rfcxml+ method.
#
# @param bib [Object] bibliographic item
# @return [Object] result of +bib.to_rfcxml+
def to_bibxml(bib)
  bib.to_rfcxml
end

#to_xml(bib) ⇒ Object



234
# File 'lib/relaton/ccsds/data/fetcher.rb', line 234

# Serialize an item to XML, asking the item for the "bibdata" form.
#
# @param bib [Object] bibliographic item
# @return [Object] result of +bib.to_xml(bibdata: true)+
def to_xml(bib)
  bib.to_xml(bibdata: true)
end

#to_yaml(bib) ⇒ Object



233
# File 'lib/relaton/ccsds/data/fetcher.rb', line 233

# Serialize an item to YAML by delegating to its +to_yaml+ method.
#
# @param bib [Object] bibliographic item
# @return [Object] result of +bib.to_yaml+
def to_yaml(bib)
  bib.to_yaml
end

#translation_relation_types(bib) ⇒ Array<String>

Translation or instance relation types

Parameters:

Returns:

  • (Array<String>)

    relation types



167
168
169
170
171
172
173
# File 'lib/relaton/ccsds/data/fetcher.rb', line 167

# Pick the pair of relation types for a document.
#
# When the first document identifier of +bib+ matches TRRGX both types are
# "hasTranslation"; otherwise the pair is "instanceOf" and "hasInstance".
#
# @param bib [Object] bibliographic item whose identifier is inspected
# @return [Array<String>] two relation types
def translation_relation_types(bib)
  translated = bib.docidentifier.first.content.match?(TRRGX)
  translated ? Array.new(2, "hasTranslation") : %w[instanceOf hasInstance]
end