Class: Relaton::Ccsds::DataFetcher

Inherits:
Relaton::Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/ccsds/data/fetcher.rb

Constant Summary collapse

TRRGX =
/\s-\s\w+\sTranslated$/

Instance Method Summary collapse

Instance Method Details

#agentObject



13
14
15
16
17
18
19
# File 'lib/relaton/ccsds/data/fetcher.rb', line 13

# Lazily builds and memoizes the HTTP client used by this fetcher.
#
# The client is a Mechanize instance whose request headers are set to a single
# "Accept" header asking for verbose OData JSON.
#
# @return [Mechanize] the memoized client
def agent
  @agent ||= Mechanize.new.tap do |client|
    client.request_headers = { "Accept" => "application/json;odata=verbose" }
  end
end

#create_instance_relation(bib, file) ⇒ void

This method returns an undefined value.

Create instance relation and save to file

Parameters:



192
193
194
195
196
197
# File 'lib/relaton/ccsds/data/fetcher.rb', line 192

# Links +bib+ and the document stored in +file+ as an instance pair, then
# rewrites +file+ with the updated stored document.
#
# The stored document is added to +bib+ as a "hasInstance" relation and +bib+
# is added to the stored document as an "instanceOf" relation. Only the stored
# document is written here; +bib+ is modified in memory.
#
# @param bib [Object] item being processed (must respond to +relation+)
# @param file [String] path of the already saved related document
# @return [void]
def create_instance_relation(bib, file)
  stored = parse_file(file)
  create_relation(stored, "hasInstance") { |link| bib.relation << link }
  create_relation(bib, "instanceOf") { |link| stored.relation << link }
  File.write(file, serialize(stored), encoding: "UTF-8")
end

#create_relation(bib, type) {|Relaton::Bib::Relation.new(type: type, bibitem: rel)| ... } ⇒ Relaton::Bib::Relation

Create relation

Parameters:

Yields:

  • (Relaton::Bib::Relation.new(type: type, bibitem: rel))

Returns:

  • (Relaton::Bib::Relation)

    relation



207
208
209
210
211
212
213
214
# File 'lib/relaton/ccsds/data/fetcher.rb', line 207

# Builds a relation of the given +type+ that points at +bib+ and yields it.
#
# The related item carries only a copy of +bib+'s first document identifier
# and a formatted reference holding a duplicate of that identifier's content.
# Nothing is yielded when +bib+ has no document identifier.
#
# @param bib [Object] item the relation should point to
# @param type [String] relation type, e.g. "hasInstance"
# @yieldparam relation [Relaton::Bib::Relation] the newly built relation
# @return [Object, nil] the block's result, or nil when +bib+ has no identifier
def create_relation(bib, type)
  source_id = bib.docidentifier.first
  return unless source_id

  # The identifier is rebuilt from its YAML form rather than reused directly
  # (presumably to get a detached copy - confirm).
  id_copy = Bib::Docidentifier.from_yaml(source_id.to_yaml)
  reference = Relaton::Bib::Formattedref.new(content: source_id.content.dup)
  target = Relaton::Bib::ItemData.new(docidentifier: [id_copy], formattedref: reference)
  yield Relaton::Bib::Relation.new(type: type, bibitem: target)
end

#create_relations(bib, file) ⇒ void

This method returns an undefined value.

Create translation or instance relation and save to file

Parameters:



152
153
154
155
156
157
158
# File 'lib/relaton/ccsds/data/fetcher.rb', line 152

# Links +bib+ and the document stored in +file+ in both directions, then
# rewrites +file+ with the updated stored document.
#
# The two relation types come from #translation_relation_types applied to the
# stored document: the first is used for the relation added to +bib+, the
# second for the relation added to the stored document.
#
# @param bib [Object] item being processed (must respond to +relation+)
# @param file [String] path of the already saved related document
# @return [void]
def create_relations(bib, file)
  stored = parse_file(file)
  to_stored, to_bib = translation_relation_types(stored)
  create_relation(stored, to_stored) { |link| bib.relation << link }
  create_relation(bib, to_bib) { |link| stored.relation << link }
  File.write(file, serialize(stored), encoding: "UTF-8")
end

#fetch(_source = nil) ⇒ Object



32
33
34
35
# File 'lib/relaton/ccsds/data/fetcher.rb', line 32

# Fetches every document from the CCSDS "all publications" page and then
# saves the index.
#
# @param _source [Object, nil] accepted but not used by this method
# @return [Object] whatever +index.save+ returns
def fetch(_source = nil)
  publications_url = "https://ccsds.org/publications/ccsdsallpubs/"
  fetch_docs(publications_url)
  index.save
end

#fetch_docs(url) ⇒ void

This method returns an undefined value.

Fetch documents from url

Parameters:

  • url (String)


44
45
46
47
48
# File 'lib/relaton/ccsds/data/fetcher.rb', line 44

# Downloads the page at +url+, extracts the JSON assigned to `const config`
# in the page body and passes every entry of its "data" array to
# #parse_and_save.
#
# @param url [String] page that embeds the publications config
# @return [void]
# @raise [RuntimeError] when the page body has no `const config = ...;` line
def fetch_docs(url)
  resp = agent.get(url)
  config = resp.body.match(/const config = (.*);/)
  # Fail with a clear message instead of a NoMethodError on nil when the page
  # layout changes and the embedded config is missing.
  raise "Unable to find publications config at #{url}" unless config

  docs = JSON.parse(config[1])["data"]
  @array = docs.map { |doc| parse_and_save doc, docs }
end

#indexObject

Pubid index (index-v2): `:id` is the lean pubid hash. index-v1 (the pubid-v1 hash index for the released gem line) is rebuilt separately by the data repo's build_index_v1.rb, in its own process with a pubid-v1 bundle, because pubid v1 and v2 both define Pubid::Ccsds::Identifier and cannot coexist here.



26
27
28
29
30
# File 'lib/relaton/ccsds/data/fetcher.rb', line 26

# Returns the memoized index for this fetcher, creating it on first use.
#
# The index is obtained from Relaton::Index.find_or_create for the :ccsds
# type, stored in "#{INDEXFILE}.yaml", with Pubid::Ccsds::Identifier as the
# pubid class.
#
# @return [Object] the index returned by Relaton::Index.find_or_create
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(
    :ccsds, file: "#{INDEXFILE}.yaml", pubid_class: Pubid::Ccsds::Identifier
  )
end

This method returns an undefined value.

Merge identical documents with different links (updates the given bibitem)

Parameters:



224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/relaton/ccsds/data/fetcher.rb', line 224

# Merges sources of an already written document into +bib+.
#
# The first time a +file+ is seen it is only recorded in @files. On later
# calls the stored document is parsed and each of its sources whose type is
# not yet present in +bib.source+ is appended to +bib.source+.
#
# @param bib [Object] item about to be saved; its +source+ list may grow
# @param file [String] output path for +bib+
# @return [void]
def merge_links(bib, file) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  seen = @files.include?(file)
  # A file not written during this run has nothing to merge; just record it.
  @files << file unless seen
  return unless seen

  puts "(#{file}) file already exists. Trying to merge links ..."

  parse_file(file).source.each do |stored_src|
    # Checked against the live list, so only one source per type is kept.
    bib.source << stored_src if bib.source.none? { |s| s.type == stored_src.type }
  end
  Util.info "links are merged.", key: file
end

#parse_and_save(doc, data) ⇒ void

This method returns an undefined value.

Parse document and save to file

10 - Patent Licensing. Some docs have this field. The content is the same in each and does not look useful.
11 - Extra Information. Does not look useful.

Parameters:

  • doc (Hash)

    document data

  • data (Array<Array<String>>)

    collection of documents 0 - empty 1 - center/a HTML element with href to PDF 2 - a HTML element with href to HTML and document ID content (e.g. “CCSDS 123.0-B-1”) 3 - document title 4 - document series (e.g. “Blue Book”, “Silver Book”, etc) 5 - issue number 6 - publication date (e.g. “August 2020”) 7 - abstract 8 - Working Group as ‘name <a href=“path” …` 9 - ISO Equivalent as `id <a href=“uri” …`



70
71
72
73
74
75
76
77
# File 'lib/relaton/ccsds/data/fetcher.rb', line 70

# Parses one publication entry and saves the resulting item(s).
#
# When element 4 of +doc+ is "Silver Book", the entry is parsed a second time
# with the first result passed as a third argument to DataParser, and that
# second item is saved before the first one.
#
# @param doc [Object] single publication entry (indexed with +doc[4]+)
# @param data [Object] the whole collection, passed through to DataParser
# @return [void]
def parse_and_save(doc, data)
  bibitem = DataParser.new(doc, data).parse
  save_bib DataParser.new(doc, data, bibitem).parse if doc[4] == "Silver Book"
  save_bib bibitem
end

#parse_file(file) ⇒ Object



160
161
162
163
164
165
166
167
# File 'lib/relaton/ccsds/data/fetcher.rb', line 160

# Reads +file+ as UTF-8 and parses it into an Item according to @format.
#
# Only the "yaml" and "xml" formats are handled; the file is not read for any
# other format.
#
# @param file [String] path of a previously written document
# @return [Object] the result of Item.from_yaml or Item.from_xml
# @raise [RuntimeError] when @format is neither "yaml" nor "xml"
def parse_file(file)
  loader =
    case @format
    when "yaml" then :from_yaml
    when "xml" then :from_xml
    end
  raise "Unknown format #{@format}" unless loader

  Item.public_send(loader, File.read(file, encoding: "UTF-8"))
end

#save_bib(bib) ⇒ void

This method returns an undefined value.

Save bibitem to file

Parameters:



86
87
88
89
90
91
92
93
94
# File 'lib/relaton/ccsds/data/fetcher.rb', line 86

# Saves +bib+ to its output file and registers it in the index.
#
# Before writing, relations to already indexed documents are searched for and
# sources from a file written earlier in this run are merged in. Any
# StandardError is reported on stdout (message plus the first six backtrace
# lines) instead of being raised, so one bad document does not stop the run.
#
# @param bib [Object] item to save
# @return [void]
def save_bib(bib) # rubocop:disable Metrics/AbcSize
  # Read the identifier once, inside the rescued body: the error report below
  # must not dereference +bib+ again, or a document without an identifier
  # would raise a second time from within the handler.
  docid = bib.docidentifier.first.content
  search_instance_translation bib
  file = output_file(docid)
  merge_links bib, file
  File.write file, serialize(bib), encoding: "UTF-8"
  index.add_or_update Pubid::Ccsds::Identifier.parse(docid), file
rescue StandardError => e
  puts "Failed to save #{docid || '(unknown document)'}: #{e.message}\n#{e.backtrace[0..5].join("\n")}"
end

#search_instance_translation(bib) ⇒ void

This method returns an undefined value.

Search translation and instance relation

Parameters:



103
104
105
106
107
108
109
110
# File 'lib/relaton/ccsds/data/fetcher.rb', line 103

# Dispatches the relation search based on +bib+'s first identifier.
#
# When the identifier content matches TRRGX, the matched suffix is removed and
# #search_relations is called with the shortened identifier. Otherwise
# #search_translations is called with a copy of the identifier content. The
# identifier stored on +bib+ is never modified.
#
# @param bib [Object] item being saved
# @return [void]
def search_instance_translation(bib)
  content = bib.docidentifier.first.content
  if content.match?(TRRGX)
    search_relations content.sub(TRRGX, ""), bib
  else
    search_translations content.dup, bib
  end
end

#search_relations(bibid, bib) ⇒ void

This method returns an undefined value.

Search instance or translation relation

Parameters:



120
121
122
123
124
125
126
127
128
129
130
# File 'lib/relaton/ccsds/data/fetcher.rb', line 120

# Searches the index for documents sharing +bibid+ (language ignored) and
# creates relations between each of them and +bib+.
#
# @param bibid [String] identifier to look up
# @param bib [Object] item being saved
# @return [void]
def search_relations(bibid, bib)
  target = ::Pubid::Ccsds::Identifier.parse(bibid)
  # search(target) narrows candidates by number via binary search first.
  index.search(target) do |row|
    # Skip rows that identify a different document once language is ignored.
    next if row[:id].exclude(:language) != target
    # TODO: simplify this check?
    # NOTE(review): this compares the row identifier with the identifier
    # content of +bib+; confirm that the two can actually compare equal.
    next if row[:id] == bib.docidentifier.first.content

    create_relations bib, row[:file]
  end
end

#search_translations(bibid, bib) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
# File 'lib/relaton/ccsds/data/fetcher.rb', line 132

# Searches the index for language variants of +bibid+ and creates an instance
# relation between each of them and +bib+.
#
# A row qualifies when its identifier has a language and equals the parsed
# +bibid+ once the language is excluded.
#
# @param bibid [String] identifier to look up
# @param bib [Object] item being saved
# @return [Object] whatever +index.search+ returns
def search_translations(bibid, bib)
  target = ::Pubid::Ccsds::Identifier.parse(bibid)
  # search(target) narrows candidates by number via binary search first.
  index.search(target) do |row|
    candidate = row[:id]
    # Only identifiers that carry a language are of interest here.
    next unless candidate.language
    next unless candidate.exclude(:language) == target

    create_instance_relation bib, row[:file]
  end
end

#to_bibxml(bib) ⇒ Object



244
# File 'lib/relaton/ccsds/data/fetcher.rb', line 244

# Serializes +bib+ for the "bibxml" output by delegating to +bib.to_rfcxml+.
#
# @param bib [Object] item to serialize
# @return [Object] the value returned by +bib.to_rfcxml+
def to_bibxml(bib)
  bib.to_rfcxml
end

#to_xml(bib) ⇒ Object



243
# File 'lib/relaton/ccsds/data/fetcher.rb', line 243

# Serializes +bib+ to XML by calling +bib.to_xml+ with +bibdata: true+.
#
# @param bib [Object] item to serialize
# @return [Object] the value returned by +bib.to_xml+
def to_xml(bib)
  bib.to_xml(bibdata: true)
end

#to_yaml(bib) ⇒ Object



242
# File 'lib/relaton/ccsds/data/fetcher.rb', line 242

# Serializes +bib+ to YAML by delegating to +bib.to_yaml+.
#
# @param bib [Object] item to serialize
# @return [Object] the value returned by +bib.to_yaml+
def to_yaml(bib)
  bib.to_yaml
end

#translation_relation_types(bib) ⇒ Array<String>

Translation or instance relation types

Parameters:

Returns:

  • (Array<String>)

    relation types



176
177
178
179
180
181
182
# File 'lib/relaton/ccsds/data/fetcher.rb', line 176

# Picks the pair of relation types to use for +bib+.
#
# When the content of +bib+'s first identifier matches TRRGX, both types are
# "hasTranslation"; otherwise the pair is "instanceOf" followed by
# "hasInstance".
#
# @param bib [Object] item whose first identifier is inspected
# @return [Array<String>] two relation types
def translation_relation_types(bib)
  translated = bib.docidentifier.first.content.match?(TRRGX)
  return Array.new(2, "hasTranslation") if translated

  %w[instanceOf hasInstance]
end