Class: Relaton::Ieee::DataFetcher

Inherits:
Core::DataFetcher
  • Object
show all
Defined in:
lib/relaton/ieee/data_fetcher.rb

Constant Summary collapse

# Lookup table from single-letter IEEE relation codes to relation attributes.
# A hash value carries the relation :type (and optionally a :description);
# a `false` value marks a code that is known but deliberately skipped
# (both add_crossref and create_relation bail out on `== false`).
RELATION_TYPES =
  {
    "S" => { type: "obsoletedBy" },
    "V" => { type: "updates", description: "revises" },
    "T" => { type: "updates", description: "amends" },
    "C" => { type: "updates", description: "corrects" },
    "O" => { type: "adoptedFrom" },
    "P" => { type: "complementOf", description: "supplement" }
  }.merge(%w[N G F I E B W].map { |code| [code, false] }.to_h).freeze

Instance Method Summary collapse

Instance Method Details

#add_crossref(docnumber, amsid) ⇒ Object

Save unresolved relation reference. Called from worker threads via IdamsParser#parse_relation, so mutates crossrefs under a mutex.

Parameters:

  • docnumber (String)

    of main document

  • amsid (Nokogiri::XML::Element)

    relation data



68
69
70
71
72
73
# File 'lib/relaton/ieee/data_fetcher.rb', line 68

# Record an unresolved relation reference for a document.
#
# Relation codes mapped to `false` in RELATION_TYPES are skipped; codes that
# are missing from the table (nil lookup) are still recorded.
# The append to `crossrefs` runs inside `mutex.synchronize`.
#
# @param docnumber [String] docnumber of the main document
# @param amsid [#type, #date_string] relation data
# @return [Object, nil] result of the append, or nil when the code is skipped
def add_crossref(docnumber, amsid)
  relation_code = amsid.type
  # Compare against `false` explicitly: a nil lookup must not short-circuit.
  return if RELATION_TYPES[relation_code] == false

  entry = { amsid: amsid.date_string, type: relation_code }
  mutex.synchronize do
    crossrefs[docnumber] << entry
  end
end

#backrefs ⇒ Hash

Returns list of AMSID => PubID.

Returns:

  • (Hash)

    list of AMSID => PubID



39
40
41
# File 'lib/relaton/ieee/data_fetcher.rb', line 39

# Lazily created lookup of AMSID => PubID.
#
# @return [Hash] the same hash instance on every call
def backrefs
  return @backrefs if @backrefs

  @backrefs = {}
end

#create_relation(type, fref) ⇒ Bib::Relation, nil

Create relation instance

Parameters:

  • type (String)

    IEEE relation type

  • fref (String)

    reference

Returns:

  • (Bib::Relation, nil) — nil when the relation type is unknown or is mapped to false in RELATION_TYPES


83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/relaton/ieee/data_fetcher.rb', line 83

# Build a relation instance for an IEEE relation code and a reference.
#
# Warns through Util.warn and returns nil when the code is not a key of
# RELATION_TYPES; returns nil silently when the code is mapped to `false`.
#
# @param type [String] IEEE relation type code
# @param fref [String] reference
# @return [Bib::Relation, nil]
def create_relation(type, fref)
  mapping = RELATION_TYPES.fetch(type) do
    Util.warn("Unknown relation type: '#{type}' for reference '#{fref}'", key: fref)
    return nil
  end
  # `false` marks a known code that is intentionally not converted.
  return nil if mapping == false

  identifier = Bib::Docidentifier.new(type: "IEEE", content: fref, primary: true)
  item = ItemData.new(
    formattedref: Bib::Formattedref.new(content: fref),
    docidentifier: [identifier]
  )
  Bib::Relation.new(
    type: mapping[:type],
    description: create_relation_description(type),
    bibitem: item
  )
end

#docs ⇒ Hash

Returns list of docnumber => parsed bib (cache for update_relations).

Returns:

  • (Hash)

    list of docnumber => parsed bib (cache for update_relations)



44
45
46
# File 'lib/relaton/ieee/data_fetcher.rb', line 44

# Lazily created cache of docnumber => parsed bib (used by update_relations).
#
# @return [Hash] the same hash instance on every call
def docs
  return @docs if @docs

  @docs = {}
end

#fetch(_source = nil) ⇒ Object



30
31
32
33
34
35
36
# File 'lib/relaton/ieee/data_fetcher.rb', line 30

# Run the full fetch: collect raw files, process them, update relations and
# report errors.
#
# Input is every *.xml / *.zip file below `ieee-rawbib/` whose path does not
# contain "Deleted_". The list is passed through prefilter_winners unless the
# IEEE_FETCH_PREFILTER environment variable is exactly "0".
#
# @param _source [Object, nil] unused
# @return [Object] whatever report_errors returns
def fetch(_source = nil)
  candidates = Dir["ieee-rawbib/**/*.{xml,zip}"].reject { |path| path.include?("Deleted_") }
  prefilter_enabled = ENV["IEEE_FETCH_PREFILTER"] != "0"
  candidates = prefilter_winners(candidates) if prefilter_enabled
  process_files(candidates)
  update_relations
  report_errors
end

#log_error(msg) ⇒ Object

Convert documents from the `ieee-rawbib` dir (IEEE dataset) to BibYAML/BibXML



26
27
28
# File 'lib/relaton/ieee/data_fetcher.rb', line 26

# Forward an error message to Util.error.
#
# @param msg [String] message to log
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end

#mutex ⇒ Object

Mutex guarding worker-thread mutations of shared state during parse.



57
58
59
# File 'lib/relaton/ieee/data_fetcher.rb', line 57

# Lazily created Mutex guarding worker-thread mutations of shared state.
#
# NOTE(review): the lock is created on first call without any guard of its
# own; presumably it is first touched before worker threads start — confirm.
#
# @return [Mutex] the same lock instance on every call
def mutex
  return @mutex if @mutex

  @mutex = Mutex.new
end

#saved_writes ⇒ Hash

Returns docnumber => max global glob-index whose write was accepted by commit_doc. Populated only when running with parallel workers (writes are staged to per-glob-index suffixed paths and reconciled into the final filename after the parsing phase).

Returns:

  • (Hash)

    docnumber => max global glob-index whose write was accepted by commit_doc



52
53
54
# File 'lib/relaton/ieee/data_fetcher.rb', line 52

# Lazily created map of docnumber => max global glob-index whose write was
# accepted by commit_doc.
#
# @return [Hash] the same hash instance on every call
def saved_writes
  return @saved_writes if @saved_writes

  @saved_writes = {}
end