Class: FAIRChampionHarvester::INCHI

Inherits:
Object
  • Object
show all
Defined in:
lib/inchi.rb

Class Method Summary collapse

Class Method Details

.resolve_inchi(guid, meta) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/inchi.rb', line 3

# Resolves an InChI Key GUID in two hops and records the outcome on +meta+.
#
# Hop 1 converts +guid+ to a resolver URL via Core.convertToURL, fetches it,
# requires the response to be Turtle, parses it into +meta.graph+ and looks
# for an object of the sio:is-attribute-of predicate (http or https form).
# Hop 2 fetches that object (with a trailing slash stripped), again requires
# Turtle, and parses it too.
#
# Every failure is reported by appending a "CRITICAL: ..." line to
# +meta.comments+ and returning early; nothing is raised by this method
# itself. Each fetched body is appended to +meta.full_response+ before it
# is inspected, so the raw response is kept even when parsing is skipped.
#
# @param guid [String] the InChI Key identifier to resolve
# @param meta [Object] metadata accumulator; this method uses its
#   +guidtype+, +guidtype=+, +comments+, +full_response+ and +graph+ members
# @return [Object] the same +meta+ object that was passed in
def self.resolve_inchi(guid, meta)
  guid_type, url = Core.convertToURL(guid)
  meta.guidtype = guid_type if meta.guidtype.nil?
  meta.comments << "INFO: Found an InChI Key GUID.\n"

  accept = FAIRChampionHarvester::Utils::AcceptHeader
  meta.comments << "INFO: Resolving using PubChem Resolver #{url} with HTTP Accept Headers #{accept}.\n"

  # First hop: the resolver record for the InChI Key itself.
  # NOTE(review): per the original comment, head is a Net::HTTP response - confirm against Core.fetch.
  head, body = Core.fetch(guid: url, headers: accept, meta: meta)
  return meta unless body

  meta.full_response << body # set it here so it isn't empty

  parser, content_type = Core.figure_out_type(head)
  unless parser && content_type
    meta.comments << "CRITICAL: Couldn't find a parser for the data returned from #{url}. Halting. \n"
    return meta
  end

  # PubChem is expected to answer with Turtle; anything else is treated as a failure.
  unless parser.eql? "turtle"
    meta.comments << "CRITICAL: expected turtle format from #{url}. Halting. \n"
    return meta
  end

  Core.parse_rdf(meta, body)

  # The first layer only points at the compound record; find that pointer.
  query = SPARQL.parse("select ?o where {VALUES ?p {
                        <http://semanticscience.org/resource/is-attribute-of> <https://semanticscience.org/resource/is-attribute-of>}
                          ?s ?p ?o}")
  results = query.execute(meta.graph)
  unless results.any?
    meta.comments << "CRITICAL: Could not find the sio:is_attribute_of predicate in the first layer of metadata from https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/#{guid}. Halting. \n"
    return meta
  end

  cpd = results.first[:o].to_s.gsub(%r{/$}, "") # has a rogue trailing slash
  meta.comments << "INFO: Found #{cpd} as the identifier of the second layer of metadata.\n"
  meta.comments << "INFO: Resolving #{cpd} using HTTP Accept Header #{accept}.\n"

  # Second hop: the compound record itself.
  head2, body2 = Core.fetch(guid: cpd, headers: accept, meta: meta)
  unless body2
    meta.comments << "CRITICAL: Resolution of #{cpd} using HTTP Accept Header #{accept} returned no message body. Halting. \n"
    return meta
  end

  meta.full_response << body2 # set it here so it isn't empty

  # Use the same Core helpers as the first hop (previously referenced a
  # different module name here, inconsistent with the rest of the method).
  parser2, _content_type2 = Core.figure_out_type(head2)
  # A nil parser also lands here, because nil.eql?("turtle") is false.
  unless parser2.eql? "turtle"
    meta.comments << "CRITICAL: Expected turtle format from #{cpd}.  Giving up. \n"
    return meta # simply fail if they asked for HTML or something else
  end

  Core.parse_rdf(meta, body2)

  meta
end