Class: FAIRChampionHarvester::CommonQueries
- Inherits:
-
Object
- Object
- FAIRChampionHarvester::CommonQueries
- Defined in:
- lib/common_queries.rb
Class Method Summary collapse
-
.GetDataIdentifier(graph:, meta: FAIRChampionHarvester::MetadataObject.new) ⇒ Object
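Queries the supplied graph (passed as the `graph:` keyword) with each known data predicate and returns the first data identifier found, or nil if none matches.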
- .GetSelfIdentifier(g, meta = FAIRChampionHarvester::MetadataObject.new) ⇒ Object
Class Method Details
.GetDataIdentifier(graph:, meta: FAIRChampionHarvester::MetadataObject.new) ⇒ Object
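Queries the supplied graph (passed as the `graph:` keyword) with each known data predicate and returns the first data identifier found, or nil if none matches.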
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
# File 'lib/common_queries.rb', line 70
#
# Looks for a data identifier in +graph+ by trying each predicate listed in
# FAIRChampionHarvester::Utils::DATA_PREDICATES, in order, and returning the
# value bound by the first query that yields a usable term.
#
# Only the first solution of each query is inspected. Every step is logged
# as an "INFO:" line appended to +meta.comments+.
#
# @param graph [#size] the graph handed to the parsed SPARQL query's
#   +execute+ (presumably an RDF graph — confirm against callers)
# @param meta [#comments] receives the log lines; a fresh MetadataObject is
#   created when the caller does not supply one
# @return [Object, nil] +value+ of the first matching term, or nil when no
#   predicate produced one
def self.GetDataIdentifier(graph:, meta: FAIRChampionHarvester::MetadataObject.new)
  meta.comments << "INFO: SPARQLing graph of size #{graph.size}.\n"

  FAIRChampionHarvester::Utils::DATA_PREDICATES.each do |prop|
    meta.comments << "INFO: SPARQLing for #{prop}.\n"

    # Each predicate family needs its own query shape, but all of them bind
    # the candidate identifier to ?o so the handling below can be shared.
    sparql, label, how =
      case prop
      when %r{schema\.org/distribution}
        # The projected variable must be the one bound in the pattern;
        # projecting a variable that is never bound yields no usable term.
        # NOTE(review): an earlier, commented-out version of this query
        # followed the distribution node to its schema:contentUrl — confirm
        # which of the two is intended.
        [<<~QUERY, 'identifier', 'Schema.org distribution property']
          select ?o where {
            VALUES ?dist {<http://schema.org/distribution> <https://schema.org/distribution>}
            ?s ?dist ?o . }
        QUERY
      when /dcat\#/
        ["select ?o where { ?s <#{prop}> ?o .}", 'data identifier', "DCAT '#{prop}' property"]
      when /mainEntity/
        [<<~QUERY, 'identifier', 'schema:mainEntity containing a schema:identifier clause']
          select ?o where {
            VALUES ?schemaidentifier {<http://schema.org/identifier> <https://schema.org/identifier>}
            ?s <#{prop}> ?entity .
            ?entity ?schemaidentifier ?o}
        QUERY
      else
        ["select ?o where {?s <#{prop}> ?o}", 'identifier', prop.to_s]
      end

    results = SPARQL.parse(sparql).execute(graph)
    unless results.any?
      meta.comments << "INFO: '#{prop}' did not result in any query match.\n"
      next
    end

    term = results.first[:o]
    unless term.respond_to?(:value)
      meta.comments << "INFO: '#{prop}' data identifier did not have the expected structure. Moving on.\n"
      next
    end

    # A local is used (rather than a class-level @identifier) so that calls
    # do not share state through the class object.
    identifier = term.value
    meta.comments << "INFO: found #{label} '#{identifier}' using #{how}.\n"
    return identifier
  end

  meta.comments << "INFO: No data identifier found in this chunk of metadata.\n"
  nil
end
.GetSelfIdentifier(g, meta = FAIRChampionHarvester::MetadataObject.new) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/common_queries.rb', line 3
#
# Collects every identifier value found in graph +g+ for the predicates
# listed in FAIRChampionHarvester::Utils::SELF_IDENTIFIER_PREDICATES.
#
# For a schema.org/identifier predicate two shapes are tried: first an
# identifier node typed as schema:PropertyValue hanging off a root subject,
# and, only when that query returns no solutions at all, a plain value.
# Every other predicate is queried as a plain value.
#
# Progress is logged as "INFO:" lines appended to +meta.comments+.
#
# @param g the graph handed to the parsed SPARQL query's +execute+
#   (presumably an RDF graph — confirm against callers)
# @param meta [#comments] receives the log lines; a fresh MetadataObject is
#   created when the caller does not supply one
# @return [Array] the +value+ of every usable ?identifier binding, in the
#   order found (duplicates are not removed)
def self.GetSelfIdentifier(g, meta = FAIRChampionHarvester::MetadataObject.new)
  identifiers = []

  # test 1 - this assumes that the identifier node attached to "root" is the
  # one we are looking for and assumes the PropertyValue schema for the value
  # of identifier. The trailing '#...' is a SPARQL comment inside the query.
  property_value_query = <<~QUERY
    select ?identifier where {
      VALUES ?predi {<http://schema.org/identifier> <https://schema.org/identifier>}
      VALUES ?predpv {<http://schema.org/PropertyValue> <https://schema.org/PropertyValue>}
      VALUES ?predval {<http://schema.org/value> <https://schema.org/value>}
      ?s ?predi ?i .
      ?i a ?predpv .
      ?i ?predval ?identifier .
      FILTER NOT EXISTS {?sub ?pred ?s} } #must be the root, if not, we don't know what id it is!
  QUERY

  # Runs one query, appends the value of each usable ?identifier binding to
  # +identifiers+, and reports whether the query returned any solutions
  # (usable or not) so the caller can decide whether to fall back.
  harvest = lambda do |prop, sparql, shape, via|
    results = SPARQL.parse(sparql).execute(g)
    matched = results.any?
    results.each do |solution|
      term = solution[:identifier]
      unless term.respond_to?(:value)
        meta.comments << "INFO: '#{prop}' #{shape} did not have the expected structure. Moving on.\n"
        next
      end
      identifier = term.value
      meta.comments << "INFO: found identifier '#{identifier}' using #{via}.\n"
      identifiers << identifier
    end
    matched
  end

  FAIRChampionHarvester::Utils::SELF_IDENTIFIER_PREDICATES.each do |prop|
    simple_query = "select ?identifier where {?s <#{prop}> ?identifier}"

    unless prop =~ %r{schema\.org/identifier}
      harvest.call(prop, simple_query, 'as a simple identifier predicate', "#{prop} as a string or URI")
      next
    end

    next if harvest.call(prop, property_value_query, 'PropertyValue', 'Schema.org identifier as PropertyValue')

    # test 2 - a simple URL or a value from schema
    harvest.call(prop, simple_query, 'as a simple value', 'Schema.org identifier as with a string or URI value')
  end

  identifiers
end