Class: FAIRChampionHarvester::Utils
- Inherits: Object
- Inheritance hierarchy: Object > FAIRChampionHarvester::Utils
- Defined in:
- lib/utils.rb
Constant Summary collapse
- AcceptHeader =
These are all set in Config.rb: FAIRChampionHarvester::Utils::ExtructCommand = extruct_command, FAIRChampionHarvester::Utils::RDFCommand = rdf_command, FAIRChampionHarvester::Utils::TikaCommand = tika_command.
{ "Accept" => "text/turtle, application/ld+json, application/rdf+xml, text/xhtml+xml, application/n3, application/rdf+n3, application/turtle, application/x-turtle, text/n3, text/turtle, text/rdf+n3, text/rdf+turtle, application/n-triples" }
- AcceptDefaultHeader =
{ "Accept" => "*/*" }
- TEXT_FORMATS =
{ "text" => ["text/plain"] }
- RDF_FORMATS =
{
  "jsonld" => ["application/ld+json", "application/vnd.schemaorg.ld+json"], # NEW FOR DATACITE
  "turtle" => ["text/turtle", "application/n3", "application/rdf+n3", "application/turtle", "application/x-turtle", "text/n3", "text/turtle", "text/rdf+n3", "text/rdf+turtle"],
  # 'rdfa' => ['text/xhtml+xml', 'application/xhtml+xml'],
  "rdfxml" => ["application/rdf+xml"],
  "triples" => ["application/n-triples", "application/n-quads", "application/trig"]
}
- XML_FORMATS =
{ "xml" => ["text/xhtml", "text/xml"] }
- HTML_FORMATS =
{ "html" => ["text/html", "text/xhtml+xml", "application/xhtml+xml"] }
- JSON_FORMATS =
{ "json" => ["application/json"] }
- DATA_PREDICATES =
[
  "http://www.w3.org/ns/ldp#contains",
  "http://xmlns.com/foaf/0.1/primaryTopic",
  "http://purl.obolibrary.org/obo/IAO_0000136", # is about
  "http://purl.obolibrary.org/obo/IAO:0000136", # is about (not the valid URL...)
  "https://www.w3.org/ns/ldp#contains",
  "https://xmlns.com/foaf/0.1/primaryTopic",
  # 'http://schema.org/about', # removed for being too general
  "http://schema.org/mainEntity",
  "http://schema.org/codeRepository",
  "http://schema.org/distribution",
  "http://schema.org/contentUrl",
  # 'https://schema.org/about', #removed for being too general
  "https://schema.org/mainEntity",
  "https://schema.org/codeRepository",
  "https://schema.org/distribution",
  "https://schema.org/contentUrl",
  "http://www.w3.org/ns/dcat#distribution",
  "https://www.w3.org/ns/dcat#distribution",
  "http://www.w3.org/ns/dcat#dataset",
  "https://www.w3.org/ns/dcat#dataset",
  "http://www.w3.org/ns/dcat#downloadURL",
  "https://www.w3.org/ns/dcat#downloadURL",
  "http://www.w3.org/ns/dcat#accessURL",
  "https://www.w3.org/ns/dcat#accessURL",
  "http://semanticscience.org/resource/SIO_000332", # is about
  "http://semanticscience.org/resource/is-about", # is about
  "https://semanticscience.org/resource/SIO_000332", # is about
  "https://semanticscience.org/resource/is-about", # is about
  "https://purl.obolibrary.org/obo/IAO_0000136" # is about
]
- SELF_IDENTIFIER_PREDICATES =
[ "http://purl.org/dc/elements/1.1/identifier", "https://purl.org/dc/elements/1.1/identifier", "http://purl.org/dc/terms/identifier", "http://schema.org/identifier", "https://purl.org/dc/terms/identifier", "https://schema.org/identifier" ]
- GUID_TYPES =
{
  "inchi" => /^\w{14}-\w{10}-\w$/,
  "doi" => %r{^10.\d{4,9}/[-._;()/:A-Z0-9]+$}i,
  "handle1" => %r{^[^/]+/[^/]+$}i,
  "handle2" => %r{^\d{4,5}/[-._;()/:A-Z0-9]+$}i, # legacy style 12345/AGB47A
  "uri" => %r{^\w+:/?/?[^\s]+$},
  "purl" => /purl\./,
  "ark_url" => %r{
    https?://              # http:// or https://
    [^\s/]+?               # domain name (non-greedy)
    /ark:                  # /ark:
    (?:/)?                 # optional extra slash (ark:/...)
    [0-9]+                 # NAAN (Name Assigning Authority Number)
    /                      # separator
    [a-z0-9~=+*@_$.\-/]+   # Name + optional Qualifier
  }ix,
  "ark" => %r{
    \b                     # word boundary (avoids matching inside words)
    ark:                   # literal "ark:"
    (?:/)?                 # optional slash
    [0-9]+                 # NAAN
    /                      # separator
    [a-z0-9~=+*@_$.\-/]+   # Name + optional Qualifier
  }ix
}
- ExtructCommand =
extruct_command
- RDFCommand =
rdf_command
- TikaCommand =
tika_command