Class: Relaton::W3c::DataParser
- Inherits:
-
Object
- Object
- Relaton::W3c::DataParser
- Includes:
- RateLimitHandler
- Defined in:
- lib/relaton/w3c/data_parser.rb
Constant Summary collapse
- USED_TYPES =
%w[WD NOTE PER PR REC CR].freeze
- DOCTYPES =
{ "TR" => "technicalReport", "NOTE" => "groupNote", }.freeze
- STAGES =
{ "RET" => "Retired", "SPSD" => "Superseded Recommendation", "OBSL" => "Obsoleted Recommendation", "WD" => "Working Draft", "CRD" => "Candidate Recommendation Draft", "CR" => "Candidate Recommendation", "PR" => "Proposed Recommendation", "PER" => "Proposed Edited Recommendation", "REC" => "Recommendation", }.freeze
- ERROR_KEYS =
Document parser initialization
%i[status title doc_uri formattedref series date relation contributor doctype].freeze
Constants included from RateLimitHandler
RateLimitHandler::MAX_RETRIES, RateLimitHandler::RETRYABLE_ERRORS
Class Method Summary collapse
-
.parse(spec, errors = {}) ⇒ Relaton::W3c::ItemData?
Initialize document parser and run it.
-
.parse_identifier(url) ⇒ String
Parse identifier from URL.
Instance Method Summary collapse
- #create_editor(unrealized_editor) ⇒ Object
-
#create_relation(version, type, desc = nil) ⇒ Bib::Relation
Create relation.
- #create_w3c_org ⇒ Object
- #doc_uri(spec = @spec) ⇒ Object
-
#identifier(link = doc_uri) ⇒ String
Generate identifier from URL.
-
#initialize(spec, errors = {}) ⇒ DataParser
constructor
A new instance of DataParser.
-
#parse ⇒ Relaton::W3c::ItemData
Parse document.
-
#parse_contrib ⇒ Array<Bib::Contributor>
Parse contributor.
-
#parse_date ⇒ Array<Bib::Date>
Parse date.
-
#parse_deliverers ⇒ Array<Bib::Contributor>
Parse deliverers as contributors with role “author” and description “committee”.
-
#parse_docid ⇒ Array<Bib::Docidentifier>
Parse docidentifier.
-
#parse_doctype ⇒ Doctype?
Parse doctype.
-
#parse_ext ⇒ Ext?
Parse ext with doctype.
-
#parse_formattedref ⇒ String?
Parse formattedref.
-
#parse_relation ⇒ Array<Bib::Relation>
Parse relation.
-
#parse_series ⇒ Array<Bib::Series>
Parse series.
-
#parse_source ⇒ Array<Bib::Uri>
Parse link.
-
#parse_status ⇒ Bib::Status?
Extract document status.
-
#parse_title(spec = @spec) ⇒ Array<Bib::Title>
Parse title.
-
#pub_id(url) ⇒ String
Generate PubID.
-
#relations ⇒ Array<Bib::Relation>
Create relations.
-
#type ⇒ String
Extract type.
-
#type_from_link ⇒ String?
Fetch type from link.
Methods included from RateLimitHandler
Constructor Details
#initialize(spec, errors = {}) ⇒ DataParser
Returns a new instance of DataParser.
33 34 35 36 37 |
# File 'lib/relaton/w3c/data_parser.rb', line 33

# Document parser initialization.
#
# @param spec [Object] the object to parse (its concrete type is not visible
#   here; other methods query it with `respond_to?`)
# @param errors [Hash{Symbol=>Boolean}] error flags shared with the caller;
#   the hash is stored as-is and mutated in place
def initialize(spec, errors = {})
  @spec = spec
  @errors = errors
  # Seed every flag the caller has not supplied with `true`. Flags already
  # present keep their current value, whatever it is.
  ERROR_KEYS.each do |key|
    next if @errors.key?(key)

    @errors[key] = true
  end
end
Class Method Details
.parse(spec, errors = {}) ⇒ Relaton::W3c::ItemData?
Initialize document parser and run it
46 47 48 |
# File 'lib/relaton/w3c/data_parser.rb', line 46

# Initialize a document parser and run it.
#
# @param spec [Object] the object to parse
# @param errors [Hash{Symbol=>Boolean}] error flags, passed through to the
#   constructor
# @return [Relaton::W3c::ItemData, nil] whatever the instance-level #parse returns
def self.parse(spec, errors = {})
  parser = new(spec, errors)
  parser.parse
end
.parse_identifier(url) ⇒ String
Parse identifier from URL
163 164 165 166 167 168 |
# File 'lib/relaton/w3c/data_parser.rb', line 163

# Parse identifier from URL.
#
# The identifier is the last path segment that contains at least one "-" or
# "+" separated part (optionally followed by one more "/segment"). When no
# such segment exists, the last non-empty path segment is used instead.
#
# @param url [#to_s] document URL
# @return [String, nil] the identifier; nil when the URL is empty
def self.parse_identifier(url)
  link = url.to_s
  # String#[] with a capture index returns the group or nil, which avoids the
  # Perl-style `$1` global used previously.
  link[%r{.+/(\w+(?:[-+][\w.]+)+(?:/\w+)?)}, 1] || link.split("/").last
end
Instance Method Details
#create_editor(unrealized_editor) ⇒ Object
328 329 330 331 332 333 334 335 336 337 338 339 340 |
# File 'lib/relaton/w3c/data_parser.rb', line 328

# Build an "editor" contributor from an editor link.
#
# @param unrealized_editor [Object] editor reference handed to `realize`
#   (presumably an unresolved API link — confirm against RateLimitHandler)
# @return [Bib::Contributor, nil] nil when `realize` yields nothing
def create_editor(unrealized_editor)
  realized = realize(unrealized_editor)
  return unless realized

  locale = { language: "en", script: "Latn" }
  family_name = Bib::LocalizedString.new(content: realized.family, **locale)
  given_name = Bib::FullNameType::Forename.new(content: realized.given, **locale)
  full_name = Bib::FullName.new(surname: family_name, forename: [given_name])
  person = Bib::Person.new(name: full_name)
  editor_role = Bib::Contributor::Role.new(type: "editor")
  Bib::Contributor.new(person: person, role: [editor_role])
end
#create_relation(version, type, desc = nil) ⇒ Bib::Relation
Create relation
279 280 281 282 283 284 285 286 287 288 289 |
# File 'lib/relaton/w3c/data_parser.rb', line 279

# Create relation.
#
# @param version [Object] reference to the related document, handed to `realize`
# @param type [String] relation type (e.g. "hasEdition", "obsoletes")
# @param desc [String, nil] optional relation description
# @return [Bib::Relation]
def create_relation(version, type, desc = nil)
  related_spec = realize(version)
  uri = doc_uri(related_spec)
  related_id = pub_id(uri)
  titles = parse_title(related_spec)
  related_item = ItemData.new(
    title: titles,
    docidentifier: [Bib::Docidentifier.new(type: "W3C", content: related_id, primary: true)],
    source: [Bib::Uri.new(type: "src", content: uri)],
  )
  # The description is only wrapped when one was given; otherwise nil is passed.
  description = desc ? Bib::LocalizedMarkedUpString.new(content: desc) : nil
  Bib::Relation.new(type: type, bibitem: related_item, description: description)
end
#create_w3c_org ⇒ Object
367 368 369 370 371 372 373 |
# File 'lib/relaton/w3c/data_parser.rb', line 367

# Build a fresh organization object describing the W3C.
#
# @return [Bib::Organization]
def create_w3c_org
  org_name = Bib::TypedLocalizedString.new(content: "World Wide Web Consortium")
  org_abbreviation = Bib::LocalizedString.new(content: "W3C")
  org_uri = Bib::Uri.new(content: "https://www.w3.org")
  Bib::Organization.new(name: [org_name], abbreviation: org_abbreviation, uri: org_uri)
end
#doc_uri(spec = @spec) ⇒ Object
111 112 113 114 115 |
# File 'lib/relaton/w3c/data_parser.rb', line 111

# Return the document URL of a spec.
#
# Uses `spec.uri` when the spec responds to it, otherwise `spec.shortlink`.
# The :doc_uri error flag stays truthy only while every URL seen so far was nil.
#
# @param spec [Object] spec to read the URL from (defaults to the parsed spec)
# @return [String, nil]
def doc_uri(spec = @spec)
  link = if spec.respond_to?(:uri)
           spec.uri
         else
           spec.shortlink
         end
  @errors[:doc_uri] &&= link.nil?
  link
end
#identifier(link = doc_uri) ⇒ String
Generate identifier from URL
152 153 154 |
# File 'lib/relaton/w3c/data_parser.rb', line 152

# Generate identifier from URL.
#
# Delegates to the class-level `parse_identifier`.
#
# @param link [String, nil] document URL (defaults to #doc_uri)
# @return [String]
def identifier(link = doc_uri)
  parser_class = self.class
  parser_class.parse_identifier(link)
end
#parse ⇒ Relaton::W3c::ItemData
Parse document
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/relaton/w3c/data_parser.rb', line 55

# Parse document.
#
# Collects the fixed attributes and the result of every `parse_*` step into
# one attribute hash, then builds the item from it.
#
# @return [Relaton::W3c::ItemData]
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  attributes = {
    type: "standard",
    language: ["en"],
    script: ["Latn"],
  }
  # The parse steps run in the same order as the attributes are listed.
  attributes[:status] = parse_status
  attributes[:title] = parse_title
  attributes[:source] = parse_source
  attributes[:docidentifier] = parse_docid
  attributes[:formattedref] = parse_formattedref
  attributes[:docnumber] = identifier
  attributes[:series] = parse_series
  attributes[:date] = parse_date
  attributes[:relation] = parse_relation
  attributes[:contributor] = parse_contrib
  attributes[:ext] = parse_ext
  ItemData.new(**attributes)
end
#parse_contrib ⇒ Array<Bib::Contributor>
Parse contributor
309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 |
# File 'lib/relaton/w3c/data_parser.rb', line 309

# Parse contributor.
#
# The result always starts with the W3C as publisher, followed by the editors
# (when the spec links to any) and the deliverers from #parse_deliverers.
#
# @return [Array<Bib::Contributor>]
def parse_contrib # rubocop:disable Metrics/MethodLength
  publisher = Bib::Contributor.new(
    organization: create_w3c_org,
    role: [Bib::Contributor::Role.new(type: "publisher")],
  )

  editors = []
  if @spec.links.respond_to?(:editors)
    realized = realize @spec.links.editors
    # Guard the realized resource the same way #parse_deliverers does: a
    # missing resource or missing editor list means "no editors", not a crash.
    editor_links = realized&.links&.editors || []
    # create_editor returns nil for editors that cannot be realized.
    editors = editor_links.map { |ed| create_editor(ed) }.compact
  end

  result = [publisher] + editors + parse_deliverers
  @errors[:contributor] &&= result.empty?
  result
end
#parse_date ⇒ Array<Bib::Date>
Parse date
221 222 223 224 225 226 227 228 229 |
# File 'lib/relaton/w3c/data_parser.rb', line 221

# Parse date.
#
# Returns a single "published" date when the spec exposes a non-nil date and
# an empty array otherwise. The :date error flag stays truthy only while no
# date has been found.
#
# @return [Array<Bib::Date>]
def parse_date
  # A spec may respond to #date and still return nil; treat that as "no date"
  # instead of calling #to_date on nil.
  date = @spec.date if @spec.respond_to?(:date)
  result = date ? [Bib::Date.new(type: "published", at: date.to_date.to_s)] : []
  @errors[:date] &&= result.empty?
  result
end
#parse_deliverers ⇒ Array<Bib::Contributor>
Parse deliverers as contributors with role “author” and description “committee”
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 |
# File 'lib/relaton/w3c/data_parser.rb', line 347

# Parse deliverers as contributors with role "author" and description
# "committee".
#
# Each deliverer becomes a W3C organization carrying one
# "technical-committee" subdivision named after the deliverer's title.
#
# @return [Array<Bib::Contributor>] empty when the spec has no deliverers
def parse_deliverers # rubocop:disable Metrics/MethodLength
  return [] unless @spec.links.respond_to?(:deliverers)

  realized = realize @spec.links.deliverers
  groups = realized&.links&.deliverers
  return [] unless groups

  groups.map do |group|
    organization = create_w3c_org
    committee_name = Bib::TypedLocalizedString.new(content: group.title)
    committee = Bib::Subdivision.new(name: [committee_name], type: "technical-committee")
    organization.subdivision = [committee]

    author_role = Bib::Contributor::Role.new(
      type: "author",
      description: [Bib::LocalizedMarkedUpString.new(content: "committee")],
    )
    Bib::Contributor.new(organization: organization, role: [author_role])
  end
end
#parse_docid ⇒ Array<Bib::Docidentifier>
Parse docidentifier
131 132 133 134 |
# File 'lib/relaton/w3c/data_parser.rb', line 131

# Parse docidentifier.
#
# @return [Array<Bib::Docidentifier>] a single primary identifier of type "W3C"
def parse_docid
  primary_id = Bib::Docidentifier.new(type: "W3C", content: pub_id(doc_uri), primary: true)
  [primary_id]
end
#parse_doctype ⇒ Doctype?
Parse doctype
200 201 202 203 204 205 |
# File 'lib/relaton/w3c/data_parser.rb', line 200

# Parse doctype.
#
# Looks the doctype name up in DOCTYPES by #type first and by
# #type_from_link second. The :doctype error flag stays truthy only while no
# doctype has been found.
#
# @return [Doctype, nil]
def parse_doctype
  name = DOCTYPES[type] || DOCTYPES[type_from_link]
  doctype = name ? Doctype.new(content: name) : nil
  @errors[:doctype] &&= doctype.nil?
  doctype
end
#parse_ext ⇒ Ext?
Parse ext with doctype
79 80 81 82 83 |
# File 'lib/relaton/w3c/data_parser.rb', line 79

# Parse ext with doctype.
#
# @return [Ext] ext with flavor "w3c"; its doctype may be nil
def parse_ext
  Ext.new(doctype: parse_doctype, flavor: "w3c")
end
#parse_formattedref ⇒ String?
Parse formattedref
296 297 298 299 300 301 302 |
# File 'lib/relaton/w3c/data_parser.rb', line 296

# Parse formattedref.
#
# A formattedref is only produced for specs that respond to #uri. The
# :formattedref error flag stays truthy only while none has been produced.
#
# @return [Bib::Formattedref, nil]
def parse_formattedref
  formattedref = nil
  formattedref = Bib::Formattedref.new(content: pub_id(@spec.uri)) if @spec.respond_to?(:uri)
  @errors[:formattedref] &&= formattedref.nil?
  formattedref
end
#parse_relation ⇒ Array<Bib::Relation>
Parse relation
236 237 238 239 240 241 242 243 244 245 |
# File 'lib/relaton/w3c/data_parser.rb', line 236

# Parse relation.
#
# Specs that link to a version history get one "hasEdition" relation per
# listed version; all other specs get the relations built by #relations.
# The :relation error flag stays truthy only while no relation has been found.
#
# @return [Array<Bib::Relation>]
def parse_relation
  result = if @spec.links.respond_to?(:version_history)
             history = realize @spec.links.version_history
             # A missing history resource or version list yields no relations
             # instead of raising NoMethodError on nil.
             versions = history&.links&.spec_versions || []
             versions.map { |version| create_relation(version, "hasEdition") }
           else
             relations
           end
  @errors[:relation] &&= result.empty?
  result
end
#parse_series ⇒ Array<Bib::Series>
Parse series
175 176 177 178 179 180 181 182 183 184 |
# File 'lib/relaton/w3c/data_parser.rb', line 175

# Parse series.
#
# When #type is set, returns one series titled "W3C <type>" and numbered
# with the document identifier. The :series error flag stays truthy only
# while no series has been produced.
#
# @return [Array<Bib::Series>]
def parse_series
  series = []
  if type
    series_title = Bib::Title.new(content: "W3C #{type}", language: "en", script: "Latn")
    series << Bib::Series.new(title: [series_title], number: identifier)
  end
  @errors[:series] &&= series.empty?
  series
end
#parse_source ⇒ Array<Bib::Uri>
Parse link
122 123 124 |
# File 'lib/relaton/w3c/data_parser.rb', line 122

# Parse link.
#
# @return [Array<Bib::Uri>] a single "src" link pointing at #doc_uri
def parse_source
  source_link = Bib::Uri.new(type: "src", content: doc_uri)
  [source_link]
end
#parse_status ⇒ Bib::Status?
Extract document status
90 91 92 93 94 95 96 |
# File 'lib/relaton/w3c/data_parser.rb', line 90

# Extract document status.
#
# A status is only built when the spec responds to #status and returns a
# truthy value. The :status error flag stays truthy only while no status has
# been found.
#
# @return [Bib::Status, nil]
def parse_status
  status = nil
  if @spec.respond_to?(:status) && @spec.status
    stage = Bib::Status::Stage.new(content: @spec.status)
    status = Bib::Status.new(stage: stage)
  end
  @errors[:status] &&= status.nil?
  status
end
#parse_title(spec = @spec) ⇒ Array<Bib::Title>
Parse title
103 104 105 106 107 108 109 |
# File 'lib/relaton/w3c/data_parser.rb', line 103

# Parse title.
#
# Returns an empty array, leaving the :title error flag untouched, when the
# spec is nil or its title is missing or blank.
#
# @param spec [Object, nil] spec to read the title from (defaults to the
#   parsed spec)
# @return [Array<Bib::Title>]
def parse_title(spec = @spec)
  text = spec&.title
  return [] if !text || text.strip.empty?

  titles = [Bib::Title.new(content: text, language: "en", script: "Latn")]
  @errors[:title] &&= titles.empty?
  titles
end
#pub_id(url) ⇒ String
Generate PubID
141 142 143 |
# File 'lib/relaton/w3c/data_parser.rb', line 141

# Generate PubID.
#
# @param url [String, nil] document URL
# @return [String] "W3C " followed by the identifier parsed from the URL
def pub_id(url)
  doc_identifier = identifier(url)
  "W3C #{doc_identifier}"
end
#relations ⇒ Array<Bib::Relation>
Create relations
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 |
# File 'lib/relaton/w3c/data_parser.rb', line 252

# Create relations.
#
# Builds, in this order:
# - an "editionOf" relation to the linked specification,
# - an "obsoletes" relation for every predecessor version,
# - an "updatedBy" relation (description "errata") for every successor version.
# Links that are absent or nil are skipped.
#
# @return [Array<Bib::Relation>]
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  links = @spec.links
  rels = []

  # The specification link gets the same nil check as the version links below.
  if links.respond_to?(:specification) && links.specification
    rels << create_relation(links.specification, "editionOf")
  end

  if links.respond_to?(:predecessor_versions) && links.predecessor_versions
    predecessors = realize links.predecessor_versions
    # Safe navigation: a resource that cannot be realized contributes nothing.
    predecessors&.links&.predecessor_versions&.each do |version|
      rels << create_relation(version, "obsoletes")
    end
  end

  if links.respond_to?(:successor_versions) && links.successor_versions
    successors = realize links.successor_versions
    successors&.links&.successor_versions&.each do |version|
      rels << create_relation(version, "updatedBy", "errata")
    end
  end

  rels
end
#type ⇒ String
Extract type
191 192 193 |
# File 'lib/relaton/w3c/data_parser.rb', line 191

# Extract type.
#
# The type is the spec's status when the spec responds to #status and
# "technicalReport" otherwise. A truthy value is memoized in @type.
#
# @return [String, nil] nil when the spec's status is nil
def type
  @type ||= if @spec.respond_to?(:status)
              @spec.status
            else
              "technicalReport"
            end
end
#type_from_link ⇒ String?
Fetch type from link
212 213 214 |
# File 'lib/relaton/w3c/data_parser.rb', line 212

# Fetch type from link.
#
# @return [String, nil] "TR" when the spec's shortlink points below
#   www.w3.org/TR; nil when it does not, or when the spec has no shortlink
def type_from_link
  # Not every spec is guaranteed to expose a shortlink; treat a missing or
  # nil shortlink as "no type" rather than raising NoMethodError.
  link = @spec.shortlink if @spec.respond_to?(:shortlink)
  return unless link

  link.strip[%r{www\.w3\.org/(TR)}, 1]
end