Class: Relaton::W3c::DataParser

Inherits:
Object
  • Object
show all
Includes:
RateLimitHandler
Defined in:
lib/relaton/w3c/data_parser.rb

Constant Summary collapse

# Frozen list of W3C status/type codes.
# NOTE(review): no method shown in this section reads this constant — confirm where it is consumed.
USED_TYPES =
%w[WD NOTE PER PR REC CR].freeze
# Maps a type code to a doctype name. parse_doctype looks the code up by the
# value of #type first and by the value of #type_from_link second.
DOCTYPES =
{
  "TR" => "technicalReport",
  "NOTE" => "groupNote",
}.freeze
# Maps short status codes to their full stage names.
# NOTE(review): parse_status as shown passes @spec.status through unchanged and
# does not consult this table — confirm where it is used.
STAGES =
{
  "RET" => "Retired",
  "SPSD" => "Superseded Recommendation",
  "OBSL" => "Obsoleted Recommendation",
  "WD" => "Working Draft",
  "CRD" => "Candidate Recommendation Draft",
  "CR" => "Candidate Recommendation",
  "PR" => "Proposed Recommendation",
  "PER" => "Proposed Edited Recommendation",
  "REC" => "Recommendation",
}.freeze
ERROR_KEYS =

Document parser initialization

%i[status title doc_uri formattedref series date
relation contributor doctype].freeze

Constants included from RateLimitHandler

RateLimitHandler::MAX_RETRIES, RateLimitHandler::RETRYABLE_ERRORS

Class Method Summary collapse

Instance Method Summary collapse

Methods included from RateLimitHandler

fetched_objects, #realize

Constructor Details

#initialize(spec, errors = {}) ⇒ DataParser

Returns a new instance of DataParser.



33
34
35
36
37
# File 'lib/relaton/w3c/data_parser.rb', line 33

# Set up the parser for one spec.
#
# The given errors hash is stored and mutated in place: every key of
# ERROR_KEYS that is not yet present is added with the value +true+.
# Keys that already exist keep their current value.
#
# @param spec [Object] spec to parse
# @param errors [Hash] per-field error flags, shared with the caller
def initialize(spec, errors = {})
  @spec = spec
  @errors = errors
  ERROR_KEYS.each do |key|
    next if @errors.key?(key)

    @errors[key] = true
  end
end

Class Method Details

.parse(spec, errors = {}) ⇒ Relaton::W3c::ItemData?

Initialize document parser and run it

Parameters:

  • spec (W3cApi::Models::SpecVersion)

Returns:



46
47
48
# File 'lib/relaton/w3c/data_parser.rb', line 46

# Build a parser for the spec and run it.
#
# @param spec [W3cApi::Models::SpecVersion] spec to parse
# @param errors [Hash] per-field error flags handed to the parser
#
# @return [Relaton::W3c::ItemData, nil] whatever the instance-level #parse returns
def self.parse(spec, errors = {})
  parser = new(spec, errors)
  parser.parse
end

.parse_identifier(url) ⇒ String

Parse identifier from URL

Parameters:

  • url (String)

    URL

Returns:

  • (String)

    identifier



163
164
165
166
167
168
# File 'lib/relaton/w3c/data_parser.rb', line 163

# Parse identifier from URL.
#
# The identifier is the captured path segment containing at least one "-" or
# "+" separated part (optionally followed by one more "/segment"). When the
# URL has no such segment, the text after the last "/" is returned instead.
#
# @param url [String] URL (converted with #to_s)
#
# @return [String, nil] identifier; nil when the URL string is empty
def self.parse_identifier(url)
  link = url.to_s
  found = /.+\/(\w+(?:[-+][\w.]+)+(?:\/\w+)?)/.match(link)
  return found[1].to_s if found

  link.split("/").last
end

Instance Method Details

#create_editor(unrealized_editor) ⇒ Object



328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/relaton/w3c/data_parser.rb', line 328

# Build an "editor" contributor from an editor link.
#
# The link is resolved with +realize+; when that yields nothing, no
# contributor is created.
#
# @param unrealized_editor [Object] editor link passed to +realize+
#
# @return [Bib::Contributor, nil] person contributor with role "editor", or nil
def create_editor(unrealized_editor)
  realized = realize(unrealized_editor)
  return unless realized

  locale = { language: "en", script: "Latn" }
  full_name = Bib::FullName.new(
    surname: Bib::LocalizedString.new(content: realized.family, **locale),
    forename: [Bib::FullNameType::Forename.new(content: realized.given, **locale)],
  )
  Bib::Contributor.new(
    person: Bib::Person.new(name: full_name),
    role: [Bib::Contributor::Role.new(type: "editor")],
  )
end

#create_relation(version, type, desc = nil) ⇒ Bib::Relation

Create relation

Parameters:

  • version (Object)

    version link

  • type (String)

    relation type

  • desc (String, nil) (defaults to: nil)

    relation description

Returns:

  • (Bib::Relation)

    relation



279
280
281
282
283
284
285
286
287
288
289
# File 'lib/relaton/w3c/data_parser.rb', line 279

# Create a relation to another spec version.
#
# The version link is resolved with +realize+; the related item carries the
# resolved version's title, a primary "W3C" docidentifier built from its URI,
# and a "src" link to that URI.
#
# @param version [Object] version link
# @param type [String] relation type
# @param desc [String, nil] relation description
#
# @return [Bib::Relation] relation
def create_relation(version, type, desc = nil)
  target = realize(version)
  uri = doc_uri(target)
  pubid = pub_id(uri)
  bibitem = ItemData.new(
    title: parse_title(target),
    docidentifier: [Bib::Docidentifier.new(type: "W3C", content: pubid, primary: true)],
    source: [Bib::Uri.new(type: "src", content: uri)],
  )
  description = desc ? Bib::LocalizedMarkedUpString.new(content: desc) : nil
  Bib::Relation.new(type: type, bibitem: bibitem, description: description)
end

#create_w3c_org ⇒ Object



367
368
369
370
371
372
373
# File 'lib/relaton/w3c/data_parser.rb', line 367

# Build the organization object describing the W3C itself.
#
# @return [Bib::Organization] organization with name, abbreviation and URI set
def create_w3c_org
  org_name = Bib::TypedLocalizedString.new(content: "World Wide Web Consortium")
  short_name = Bib::LocalizedString.new(content: "W3C")
  homepage = Bib::Uri.new(content: "https://www.w3.org")
  Bib::Organization.new(name: [org_name], abbreviation: short_name, uri: homepage)
end

#doc_uri(spec = @spec) ⇒ Object



111
112
113
114
115
# File 'lib/relaton/w3c/data_parser.rb', line 111

# Return the document URI of a spec.
#
# Uses +spec.uri+ when the spec responds to it and +spec.shortlink+ otherwise.
# The :doc_uri error flag, while still truthy, is replaced by whether the
# URI came back nil.
#
# @param spec [Object] spec to read the link from (defaults to the parsed spec)
#
# @return [String, nil] document URI
def doc_uri(spec = @spec)
  link = if spec.respond_to?(:uri)
           spec.uri
         else
           spec.shortlink
         end
  @errors[:doc_uri] &&= link.nil?
  link
end

#identifier(link = doc_uri) ⇒ String

Generate identifier from URL

Parameters:

  • link (String) (defaults to: doc_uri)

Returns:

  • (String)

    identifier



152
153
154
# File 'lib/relaton/w3c/data_parser.rb', line 152

# Generate identifier from URL by delegating to the class-level
# +parse_identifier+.
#
# @param link [String] URL (defaults to the document URI)
#
# @return [String] identifier
def identifier(link = doc_uri)
  parser_class = self.class
  parser_class.parse_identifier(link)
end

#parse ⇒ Relaton::W3c::ItemData

Parse document

Returns:



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/relaton/w3c/data_parser.rb', line 55

# Parse the document into an ItemData.
#
# Fixed attributes are type "standard", language "en" and script "Latn";
# every other attribute comes from the matching parse_* helper, evaluated in
# the order listed below.
#
# @return [Relaton::W3c::ItemData] parsed item
def parse # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  attributes = {
    type: "standard",
    language: ["en"],
    script: ["Latn"],
    status: parse_status,
    title: parse_title,
    source: parse_source,
    docidentifier: parse_docid,
    formattedref: parse_formattedref,
    docnumber: identifier,
    series: parse_series,
    date: parse_date,
    relation: parse_relation,
    contributor: parse_contrib,
    ext: parse_ext,
  }
  ItemData.new(**attributes)
end

#parse_contrib ⇒ Array<Bib::Contributor>

Parse contributor

Returns:

  • (Array<Bib::Contributor>)

    contributor



309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/relaton/w3c/data_parser.rb', line 309

# Parse contributors: the W3C as publisher, then the spec's editors, then
# its deliverers.
#
# Editors are only looked up when the spec's links respond to +editors+.
# An editors link that cannot be resolved contributes no editors instead of
# raising. The :contributor error flag, while still truthy, is replaced by
# whether the result is empty.
#
# @return [Array<Bib::Contributor>] contributor
def parse_contrib # rubocop:disable Metrics/MethodLength
  contribs = [Bib::Contributor.new(
    organization: create_w3c_org,
    role: [Bib::Contributor::Role.new(type: "publisher")],
  )]

  if @spec.links.respond_to?(:editors)
    editors = realize @spec.links.editors
    # realize can come back empty (create_editor and parse_deliverers guard
    # for that as well), so every step of the chain is nil-safe.
    editors&.links&.editors&.each do |ed|
      editor = create_editor(ed)
      contribs << editor if editor
    end
  end

  result = contribs + parse_deliverers
  @errors[:contributor] &&= result.empty?
  result
end

#parse_date ⇒ Array<Bib::Date>

Parse date

Returns:

  • (Array<Bib::Date>)

    date



221
222
223
224
225
226
227
228
229
# File 'lib/relaton/w3c/data_parser.rb', line 221

# Parse the publication date.
#
# Returns a single "published" date when the spec exposes a non-nil +date+,
# and an empty array otherwise. Like parse_status, the value itself is
# checked and not only +respond_to?+, so a nil date no longer raises on
# +to_date+. The :date error flag, while still truthy, is replaced by
# whether the result is empty.
#
# @return [Array<Bib::Date>] date
def parse_date
  published = @spec.date if @spec.respond_to?(:date)
  result = if published
             [Bib::Date.new(type: "published", at: published.to_date.to_s)]
           else
             []
           end
  @errors[:date] &&= result.empty?
  result
end

#parse_deliverers ⇒ Array<Bib::Contributor>

Parse deliverers as contributors with role “author” and description “committee”

Returns:

  • (Array<Bib::Contributor>)

    deliverer contributors



347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# File 'lib/relaton/w3c/data_parser.rb', line 347

# Parse deliverers as contributors with role "author" and description
# "committee".
#
# Each deliverer becomes a W3C organization whose single subdivision, of
# type "technical-committee", is named after the deliverer's title. Returns
# an empty array when the spec has no deliverers link or the link resolves
# to nothing.
#
# @return [Array<Bib::Contributor>] deliverer contributors
def parse_deliverers # rubocop:disable Metrics/MethodLength
  links = @spec.links
  return [] unless links.respond_to?(:deliverers)

  groups = realize(links.deliverers)&.links&.deliverers
  return [] unless groups

  groups.map do |group|
    org = create_w3c_org
    committee_name = Bib::TypedLocalizedString.new(content: group.title)
    org.subdivision = [Bib::Subdivision.new(name: [committee_name], type: "technical-committee")]
    committee_role = Bib::Contributor::Role.new(
      type: "author",
      description: [Bib::LocalizedMarkedUpString.new(content: "committee")],
    )
    Bib::Contributor.new(organization: org, role: [committee_role])
  end
end

#parse_docid ⇒ Array<Bib::Docidentifier>

Parse docidentifier

Returns:

  • (Array<Bib::Docidentifier>)

    docidentifier



131
132
133
134
# File 'lib/relaton/w3c/data_parser.rb', line 131

# Parse docidentifier.
#
# @return [Array<Bib::Docidentifier>] single primary "W3C" identifier built
#   from the document URI
def parse_docid
  primary_id = Bib::Docidentifier.new(type: "W3C", content: pub_id(doc_uri), primary: true)
  [primary_id]
end

#parse_doctype ⇒ Doctype?

Parse doctype

Returns:



200
201
202
203
204
205
# File 'lib/relaton/w3c/data_parser.rb', line 200

# Parse doctype.
#
# The doctype name is looked up in DOCTYPES by #type and, only when that
# finds nothing, by #type_from_link. The :doctype error flag, while still
# truthy, is replaced by whether no doctype was found.
#
# @return [Doctype, nil] doctype, or nil when neither lookup matches
def parse_doctype
  content = DOCTYPES[type]
  content ||= DOCTYPES[type_from_link]
  doctype = content ? Doctype.new(content: content) : nil
  @errors[:doctype] &&= doctype.nil?
  doctype
end

#parse_ext ⇒ Ext?

Parse ext with doctype

Returns:

  • (Ext, nil)

    ext



79
80
81
82
83
# File 'lib/relaton/w3c/data_parser.rb', line 79

# Parse ext with doctype.
#
# @return [Ext] ext carrying the parsed doctype (possibly nil) and flavor "w3c"
def parse_ext
  Ext.new(doctype: parse_doctype, flavor: "w3c")
end

#parse_formattedref ⇒ Bib::Formattedref?

Parse formattedref

Returns:

  • (Bib::Formattedref, nil)

    formattedref



296
297
298
299
300
301
302
# File 'lib/relaton/w3c/data_parser.rb', line 296

# Parse formattedref.
#
# Built from the PubID of +@spec.uri+ when the spec responds to +uri+. The
# :formattedref error flag, while still truthy, is replaced by whether
# nothing was built.
#
# @return [Bib::Formattedref, nil] formattedref
def parse_formattedref
  ref = nil
  ref = Bib::Formattedref.new(content: pub_id(@spec.uri)) if @spec.respond_to?(:uri)
  @errors[:formattedref] &&= ref.nil?
  ref
end

#parse_relation ⇒ Array<Bib::Relation>

Parse relation

Returns:

  • (Array<Bib::Relation>)

    relation



236
237
238
239
240
241
242
243
244
245
# File 'lib/relaton/w3c/data_parser.rb', line 236

# Parse relation.
#
# When the spec's links respond to +version_history+, every version in the
# resolved history becomes a "hasEdition" relation; otherwise the result of
# #relations is used. A history link that cannot be resolved, or that lists
# no versions, yields an empty array instead of raising. The :relation error
# flag, while still truthy, is replaced by whether the result is empty.
#
# @return [Array<Bib::Relation>] relation
def parse_relation
  result = if @spec.links.respond_to?(:version_history)
             version_history = realize @spec.links.version_history
             # realize can come back empty (create_editor and parse_deliverers
             # guard for that as well), so fall back to no versions.
             versions = version_history&.links&.spec_versions || []
             versions.map { |version| create_relation(version, "hasEdition") }
           else
             relations
           end
  @errors[:relation] &&= result.empty?
  result
end

#parse_series ⇒ Array<Bib::Series>

Parse series

Returns:

  • (Array<Bib::Series>)

    series



175
176
177
178
179
180
181
182
183
184
# File 'lib/relaton/w3c/data_parser.rb', line 175

# Parse series.
#
# When #type yields a value, returns one series titled "W3C <type>" and
# numbered with the document identifier; otherwise returns an empty array.
# The :series error flag, while still truthy, is replaced by whether the
# result is empty.
#
# @return [Array<Bib::Series>] series
def parse_series
  kind = type
  series = []
  if kind
    heading = Bib::Title.new(content: "W3C #{kind}", language: "en", script: "Latn")
    series << Bib::Series.new(title: [heading], number: identifier)
  end
  @errors[:series] &&= series.empty?
  series
end

#parse_source ⇒ Array<Bib::Uri>

Parse link

Returns:

  • (Array<Bib::Uri>)

    link



122
123
124
# File 'lib/relaton/w3c/data_parser.rb', line 122

# Parse link.
#
# @return [Array<Bib::Uri>] single "src" link pointing at the document URI
def parse_source
  src = Bib::Uri.new(type: "src", content: doc_uri)
  [src]
end

#parse_status ⇒ Bib::Status?

Extract document status

Returns:

  • (Bib::Status, nil)

    document status



90
91
92
93
94
95
96
# File 'lib/relaton/w3c/data_parser.rb', line 90

# Extract document status.
#
# A status is built only when the spec responds to +status+ and the value
# is truthy; the value is used as the stage content unchanged. The :status
# error flag, while still truthy, is replaced by whether nothing was built.
#
# @return [Bib::Status, nil] document status
def parse_status
  code = @spec.respond_to?(:status) ? @spec.status : nil
  status = code ? Bib::Status.new(stage: Bib::Status::Stage.new(content: code)) : nil
  @errors[:status] &&= status.nil?
  status
end

#parse_title(spec = @spec) ⇒ Array<Bib::Title>

Parse title

Returns:

  • (Array<Bib::Title>)

    title



103
104
105
106
107
108
109
# File 'lib/relaton/w3c/data_parser.rb', line 103

# Parse title.
#
# Returns an empty array, leaving the :title error flag untouched, when the
# spec is nil or its title is missing or blank. Otherwise returns one
# English/Latin title and clears a still-truthy :title error flag.
#
# @param spec [Object, nil] spec to read the title from (defaults to the parsed spec)
#
# @return [Array<Bib::Title>] title
def parse_title(spec = @spec)
  text = spec&.title
  return [] unless text
  return [] if text.strip.empty?

  titles = [Bib::Title.new(content: text, language: "en", script: "Latn")]
  @errors[:title] &&= titles.empty?
  titles
end

#pub_id(url) ⇒ String

Generate PubID

Returns:

  • (String)

    PubID



141
142
143
# File 'lib/relaton/w3c/data_parser.rb', line 141

# Generate PubID: the identifier derived from the URL, prefixed with "W3C ".
#
# @param url [String] document URL
#
# @return [String] PubID
def pub_id(url)
  id = identifier(url)
  "W3C #{id}"
end

#relations ⇒ Array<Bib::Relation>

Create relations

Returns:

  • (Array<Bib::Relation>)

    relations



252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/relaton/w3c/data_parser.rb', line 252

# Create relations from the spec's links.
#
# In order: an "editionOf" relation to the specification link (when the
# links respond to it), an "obsoletes" relation per predecessor version, and
# an "updatedBy" relation described as "errata" per successor version. The
# predecessor and successor lists are only resolved when the corresponding
# link is present and truthy.
#
# @return [Array<Bib::Relation>] relations
def relations # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  links = @spec.links
  rels = []
  rels << create_relation(links.specification, "editionOf") if links.respond_to?(:specification)

  if links.respond_to?(:predecessor_versions) && links.predecessor_versions
    earlier = realize(links.predecessor_versions)
    earlier.links.predecessor_versions.each do |prev|
      rels << create_relation(prev, "obsoletes")
    end
  end

  if links.respond_to?(:successor_versions) && links.successor_versions
    later = realize(links.successor_versions)
    later.links.successor_versions.each do |succ|
      rels << create_relation(succ, "updatedBy", "errata")
    end
  end

  rels
end

#type ⇒ String

Extract type

Returns:

  • (String)

    type



191
192
193
# File 'lib/relaton/w3c/data_parser.rb', line 191

# Extract type.
#
# The value is the spec's +status+ when the spec responds to it and
# "technicalReport" otherwise. A truthy value is memoized in @type; a nil
# status is not cached and is read again on the next call.
#
# @return [String] type
def type
  return @type if @type

  @type = if @spec.respond_to?(:status)
            @spec.status
          else
            "technicalReport"
          end
end

#type_from_link ⇒ String?

Fetch type from link

Returns:

  • (String, nil)

    type



212
213
214
# File 'lib/relaton/w3c/data_parser.rb', line 212

# Fetch type from link.
#
# Looks for "www.w3.org/TR" in the spec's stripped shortlink and returns the
# captured "TR" segment.
#
# @return [String, nil] "TR" when the shortlink matches, nil otherwise
def type_from_link
  shortlink = @spec.shortlink.strip
  shortlink[/www\.w3\.org\/(TR)/, 1]
end