Class: Relaton::W3c::DataFetcher

Inherits:
Core::DataFetcher
  • Object
show all
Includes:
RateLimitHandler
Defined in:
lib/relaton/w3c/data_fetcher.rb

Constant Summary

Constants included from RateLimitHandler

RateLimitHandler::MAX_RETRIES, RateLimitHandler::RETRYABLE_ERRORS

Instance Method Summary collapse

Methods included from RateLimitHandler

#fetched_objects, #realize

Instance Method Details

#client ⇒ Object



21
22
23
# File 'lib/relaton/w3c/data_fetcher.rb', line 21

# Lazily built API client, cached in @client so that repeated calls
# return the same W3cApi::Client instance.
#
# @return [W3cApi::Client]
def client
  return @client if @client

  @client = W3cApi::Client.new
end

#fetch(_source = nil) ⇒ Object

Fetch every specification page by page, save each document, then save the index and report errors



28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/relaton/w3c/data_fetcher.rb', line 28

# Walk every page of the client's specification listing, pass each listed
# specification to #fetch_spec, then save the index and report errors.
#
# @param _source [Object, nil] unused
# @return [Object] whatever `report_errors` returns
def fetch(_source = nil)
  page = client.specifications
  loop do
    page.links.specifications.each { |unrealized_spec| fetch_spec(unrealized_spec) }

    # The listing is paginated; stop once the current page has no successor.
    break unless page.next?

    page = page.next
  end
  index.save
  report_errors
end

#fetch_spec(unrealized_spec) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/relaton/w3c/data_fetcher.rb', line 43

# Realize a specification and save it, then save every document listed
# behind its version-history, predecessor-versions and successor-versions
# links (each only when the link exists and is set).
#
# NOTE(review): only the top-level specification is parsed with @errors;
# the related versions are parsed without it - confirm this is intended.
#
# @param unrealized_spec [Object] link object that `realize` resolves
def fetch_spec(unrealized_spec)
  spec = realize unrealized_spec
  save_doc DataParser.parse(spec, @errors)

  # link_name: link on the specification; collection_name: list exposed by
  # the links of the realized link target.
  save_linked = lambda do |link_name, collection_name|
    next unless spec.links.respond_to?(link_name) && spec.links.public_send(link_name)

    listing = realize spec.links.public_send(link_name)
    listing.links.public_send(collection_name).each do |version|
      save_doc DataParser.parse(realize(version))
    end
  end

  save_linked.call(:version_history, :spec_versions)
  save_linked.call(:predecessor_versions, :predecessor_versions)
  save_linked.call(:successor_versions, :successor_versions)
end

#file_name(id) ⇒ String

Generate file name

Parameters:

  • id (String)

    document id

Returns:

  • (String)

    file name



101
102
103
104
# File 'lib/relaton/w3c/data_fetcher.rb', line 101

# Build the output path for a document id.
#
# A leading "W3C" followed by one whitespace character is dropped, every
# whitespace, comma, colon, slash or plus character becomes "_", runs of
# "_" are collapsed, and the result is lower-cased. The path is that name
# with the @ext extension, placed under @output.
#
# @param id [String] document id
# @return [String] file name
def file_name(id)
  stem = id.sub(/^W3C\s/, "")
  stem = stem.gsub(/[\s,:\/+]/, "_").squeeze("_")
  File.join(@output, "#{stem.downcase}.#{@ext}")
end

#index ⇒ Object



13
14
15
# File 'lib/relaton/w3c/data_fetcher.rb', line 13

# Index for the :W3C type, found or created through Relaton::Index and
# backed by the file "#{INDEXFILE}.yaml". Cached in @index after the
# first call.
#
# @return [Object] the value returned by Relaton::Index.find_or_create
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(:W3C, file: "#{INDEXFILE}.yaml")
end

#log_error(msg) ⇒ Object



17
18
19
# File 'lib/relaton/w3c/data_fetcher.rb', line 17

# Forward an error message to Util.error.
#
# @param msg [Object] message to log
# @return [Object] whatever Util.error returns
def log_error(msg)
  Util.error(msg)
end

#save_doc(bib, warn_duplicate: true) ⇒ Object

Save document to file

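Parameters:

  • bib

    document to save; nothing is done when it is nil or false

  • warn_duplicate (Boolean) (defaults to: true)

    whether to log a warning when the target file was already written during this run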



68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/relaton/w3c/data_fetcher.rb', line 68

# Save document to file.
#
# The target path comes from #file_name applied to the document's
# docnumber. The first time a path is seen it is added to the index and
# recorded in @files; on later occurrences a warning is logged instead
# (unless warn_duplicate is false). In both cases the file is written, so
# a later document with the same path overwrites the earlier one.
#
# @param bib [Object, nil] document to save; ignored when nil or false
# @param warn_duplicate [Boolean] log a warning when the path was already
#   written during this run
def save_doc(bib, warn_duplicate: true)
  return unless bib

  path = file_name(bib.docnumber)
  already_written = @files.include?(path)
  if already_written && warn_duplicate
    Util.warn "File #{path} already exists. Document: #{bib.docnumber}"
  elsif !already_written
    index.add_or_update(PubId.parse(bib.docnumber).to_hash, path)
    @files << path
  end
  File.write(path, serialize(bib), encoding: "UTF-8")
end

#to_bibxml(bib) ⇒ Object



90
91
92
# File 'lib/relaton/w3c/data_fetcher.rb', line 90

# Serialize a document for the "bibxml" output by calling its #to_xml
# with no options.
#
# NOTE(review): this differs from #to_xml only by omitting bibdata: true;
# confirm that plain #to_xml is the intended BibXML serialization.
#
# @param bib [Object] document responding to #to_xml
# @return [Object] whatever bib.to_xml returns
def to_bibxml(bib)
  bib.to_xml
end

#to_xml(bib) ⇒ Object



82
83
84
# File 'lib/relaton/w3c/data_fetcher.rb', line 82

# Serialize a document by calling its #to_xml with bibdata: true.
#
# @param bib [Object] document responding to #to_xml
# @return [Object] whatever bib.to_xml returns
def to_xml(bib)
  bib.to_xml bibdata: true
end

#to_yaml(bib) ⇒ Object



86
87
88
# File 'lib/relaton/w3c/data_fetcher.rb', line 86

# Serialize a document by calling its #to_yaml.
#
# @param bib [Object] document responding to #to_yaml
# @return [Object] whatever bib.to_yaml returns
def to_yaml(bib)
  bib.to_yaml
end