Module: Kataba

Defined in:
lib/kataba.rb

Defined Under Namespace

Classes: Configuration

Class Method Summary collapse

Class Method Details

.configuration ⇒ Object



21
22
23
# File 'lib/kataba.rb', line 21

# Returns the module-wide Configuration object, building it the first time
# it is requested and handing back the same instance afterwards.
def self.configuration
  return @configuration if @configuration

  @configuration = Configuration.new
end

.configure {|configuration| ... } ⇒ Object

Allows for configuration by block

Example:

Kataba.configure do |config|
 config.offline_storage = "/tmp/kataba"
end

Yields:



17
18
19
# File 'lib/kataba.rb', line 17

# Yields the current configuration object to the caller's block so settings
# can be assigned on it. Does nothing (and returns nil) when no block is given;
# otherwise returns whatever the block returns.
def self.configure
  return unless block_given?

  yield configuration
end

.download_xsd(xsd_uri_array) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/kataba.rb', line 95

# Downloads every XSD in the given list into the configured offline storage
# directory, then keeps following the schemaLocation references found inside
# the downloaded files until no unseen URI remains.
#
# Each schema is stored as "<offline_storage>/<MD5 of its URI>.xsd". When a
# mirror list is configured and holds a non-empty entry for a URI, the mirror
# is fetched instead of the URI itself (the cache name still uses the
# original URI).
#
# Example:
#
# Kataba.download_xsd(["http://www.loc.gov/standards/mods/v3/mods-3-5.xsd"])
#
# Arguments:
#
# xsd_uri_array: (Array of String) blank entries are skipped
#
# Returns nil.
#
# Raises Nokogiri::XML::SyntaxError when a downloaded body does not parse as
# XML in strict mode; errors raised while fetching or writing propagate
# unchanged.
def self.download_xsd(xsd_uri_array)
  pending = xsd_uri_array.reject { |uri| uri.to_s.empty? }
  return if pending.empty?

  dir_name = configuration.offline_storage.to_s
  FileUtils.mkdir_p(dir_name)

  # Load the mirror map once per call rather than once per URI.
  mirror_list_path = configuration.mirror_list.to_s
  mirrors = mirror_list_path.empty? ? {} : (YAML.load_file(mirror_list_path) || {})

  # URIs already handled during this call. Schemas that import each other
  # (directly or through a chain) would otherwise be fetched forever.
  fetched = {}

  until pending.empty?
    downloaded_paths = []

    pending.each do |xsd_uri|
      next if fetched.key?(xsd_uri)

      fetched[xsd_uri] = true

      file_path = "#{dir_name}/#{Digest::MD5.hexdigest(xsd_uri)}.xsd"
      tmp_path  = "#{file_path}.part"
      mirror    = mirrors[xsd_uri]
      source    = mirror.to_s.empty? ? xsd_uri : mirror

      # Write to a .part file first; only rename to the final cache path
      # after we've confirmed the bytes parse as XML. Without this, a
      # malformed response (HTML error page, truncated TCP stream, captive
      # portal stub) would land at the canonical cache path and poison
      # every subsequent fetch.
      begin
        # Block form so the handle returned by URI.open is closed for us.
        File.binwrite(tmp_path, URI.open(source, &:read))
        Nokogiri::XML(File.read(tmp_path)) { |options| options.strict }
        File.rename(tmp_path, file_path)
      ensure
        # Still present only if the write, parse or rename failed.
        File.delete(tmp_path) if File.exist?(tmp_path)
      end

      downloaded_paths << file_path
    end

    # Gather the references of EVERY file downloaded in this round, not just
    # the last one, and use them as the next round's work list.
    pending = downloaded_paths
              .flat_map { |path| find_schemas(path) }
              .reject { |uri| uri.to_s.empty? }
  end

  nil
end

.fetch_schema(xsd_uri) ⇒ Object

If already downloaded, uses offline version. If not, downloads from the URI provided. If mirror list is configured, searches for mirrored URI instead.

Example:

Kataba.fetch_schema("http://www.loc.gov/standards/mods/v3/mods-3-5.xsd")

Arguments:

xsd_uri: (String)


64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/kataba.rb', line 64

# Returns a Nokogiri schema for the given URI, using the offline copy when one
# is already cached and downloading it (via download_xsd) otherwise.
#
# Example:
#
# Kataba.fetch_schema("http://www.loc.gov/standards/mods/v3/mods-3-5.xsd")
#
# Arguments:
#
# xsd_uri: (String)
#
# Returns whatever Nokogiri::XML::Schema returns for the cached file's text.
#
# Raises Nokogiri::XML::SyntaxError if the schema still fails after the
# cached copy has been evicted and fetched again once.
def self.fetch_schema(xsd_uri)
  dir_path = configuration.offline_storage.to_s
  xsd_path = "#{dir_path}/#{Digest::MD5.hexdigest(xsd_uri)}.xsd"

  attempts = 0
  begin
    # Only hit the network when there is no offline copy yet.
    download_xsd([xsd_uri]) unless File.exist?(xsd_path)

    # Read BEFORE changing directory: xsd_path already contains dir_path, so
    # with a relative offline_storage it would resolve to "dir/dir/<md5>.xsd"
    # once we are inside dir_path.
    schema_source = File.read(xsd_path)

    # Build the schema from inside the storage directory; find_schemas
    # rewrites schemaLocation values to bare "<md5>.xsd" names, which
    # presumably are resolved relative to the working directory.
    Dir.chdir(dir_path) { Nokogiri::XML::Schema(schema_source) }
  rescue Nokogiri::XML::SyntaxError
    # Poisoned cache (e.g. a pre-fix install that stored a bad fetch).
    # Evict the offending file and refetch once.
    File.delete(xsd_path) if File.exist?(xsd_path)
    attempts += 1
    retry if attempts < 2
    raise
  end
end

.find_schemas(xml_file_path) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/kataba.rb', line 154

# Collects every non-empty schemaLocation attribute value found in the XML
# file, and rewrites each of those attributes in place to "<MD5 of the
# original value>.xsd". The file is then overwritten with the modified
# document.
#
# Arguments:
#
# xml_file_path: (String) path of the XML file to scan and rewrite
#
# Returns the Array of original schemaLocation values, in document order.
def self.find_schemas(xml_file_path)
  document = File.open(xml_file_path) { |io| Nokogiri::XML(io) }

  located_uris = document.xpath("//@schemaLocation").each_with_object([]) do |attribute, uris|
    location = attribute.value
    next if location.to_s.empty?

    uris << location
    # Point the attribute at the local, MD5-named copy.
    attribute.value = "#{Digest::MD5.hexdigest(location)}.xsd"
  end

  # Persist the rewritten attribute values.
  File.write(xml_file_path, document.to_xml)

  located_uris
end

.reset ⇒ Object

Undoes any configuration - this method was built for testing purposes

Example:

Kataba.reset


30
31
32
# File 'lib/kataba.rb', line 30

# Discards the current configuration by replacing it with a brand-new
# Configuration instance, which is also the return value.
def self.reset
  Configuration.new.tap { |fresh| @configuration = fresh }
end