Module: TorchAudio::Datasets::Utils

Defined in:
lib/torchaudio/datasets/utils.rb

Class Method Summary

Class Method Details

.download_url(url, download_folder, filename: nil, hash_value: nil, hash_type: "sha256") ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/torchaudio/datasets/utils.rb', line 5

# Downloads +url+ into +download_folder+ and returns whatever
# +download_url_to_file+ returns.
#
# The target file name is +filename+ when given, otherwise the basename of
# +url+. An already existing target is never overwritten.
#
# @param url [String] location to download from
# @param download_folder [String] directory the file is placed in
# @param filename [String, nil] name to save as; defaults to the URL basename
# @param hash_value [String, nil] expected digest, forwarded to the downloader
# @param hash_type [String] digest algorithm name, forwarded to the downloader
# @raise [RuntimeError] if the target path already exists
def download_url(url, download_folder, filename: nil, hash_value: nil, hash_type: "sha256")
  target = File.join(download_folder, filename || File.basename(url))
  raise "#{target} already exists. Delete the file manually and retry." if File.exist?(target)

  puts "Downloading #{url}..."
  download_url_to_file(url, target, hash_value, hash_type)
end

.download_url_to_file(url, dst, hash_value, hash_type) ⇒ Object

Downloads the URL to the destination path, following up to 10 redirects.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/torchaudio/datasets/utils.rb', line 18

# Downloads +url+ and writes the response body to +dst+, following up to 10
# redirects.
#
# When +hash_value+ is given, the body is hashed with the algorithm named by
# +hash_type+ and compared against it before anything is written to +dst+,
# so a failed check leaves no file behind. When +hash_value+ is nil the
# check is skipped.
#
# @param url [String] location to download from
# @param dst [String] path the downloaded body is written to
# @param hash_value [String, nil] expected hex digest, or nil to skip the check
# @param hash_type [String] one of "md5", "sha1", "sha256", "sha384",
#   "sha512" (case-insensitive); only consulted when +hash_value+ is given
# @return [String] +dst+
# @raise [ArgumentError] if +hash_type+ names an unsupported algorithm
# @raise [RuntimeError] if the computed digest does not match +hash_value+
def download_url_to_file(url, dst, hash_value, hash_type)
  # Resolve the algorithm before touching the network so a bad hash_type
  # fails immediately.
  digest_class =
    if hash_value
      case hash_type.to_s.downcase
      when "md5" then Digest::MD5
      when "sha1" then Digest::SHA1
      when "sha256" then Digest::SHA256
      when "sha384" then Digest::SHA384
      when "sha512" then Digest::SHA512
      else raise ArgumentError, "Unsupported hash type: #{hash_type.inspect}"
      end
    end

  URI.parse(url).open(max_redirects: 10) do |download|
    if digest_class
      # The yielded object may be file-backed (responds to #path) or an
      # in-memory buffer (responds to #string); hash whichever we were given.
      digest =
        if download.respond_to?(:path)
          download.flush
          digest_class.file(download.path).hexdigest
        else
          digest_class.hexdigest(download.string)
        end

      # Hex digests are case-insensitive; normalise the expected value.
      if digest != hash_value.to_s.downcase
        raise "The hash of #{dst} does not match. Delete the file manually and retry."
      end
    end

    # Make sure the copy starts at the beginning of the body.
    download.rewind
    IO.copy_stream(download, dst)
  end

  dst
end

.extract_archive(from_path, to_path: nil, overwrite: nil) ⇒ Object

extract_tar_gz does not report the files it extracts, so this method simply returns to_path.



41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/torchaudio/datasets/utils.rb', line 41

# Unpacks a gzip-compressed tar archive and returns the directory it was
# extracted into.
#
# @param from_path [String] archive path; must end in ".tar.gz" or ".tgz"
# @param to_path [String, nil] destination directory; defaults to the
#   directory containing +from_path+
# @param overwrite [Object] accepted but not used by this method
# @return [String] the destination directory
# @raise [RuntimeError] if +from_path+ has any other extension
def extract_archive(from_path, to_path: nil, overwrite: nil)
  destination = to_path || File.dirname(from_path)

  unless from_path.end_with?(".tar.gz", ".tgz")
    raise "We currently only support tar.gz and tgz archives."
  end

  File.open(from_path, "rb") { |archive| Gem::Package.new("").extract_tar_gz(archive, destination) }
  destination
end

.walk_files(root, suffix, prefix: false, remove_suffix: false) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/torchaudio/datasets/utils.rb', line 54

# Yields, in sorted order, every regular file under +root+ (searched
# recursively) whose path ends with +suffix+.
#
# Without a block an Enumerator over the same values is returned.
#
# @param root [String] directory to search
# @param suffix [String] required ending of the file path, e.g. ".wav"
# @param prefix [Boolean] when true, yield the path joined onto +root+;
#   when false, yield the path relative to +root+
# @param remove_suffix [Boolean] when true, strip +suffix+ from each result
# @yieldparam path [String] the matching path, shaped by the options above
# @return [Enumerator] when no block is given
def walk_files(root, suffix, prefix: false, remove_suffix: false)
  return enum_for(:walk_files, root, suffix, prefix: prefix, remove_suffix: remove_suffix) unless block_given?

  Dir.glob("**/*", base: root).sort.each do |relative|
    next unless relative.end_with?(suffix)
    # The glob also returns directories; only real files are wanted.
    next unless File.file?(File.join(root, relative))

    name = remove_suffix ? relative.delete_suffix(suffix) : relative
    yield(prefix ? File.join(root, name) : name)
  end
end