Module: Philiprehberger::Checksum

Defined in:
lib/philiprehberger/checksum.rb,
lib/philiprehberger/checksum/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

CHUNK_SIZE =
8192
ALGORITHMS =
{
  md5: Digest::MD5,
  sha1: Digest::SHA1,
  sha256: Digest::SHA256,
  sha512: Digest::SHA512
}.freeze
HMAC_ALGORITHMS =
{
  sha1: 'SHA1',
  sha256: 'SHA256',
  sha512: 'SHA512'
}.freeze
VERSION =
'0.6.0'

Class Method Summary collapse

Class Method Details

.compare_files(path1, path2, algo: :sha256) ⇒ Boolean

Compare two files by checksum

Parameters:

  • path1 (String)

    path to the first file

  • path2 (String)

    path to the second file

  • algo (Symbol) (defaults to: :sha256)

    algorithm to use (:md5, :sha1, :sha256, :sha512, :crc32)

Returns:

  • (Boolean)

    true if both files have the same checksum

Raises:

  • (Error)

    if either file does not exist or is not readable



241
242
243
# File 'lib/philiprehberger/checksum.rb', line 241

# Report whether two files hash to the same value under +algo+.
#
# Each file is hashed through file_digest (first +path1+, then +path2+) and
# the two results are compared with ==. Nothing is rescued here, so any error
# raised by file_digest reaches the caller.
#
# @param path1 [String] path to the first file
# @param path2 [String] path to the second file
# @param algo [Symbol] algorithm passed through to file_digest
# @return [Boolean] true when both digests are equal
def self.compare_files(path1, path2, algo: :sha256)
  first_sum = file_digest(path1, algo: algo)
  second_sum = file_digest(path2, algo: algo)
  first_sum == second_sum
end

.crc32(string, format: :hex) ⇒ String

Compute a CRC32 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



69
70
71
72
# File 'lib/philiprehberger/checksum.rb', line 69

# CRC32 of an in-memory string.
#
# The integer from Zlib.crc32 is handed to format_crc32, which produces the
# final representation selected by +format+.
#
# @param string [String] the input string
# @param format [Symbol] output format, forwarded to format_crc32
# @return [String] the formatted checksum
def self.crc32(string, format: :hex)
  format_crc32(Zlib.crc32(string), format: format)
end

.digest(string, algo:, format: :hex) ⇒ String

Compute a checksum for a string using any supported algorithm

Parameters:

  • string (String)

    the input string

  • algo (Symbol)

    algorithm (:md5, :sha1, :sha256, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the algorithm is unknown



209
210
211
212
213
214
215
216
# File 'lib/philiprehberger/checksum.rb', line 209

# Checksum a string with an algorithm chosen at call time.
#
# :crc32 is routed to crc32; every other name must be a key of ALGORITHMS and
# is hashed through digest_string with the mapped digest class.
#
# @param string [String] the input string
# @param algo [Symbol] :crc32 or a key of ALGORITHMS
# @param format [Symbol] output format, forwarded to the chosen helper
# @return [String] the checksum
# @raise [Error] when +algo+ is neither :crc32 nor a key of ALGORITHMS
def self.digest(string, algo:, format: :hex)
  if algo == :crc32
    crc32(string, format: format)
  else
    digest_class = ALGORITHMS.fetch(algo) { raise Error, "unknown algorithm: #{algo}" }
    digest_string(digest_class, string, format: format)
  end
end

.directory_checksum(path, algo: :sha256, format: :hex) ⇒ String

Compute a combined checksum of all files in a directory.

Hashes each file’s relative path and content checksum in sorted order, then produces a single digest. Useful for cache invalidation.

Parameters:

  • path (String)

    path to the directory

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:md5, :sha1, :sha256, :sha512)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the combined checksum

Raises:

  • (Error)

    if the path is not a directory or algorithm is unknown



337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/philiprehberger/checksum.rb', line 337

# Compute one combined checksum over every regular file below a directory.
#
# Files are discovered with Dir.glob relative to +path+ (via the +base:+
# keyword), so the names fed into the digest are always relative to the
# directory itself. This makes the result independent of how +path+ is
# spelled (for example with a trailing slash) and keeps glob metacharacters
# in +path+ from being interpreted as a pattern.
#
# For each file, in sorted order of relative path, the relative path and then
# the file's hex digest are fed into a fresh digest of the selected class.
# The two values are appended back-to-back with no delimiter. The '**/*'
# pattern is used without File::FNM_DOTMATCH, so dotfiles are not matched.
#
# @param path [String] path to the directory
# @param algo [Symbol] a key of ALGORITHMS
# @param format [Symbol] output format, forwarded to format_output
# @return [String] the combined checksum
# @raise [Error] if +path+ is not a directory or +algo+ is not in ALGORITHMS
def self.directory_checksum(path, algo: :sha256, format: :hex)
  raise Error, "not a directory: #{path}" unless File.directory?(path)

  klass = ALGORITHMS[algo]
  raise Error, "unknown algorithm: #{algo}" unless klass

  # base: returns paths relative to +path+ and treats +path+ literally.
  relative_paths = Dir.glob(File.join('**', '*'), base: path)
                      .select { |relative| File.file?(File.join(path, relative)) }
                      .sort
  combined = klass.new

  relative_paths.each do |relative|
    combined.update(relative)
    # Per-file digests are always hex here; +format+ only shapes the final output.
    combined.update(digest_file(klass, File.join(path, relative), format: :hex))
  end

  format_output(combined, format: format)
end

.file_crc32(path, format: :hex) ⇒ String

Compute a CRC32 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



193
194
195
196
197
198
199
200
# File 'lib/philiprehberger/checksum.rb', line 193

# CRC32 of a file, computed incrementally in CHUNK_SIZE pieces.
#
# validate_file! runs first; the file is then opened in binary mode and each
# chunk is folded into the running CRC, starting from 0. An empty file
# therefore yields a CRC of 0 before formatting.
#
# @param path [String] path to the file
# @param format [Symbol] output format, forwarded to format_crc32
# @return [String] the formatted checksum
def self.file_crc32(path, format: :hex)
  validate_file!(path)
  running = File.open(path, 'rb') do |io|
    crc = 0
    # IO#read(length) returns nil at end of file, which ends the loop.
    while (chunk = io.read(CHUNK_SIZE))
      crc = Zlib.crc32(chunk, crc)
    end
    crc
  end
  format_crc32(running, format: format)
end

.file_digest(path, algo:, format: :hex) ⇒ String

Compute a checksum for a file using any supported algorithm

Parameters:

  • path (String)

    path to the file

  • algo (Symbol)

    algorithm (:md5, :sha1, :sha256, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or the algorithm is unknown



225
226
227
228
229
230
231
232
# File 'lib/philiprehberger/checksum.rb', line 225

# Checksum a file with an algorithm chosen at call time.
#
# :crc32 is routed to file_crc32; every other name must be a key of
# ALGORITHMS and is hashed through digest_file with the mapped digest class.
#
# @param path [String] path to the file
# @param algo [Symbol] :crc32 or a key of ALGORITHMS
# @param format [Symbol] output format, forwarded to the chosen helper
# @return [String] the checksum
# @raise [Error] when +algo+ is neither :crc32 nor a key of ALGORITHMS
def self.file_digest(path, algo:, format: :hex)
  if algo == :crc32
    file_crc32(path, format: format)
  else
    digest_class = ALGORITHMS.fetch(algo) { raise Error, "unknown algorithm: #{algo}" }
    digest_file(digest_class, path, format: format)
  end
end

.file_hmac(path, key:, algo: :sha256, format: :hex) ⇒ String

Compute an HMAC for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • key (String)

    the HMAC key

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:sha1, :sha256, :sha512)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest

Raises:

  • (Error)

    if the file does not exist or the algorithm is unknown



279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/philiprehberger/checksum.rb', line 279

# Compute an HMAC over a file's contents, reading it in CHUNK_SIZE pieces.
#
# The algorithm is checked first, then validate_file! runs, then the file is
# read in binary mode and fed to an OpenSSL::HMAC instance chunk by chunk.
#
# @param path [String] path to the file
# @param key [String] the HMAC key
# @param algo [Symbol] a key of HMAC_ALGORITHMS
# @param format [Symbol] :hex or :base64
# @return [String] the encoded HMAC
# @raise [Error] when +algo+ is not in HMAC_ALGORITHMS or +format+ is not recognised
def self.file_hmac(path, key:, algo: :sha256, format: :hex)
  openssl_name = HMAC_ALGORITHMS.fetch(algo) { raise Error, "unknown HMAC algorithm: #{algo}" }

  validate_file!(path)
  mac = OpenSSL::HMAC.new(key, openssl_name)
  File.open(path, 'rb') do |io|
    # IO#read(length) returns nil at end of file, which ends the loop.
    while (chunk = io.read(CHUNK_SIZE))
      mac.update(chunk)
    end
  end

  # The format is only inspected after the whole file has been consumed.
  case format
  when :base64
    Base64.strict_encode64(mac.digest)
  when :hex
    mac.hexdigest
  else
    raise Error, "unknown format: #{format}"
  end
end

.file_md5(path, format: :hex) ⇒ String

Compute an MD5 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



80
81
82
# File 'lib/philiprehberger/checksum.rb', line 80

# MD5 checksum of the file at +path+.
#
# Thin wrapper: delegates to digest_file with Digest::MD5 as the digest class
# and forwards +format+ unchanged. Nothing is rescued here, so errors raised
# by digest_file reach the caller.
def self.file_md5(path, format: :hex)
  digest_file(Digest::MD5, path, format: format)
end

.file_multi(path, *algos, format: :hex) ⇒ Hash<Symbol, String>

Compute multiple checksums for a file in a single read pass

Parameters:

  • path (String)

    path to the file

  • algos (Array<Symbol>)

    algorithms to compute (:md5, :sha1, :sha256, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (Hash<Symbol, String>)

    algorithm => checksum pairs

Raises:

  • (Error)

    if the file does not exist or an unknown algorithm is given



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/philiprehberger/checksum.rb', line 101

# Compute several checksums for one file while reading it only once.
#
# Algorithms may be passed as separate arguments or as arrays (the list is
# flattened). :crc32 is tracked as a running integer; every other name must
# be a key of ALGORITHMS and gets its own digest instance. Each chunk read
# from the file is fed to all of them.
#
# @param path [String] path to the file
# @param algos [Array<Symbol>] :crc32 and/or keys of ALGORITHMS
# @param format [Symbol] output format, forwarded to format_crc32 / format_output
# @return [Hash<Symbol, String>] algorithm => checksum, in the order requested
# @raise [Error] when no algorithm is given or a name is not recognised
def self.file_multi(path, *algos, format: :hex)
  validate_file!(path)
  requested = algos.flatten
  raise Error, 'at least one algorithm is required' if requested.empty?

  # nil means "CRC32 was not requested"; 0 is the CRC32 starting value.
  crc = requested.include?(:crc32) ? 0 : nil
  hashers = requested.reject { |name| name == :crc32 }.each_with_object({}) do |name, acc|
    raise Error, "unknown algorithm: #{name}" unless ALGORITHMS.key?(name)

    acc[name] = ALGORITHMS[name].new
  end

  File.open(path, 'rb') do |io|
    # IO#read(length) returns nil at end of file, which ends the loop.
    until (chunk = io.read(CHUNK_SIZE)).nil?
      hashers.each_value { |hasher| hasher.update(chunk) }
      crc = Zlib.crc32(chunk, crc) if crc
    end
  end

  pairs = requested.map do |name|
    formatted = if name == :crc32
                  format_crc32(crc, format: format)
                else
                  format_output(hashers[name], format: format)
                end
    [name, formatted]
  end
  pairs.to_h
end

.file_sha1(path, format: :hex) ⇒ String

Compute a SHA-1 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



173
174
175
# File 'lib/philiprehberger/checksum.rb', line 173

# SHA-1 checksum of the file at +path+.
#
# Thin wrapper: delegates to digest_file with Digest::SHA1 as the digest class
# and forwards +format+ unchanged. Nothing is rescued here, so errors raised
# by digest_file reach the caller.
def self.file_sha1(path, format: :hex)
  digest_file(Digest::SHA1, path, format: format)
end

.file_sha256(path, format: :hex) ⇒ String

Compute a SHA-256 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



90
91
92
# File 'lib/philiprehberger/checksum.rb', line 90

# SHA-256 checksum of the file at +path+.
#
# Thin wrapper: delegates to digest_file with Digest::SHA256 as the digest
# class and forwards +format+ unchanged. Nothing is rescued here, so errors
# raised by digest_file reach the caller.
def self.file_sha256(path, format: :hex)
  digest_file(Digest::SHA256, path, format: format)
end

.file_sha512(path, format: :hex) ⇒ String

Compute a SHA-512 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



183
184
185
# File 'lib/philiprehberger/checksum.rb', line 183

# SHA-512 checksum of the file at +path+.
#
# Thin wrapper: delegates to digest_file with Digest::SHA512 as the digest
# class and forwards +format+ unchanged. Nothing is rescued here, so errors
# raised by digest_file reach the caller.
def self.file_sha512(path, format: :hex)
  digest_file(Digest::SHA512, path, format: format)
end

.files(paths, algo: :sha256, format: :hex) ⇒ Hash<String, String>

Hash multiple files, returning a hash of { path => digest }

Parameters:

  • paths (Array<String>)

    file paths to hash

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:md5, :sha1, :sha256, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (Hash<String, String>)

    path => digest pairs

Raises:

  • (Error)

    if any file does not exist or an unknown algorithm is given



252
253
254
255
256
# File 'lib/philiprehberger/checksum.rb', line 252

# Checksum a list of files and key each result by the path it came from.
#
# Every path is hashed through file_digest with the same +algo+ and +format+.
# A path listed more than once appears once in the result.
#
# @param paths [Array<String>] file paths to hash
# @param algo [Symbol] algorithm, forwarded to file_digest
# @param format [Symbol] output format, forwarded to file_digest
# @return [Hash<String, String>] path => digest pairs
def self.files(paths, algo: :sha256, format: :hex)
  paths.each_with_object({}) do |file, sums|
    sums[file] = file_digest(file, algo: algo, format: format)
  end
end

.hmac_sha1(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA1 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



143
144
145
# File 'lib/philiprehberger/checksum.rb', line 143

# HMAC-SHA1 of +string+ under +key+.
#
# Thin wrapper: delegates to hmac_digest with the algorithm name 'SHA1',
# passing +string+ and +key+ positionally and forwarding +format+ unchanged.
def self.hmac_sha1(string, key:, format: :hex)
  hmac_digest('SHA1', string, key, format: format)
end

.hmac_sha256(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA256 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



153
154
155
# File 'lib/philiprehberger/checksum.rb', line 153

# HMAC-SHA256 of +string+ under +key+.
#
# Thin wrapper: delegates to hmac_digest with the algorithm name 'SHA256',
# passing +string+ and +key+ positionally and forwarding +format+ unchanged.
def self.hmac_sha256(string, key:, format: :hex)
  hmac_digest('SHA256', string, key, format: format)
end

.hmac_sha512(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA512 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



163
164
165
# File 'lib/philiprehberger/checksum.rb', line 163

# HMAC-SHA512 of +string+ under +key+.
#
# Thin wrapper: delegates to hmac_digest with the algorithm name 'SHA512',
# passing +string+ and +key+ positionally and forwarding +format+ unchanged.
def self.hmac_sha512(string, key:, format: :hex)
  hmac_digest('SHA512', string, key, format: format)
end

.md5(string, format: :hex) ⇒ String

Compute an MD5 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



33
34
35
# File 'lib/philiprehberger/checksum.rb', line 33

# MD5 checksum of +string+.
#
# Thin wrapper: delegates to digest_string with Digest::MD5 as the digest
# class and forwards +format+ unchanged.
def self.md5(string, format: :hex)
  digest_string(Digest::MD5, string, format: format)
end

.sha1(string, format: :hex) ⇒ String

Compute a SHA-1 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



42
43
44
# File 'lib/philiprehberger/checksum.rb', line 42

# SHA-1 checksum of +string+.
#
# Thin wrapper: delegates to digest_string with Digest::SHA1 as the digest
# class and forwards +format+ unchanged.
def self.sha1(string, format: :hex)
  digest_string(Digest::SHA1, string, format: format)
end

.sha256(string, format: :hex) ⇒ String

Compute a SHA-256 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



51
52
53
# File 'lib/philiprehberger/checksum.rb', line 51

# SHA-256 checksum of +string+.
#
# Thin wrapper: delegates to digest_string with Digest::SHA256 as the digest
# class and forwards +format+ unchanged.
def self.sha256(string, format: :hex)
  digest_string(Digest::SHA256, string, format: format)
end

.sha512(string, format: :hex) ⇒ String

Compute a SHA-512 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



60
61
62
# File 'lib/philiprehberger/checksum.rb', line 60

# SHA-512 checksum of +string+.
#
# Thin wrapper: delegates to digest_string with Digest::SHA512 as the digest
# class and forwards +format+ unchanged.
def self.sha512(string, format: :hex)
  digest_string(Digest::SHA512, string, format: format)
end

.verify?(path, format: :hex, **expected) ⇒ Boolean

Verify a file’s checksum against expected values

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format used for expected values

  • expected (Hash<Symbol, String>)

    algorithm => expected checksum pairs

Returns:

  • (Boolean)

    true if all checksums match

Raises:

  • (Error)

    if the file does not exist or an unknown algorithm is given



318
319
320
321
322
323
324
325
# File 'lib/philiprehberger/checksum.rb', line 318

# Check a file against one or more expected checksums.
#
# The expected values arrive as keyword arguments (algorithm => checksum).
# All requested algorithms are computed with a single file_multi call, and
# each result is compared to its expected value with secure_compare.
# Checking stops at the first mismatch.
#
# @param path [String] path to the file
# @param format [Symbol] output format the expected values are written in
# @param expected [Hash<Symbol, String>] algorithm => expected checksum pairs
# @return [Boolean] true only when every expected checksum matches
# @raise [Error] when no expected checksum is supplied
def self.verify?(path, format: :hex, **expected)
  raise Error, 'at least one expected checksum is required' if expected.empty?

  computed = file_multi(path, *expected.keys, format: format)
  expected.each do |name, wanted|
    return false unless secure_compare(computed[name], wanted)
  end
  true
end

.verify_hmac?(string, expected, key:, algo: :sha256) ⇒ Boolean

Verify an HMAC with timing-safe comparison

Parameters:

  • string (String)

    the input string

  • expected (String)

    the expected HMAC hex digest

  • key (String)

    the HMAC key

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:sha1, :sha256, or :sha512)

Returns:

  • (Boolean)

    true if the HMAC matches

Raises:

  • (Error)

    if the HMAC algorithm is unknown



303
304
305
306
307
308
309
# File 'lib/philiprehberger/checksum.rb', line 303

# Check a string's HMAC against an expected hex digest.
#
# The HMAC is always computed in :hex format via hmac_digest and compared to
# +expected+ with secure_compare.
#
# @param string [String] the input string
# @param expected [String] the expected HMAC, hex encoded
# @param key [String] the HMAC key
# @param algo [Symbol] a key of HMAC_ALGORITHMS
# @return [Boolean] the result of secure_compare
# @raise [Error] when +algo+ is not in HMAC_ALGORITHMS
def self.verify_hmac?(string, expected, key:, algo: :sha256)
  openssl_name = HMAC_ALGORITHMS.fetch(algo) { raise Error, "unknown HMAC algorithm: #{algo}" }

  secure_compare(hmac_digest(openssl_name, string, key, format: :hex), expected)
end

.verify_string?(string, expected, algo: :sha256, format: :hex) ⇒ Boolean

Verify a string’s checksum with timing-safe comparison

Parameters:

  • string (String)

    the input string

  • expected (String)

    the expected checksum

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:md5, :sha1, :sha256, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format used for the expected value (:hex or :base64)

Returns:

  • (Boolean)

    true if the computed checksum matches

Raises:

  • (Error)

    if the algorithm is unknown



266
267
268
269
# File 'lib/philiprehberger/checksum.rb', line 266

# Check a string's checksum against an expected value.
#
# The checksum is computed through digest with the given +algo+ and +format+
# and compared to +expected+ with secure_compare. Errors raised by digest
# (for example for an unknown algorithm) are not rescued here.
#
# @param string [String] the input string
# @param expected [String] the expected checksum, encoded in +format+
# @param algo [Symbol] algorithm, forwarded to digest
# @param format [Symbol] output format, forwarded to digest
# @return [Boolean] the result of secure_compare
def self.verify_string?(string, expected, algo: :sha256, format: :hex)
  secure_compare(digest(string, algo: algo, format: format), expected)
end