Module: Philiprehberger::Checksum

Defined in:
lib/philiprehberger/checksum.rb,
lib/philiprehberger/checksum/version.rb

Defined Under Namespace

Classes: Error

Constant Summary collapse

# Number of bytes requested per read when a file is streamed in chunks.
CHUNK_SIZE =
8192
# Maps each supported algorithm symbol to the Digest class implementing it.
# :crc32 is not listed here; the methods that accept it handle it separately.
ALGORITHMS =
{
  md5: Digest::MD5,
  sha1: Digest::SHA1,
  sha256: Digest::SHA256,
  sha384: Digest::SHA384,
  sha512: Digest::SHA512
}.freeze
# Maps each supported HMAC algorithm symbol to the digest name string that is
# handed to the HMAC routines.
HMAC_ALGORITHMS =
{
  sha1: 'SHA1',
  sha256: 'SHA256',
  sha384: 'SHA384',
  sha512: 'SHA512'
}.freeze
# Library version string.
VERSION =
'0.7.0'

Class Method Summary collapse

Class Method Details

.compare_files(path1, path2, algo: :sha256) ⇒ Boolean

Compare two files by checksum

Parameters:

  • path1 (String)

    path to the first file

  • path2 (String)

    path to the second file

  • algo (Symbol) (defaults to: :sha256)

    algorithm to use (:md5, :sha1, :sha256, :sha384, :sha512, :crc32)

Returns:

  • (Boolean)

    true if both files have the same checksum

Raises:

  • (Error)

    if either file does not exist or is not readable



272
273
274
# File 'lib/philiprehberger/checksum.rb', line 272

# Report whether two files yield the same checksum under +algo+.
#
# Each digest is obtained from file_digest; path1 is hashed before path2.
#
# @param path1 [String] path to the first file
# @param path2 [String] path to the second file
# @param algo [Symbol] algorithm forwarded to file_digest
# @return [Boolean] true when both checksums are equal
def self.compare_files(path1, path2, algo: :sha256)
  left, right = [path1, path2].map { |candidate| file_digest(candidate, algo: algo) }
  left == right
end

.crc32(string, format: :hex) ⇒ String

Compute a CRC32 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



80
81
82
83
# File 'lib/philiprehberger/checksum.rb', line 80

# Compute the CRC32 of +string+ with Zlib and render it via format_crc32.
#
# @param string [String] the input string
# @param format [Symbol] output format forwarded to format_crc32
# @return [String] the formatted checksum
def self.crc32(string, format: :hex)
  format_crc32(Zlib.crc32(string), format: format)
end

.digest(string, algo:, format: :hex) ⇒ String

Compute a checksum for a string using any supported algorithm

Parameters:

  • string (String)

    the input string

  • algo (Symbol)

    algorithm (:md5, :sha1, :sha256, :sha384, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the algorithm is unknown



240
241
242
243
244
245
246
247
# File 'lib/philiprehberger/checksum.rb', line 240

# Compute a checksum of +string+ with the algorithm named by +algo+.
#
# :crc32 is routed to crc32; every other symbol must be a key of ALGORITHMS.
#
# @param string [String] the input string
# @param algo [Symbol] algorithm name
# @param format [Symbol] output format forwarded to the underlying helper
# @return [String] the checksum
# @raise [Error] if +algo+ is neither :crc32 nor a key of ALGORITHMS
def self.digest(string, algo:, format: :hex)
  if algo == :crc32
    crc32(string, format: format)
  else
    digest_class = ALGORITHMS.fetch(algo) { raise Error, "unknown algorithm: #{algo}" }
    digest_string(digest_class, string, format: format)
  end
end

.directory_checksum(path, algo: :sha256, format: :hex) ⇒ String

Compute a combined checksum of all files in a directory.

Hashes each file’s relative path and content checksum in sorted order, then produces a single digest. Useful for cache invalidation.

Parameters:

  • path (String)

    path to the directory

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:md5, :sha1, :sha256, :sha384, :sha512)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the combined checksum

Raises:

  • (Error)

    if the path is not a directory or algorithm is unknown



368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/philiprehberger/checksum.rb', line 368

# Compute one combined checksum over every regular file below +path+.
#
# For each file, in sorted relative-path order, the relative path and then
# the file's hex digest are fed into a single digest object.
#
# Files are listed with Dir.glob's +base:+ option, so the names that get
# hashed are always relative to +path+. This keeps the result identical
# whether or not +path+ ends in a slash, and keeps glob metacharacters in the
# directory name ("[", "{", "*", "?") from being read as a pattern.
#
# @param path [String] path to the directory
# @param algo [Symbol] a key of ALGORITHMS
# @param format [Symbol] output format forwarded to format_output
# @return [String] the combined checksum
# @raise [Error] if +path+ is not a directory or +algo+ is unknown
def self.directory_checksum(path, algo: :sha256, format: :hex)
  raise Error, "not a directory: #{path}" unless File.directory?(path)

  klass = ALGORITHMS[algo]
  raise Error, "unknown algorithm: #{algo}" unless klass

  # Sorting the relative names makes the result independent of the order in
  # which the filesystem returns entries.
  relative_paths = Dir.glob('**/*', base: path)
                      .select { |relative| File.file?(File.join(path, relative)) }
                      .sort
  combined = klass.new

  relative_paths.each do |relative|
    combined.update(relative)
    # The per-file digest is always hex here; +format+ only applies to the
    # final combined value.
    combined.update(digest_file(klass, File.join(path, relative), format: :hex))
  end

  format_output(combined, format: format)
end

.file_crc32(path, format: :hex) ⇒ String

Compute a CRC32 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



224
225
226
227
228
229
230
231
# File 'lib/philiprehberger/checksum.rb', line 224

# Compute the CRC32 of a file by reading it CHUNK_SIZE bytes at a time.
#
# The running CRC starts at 0 and is threaded through Zlib.crc32 for every
# chunk, so an empty file yields 0 before formatting.
#
# @param path [String] path to the file
# @param format [Symbol] output format forwarded to format_crc32
# @return [String] the formatted checksum
# @raise [Error] if the file does not exist or is not readable
def self.file_crc32(path, format: :hex)
  validate_file!(path)

  checksum = File.open(path, 'rb') do |stream|
    running = 0
    while (chunk = stream.read(CHUNK_SIZE))
      running = Zlib.crc32(chunk, running)
    end
    running
  end

  format_crc32(checksum, format: format)
end

.file_digest(path, algo:, format: :hex) ⇒ String

Compute a checksum for a file using any supported algorithm

Parameters:

  • path (String)

    path to the file

  • algo (Symbol)

    algorithm (:md5, :sha1, :sha256, :sha384, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or the algorithm is unknown



256
257
258
259
260
261
262
263
# File 'lib/philiprehberger/checksum.rb', line 256

# Compute a checksum of the file at +path+ with the algorithm named by +algo+.
#
# :crc32 is routed to file_crc32; every other symbol must be a key of
# ALGORITHMS.
#
# @param path [String] path to the file
# @param algo [Symbol] algorithm name
# @param format [Symbol] output format forwarded to the underlying helper
# @return [String] the checksum
# @raise [Error] if the file does not exist or the algorithm is unknown
def self.file_digest(path, algo:, format: :hex)
  if algo == :crc32
    file_crc32(path, format: format)
  else
    digest_class = ALGORITHMS.fetch(algo) { raise Error, "unknown algorithm: #{algo}" }
    digest_file(digest_class, path, format: format)
  end
end

.file_hmac(path, key:, algo: :sha256, format: :hex) ⇒ String

Compute an HMAC for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • key (String)

    the HMAC key

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:sha1, :sha256, :sha384, :sha512)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest

Raises:

  • (Error)

    if the file does not exist or the algorithm is unknown



310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/philiprehberger/checksum.rb', line 310

# Compute an HMAC over a file's contents, reading CHUNK_SIZE bytes at a time.
#
# The algorithm is checked before the file is validated, and the output
# format is only checked after the whole file has been read.
#
# @param path [String] path to the file
# @param key [String] the HMAC key
# @param algo [Symbol] a key of HMAC_ALGORITHMS
# @param format [Symbol] :hex or :base64
# @return [String] the HMAC digest
# @raise [Error] if the algorithm or format is unknown, or the file fails validation
def self.file_hmac(path, key:, algo: :sha256, format: :hex)
  openssl_name = HMAC_ALGORITHMS.fetch(algo) { raise Error, "unknown HMAC algorithm: #{algo}" }
  validate_file!(path)

  signer = OpenSSL::HMAC.new(key, openssl_name)
  File.open(path, 'rb') do |stream|
    while (chunk = stream.read(CHUNK_SIZE))
      signer.update(chunk)
    end
  end

  return signer.hexdigest if format == :hex
  return Base64.strict_encode64(signer.digest) if format == :base64

  raise Error, "unknown format: #{format}"
end

.file_md5(path, format: :hex) ⇒ String

Compute an MD5 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



91
92
93
# File 'lib/philiprehberger/checksum.rb', line 91

# Compute the MD5 checksum of a file by delegating to digest_file.
#
# @param path [String] path to the file
# @param format [Symbol] output format forwarded to digest_file
# @return [String] the checksum
def self.file_md5(path, format: :hex)
  algorithm = Digest::MD5
  digest_file(algorithm, path, format: format)
end

.file_multi(path, *algos, format: :hex) ⇒ Hash<Symbol, String>

Compute multiple checksums for a file in a single read pass

Parameters:

  • path (String)

    path to the file

  • algos (Array<Symbol>)

    algorithms to compute (:md5, :sha1, :sha256, :sha384, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (Hash<Symbol, String>)

    algorithm => checksum pairs

Raises:

  • (Error)

    if the file does not exist or an unknown algorithm is given



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/philiprehberger/checksum.rb', line 112

# Compute several checksums of one file while reading it only once.
#
# Algorithms may be given as separate arguments or as (nested) arrays; they
# are flattened first. Every chunk read is fed to each requested digest, and
# to the running CRC32 when :crc32 was requested.
#
# @param path [String] path to the file
# @param algos [Array<Symbol>] :crc32 and/or keys of ALGORITHMS
# @param format [Symbol] output format forwarded to the formatting helpers
# @return [Hash<Symbol, String>] algorithm => checksum, in the order requested
# @raise [Error] if the file fails validation, no algorithm is given, or an
#   algorithm is unknown
def self.file_multi(path, *algos, format: :hex)
  validate_file!(path)
  requested = algos.flatten
  raise Error, 'at least one algorithm is required' if requested.empty?

  # nil means "CRC32 not requested"; 0 is the CRC32 starting value.
  crc_state = requested.include?(:crc32) ? 0 : nil
  hashers = {}
  requested.each do |name|
    next if name == :crc32
    raise Error, "unknown algorithm: #{name}" unless ALGORITHMS.key?(name)

    hashers[name] = ALGORITHMS[name].new
  end

  File.open(path, 'rb') do |stream|
    until (chunk = stream.read(CHUNK_SIZE)).nil?
      hashers.each_value { |hasher| hasher << chunk }
      crc_state = Zlib.crc32(chunk, crc_state) if crc_state
    end
  end

  requested.each_with_object({}) do |name, report|
    report[name] =
      if name == :crc32
        format_crc32(crc_state, format: format)
      else
        format_output(hashers[name], format: format)
      end
  end
end

.file_sha1(path, format: :hex) ⇒ String

Compute a SHA-1 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



194
195
196
# File 'lib/philiprehberger/checksum.rb', line 194

# Compute the SHA-1 checksum of a file by delegating to digest_file.
#
# @param path [String] path to the file
# @param format [Symbol] output format forwarded to digest_file
# @return [String] the checksum
def self.file_sha1(path, format: :hex)
  algorithm = Digest::SHA1
  digest_file(algorithm, path, format: format)
end

.file_sha256(path, format: :hex) ⇒ String

Compute a SHA-256 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



101
102
103
# File 'lib/philiprehberger/checksum.rb', line 101

# Compute the SHA-256 checksum of a file by delegating to digest_file.
#
# @param path [String] path to the file
# @param format [Symbol] output format forwarded to digest_file
# @return [String] the checksum
def self.file_sha256(path, format: :hex)
  algorithm = Digest::SHA256
  digest_file(algorithm, path, format: format)
end

.file_sha384(path, format: :hex) ⇒ String

Compute a SHA-384 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



204
205
206
# File 'lib/philiprehberger/checksum.rb', line 204

# Compute the SHA-384 checksum of a file by delegating to digest_file.
#
# @param path [String] path to the file
# @param format [Symbol] output format forwarded to digest_file
# @return [String] the checksum
def self.file_sha384(path, format: :hex)
  algorithm = Digest::SHA384
  digest_file(algorithm, path, format: format)
end

.file_sha512(path, format: :hex) ⇒ String

Compute a SHA-512 checksum for a file using streaming reads

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum

Raises:

  • (Error)

    if the file does not exist or is not readable



214
215
216
# File 'lib/philiprehberger/checksum.rb', line 214

# Compute the SHA-512 checksum of a file by delegating to digest_file.
#
# @param path [String] path to the file
# @param format [Symbol] output format forwarded to digest_file
# @return [String] the checksum
def self.file_sha512(path, format: :hex)
  algorithm = Digest::SHA512
  digest_file(algorithm, path, format: format)
end

.files(paths, algo: :sha256, format: :hex) ⇒ Hash<String, String>

Hash multiple files, returning a hash of { path => digest }

Parameters:

  • paths (Array<String>)

    file paths to hash

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:md5, :sha1, :sha256, :sha384, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (Hash<String, String>)

    path => digest pairs

Raises:

  • (Error)

    if any file does not exist or an unknown algorithm is given



283
284
285
286
287
# File 'lib/philiprehberger/checksum.rb', line 283

# Hash every path in +paths+ and collect the results keyed by path.
#
# Each entry is produced by file_digest with the same +algo+ and +format+.
#
# @param paths [Array<String>] file paths to hash
# @param algo [Symbol] algorithm forwarded to file_digest
# @param format [Symbol] output format forwarded to file_digest
# @return [Hash<String, String>] path => digest pairs
def self.files(paths, algo: :sha256, format: :hex)
  paths.each_with_object({}) do |entry, digests|
    digests[entry] = file_digest(entry, algo: algo, format: format)
  end
end

.hmac_sha1(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA1 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



154
155
156
# File 'lib/philiprehberger/checksum.rb', line 154

# Compute an HMAC-SHA1 of +string+ by delegating to hmac_digest.
#
# @param string [String] the input string
# @param key [String] the HMAC key
# @param format [Symbol] output format forwarded to hmac_digest
# @return [String] the HMAC digest
def self.hmac_sha1(string, key:, format: :hex)
  digest_name = 'SHA1'
  hmac_digest(digest_name, string, key, format: format)
end

.hmac_sha256(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA256 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



164
165
166
# File 'lib/philiprehberger/checksum.rb', line 164

# Compute an HMAC-SHA256 of +string+ by delegating to hmac_digest.
#
# @param string [String] the input string
# @param key [String] the HMAC key
# @param format [Symbol] output format forwarded to hmac_digest
# @return [String] the HMAC digest
def self.hmac_sha256(string, key:, format: :hex)
  digest_name = 'SHA256'
  hmac_digest(digest_name, string, key, format: format)
end

.hmac_sha384(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA384 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



174
175
176
# File 'lib/philiprehberger/checksum.rb', line 174

# Compute an HMAC-SHA384 of +string+ by delegating to hmac_digest.
#
# @param string [String] the input string
# @param key [String] the HMAC key
# @param format [Symbol] output format forwarded to hmac_digest
# @return [String] the HMAC digest
def self.hmac_sha384(string, key:, format: :hex)
  digest_name = 'SHA384'
  hmac_digest(digest_name, string, key, format: format)
end

.hmac_sha512(string, key:, format: :hex) ⇒ String

Compute an HMAC-SHA512 for a string

Parameters:

  • string (String)

    the input string

  • key (String)

    the HMAC key

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the HMAC digest



184
185
186
# File 'lib/philiprehberger/checksum.rb', line 184

# Compute an HMAC-SHA512 of +string+ by delegating to hmac_digest.
#
# @param string [String] the input string
# @param key [String] the HMAC key
# @param format [Symbol] output format forwarded to hmac_digest
# @return [String] the HMAC digest
def self.hmac_sha512(string, key:, format: :hex)
  digest_name = 'SHA512'
  hmac_digest(digest_name, string, key, format: format)
end

.md5(string, format: :hex) ⇒ String

Compute an MD5 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



35
36
37
# File 'lib/philiprehberger/checksum.rb', line 35

# Compute the MD5 checksum of +string+ by delegating to digest_string.
#
# @param string [String] the input string
# @param format [Symbol] output format forwarded to digest_string
# @return [String] the checksum
def self.md5(string, format: :hex)
  algorithm = Digest::MD5
  digest_string(algorithm, string, format: format)
end

.sha1(string, format: :hex) ⇒ String

Compute a SHA-1 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



44
45
46
# File 'lib/philiprehberger/checksum.rb', line 44

# Compute the SHA-1 checksum of +string+ by delegating to digest_string.
#
# @param string [String] the input string
# @param format [Symbol] output format forwarded to digest_string
# @return [String] the checksum
def self.sha1(string, format: :hex)
  algorithm = Digest::SHA1
  digest_string(algorithm, string, format: format)
end

.sha256(string, format: :hex) ⇒ String

Compute a SHA-256 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



53
54
55
# File 'lib/philiprehberger/checksum.rb', line 53

# Compute the SHA-256 checksum of +string+ by delegating to digest_string.
#
# @param string [String] the input string
# @param format [Symbol] output format forwarded to digest_string
# @return [String] the checksum
def self.sha256(string, format: :hex)
  algorithm = Digest::SHA256
  digest_string(algorithm, string, format: format)
end

.sha384(string, format: :hex) ⇒ String

Compute a SHA-384 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



62
63
64
# File 'lib/philiprehberger/checksum.rb', line 62

# Compute the SHA-384 checksum of +string+ by delegating to digest_string.
#
# @param string [String] the input string
# @param format [Symbol] output format forwarded to digest_string
# @return [String] the checksum
def self.sha384(string, format: :hex)
  algorithm = Digest::SHA384
  digest_string(algorithm, string, format: format)
end

.sha512(string, format: :hex) ⇒ String

Compute a SHA-512 checksum for a string

Parameters:

  • string (String)

    the input string

  • format (Symbol) (defaults to: :hex)

    output format (:hex or :base64)

Returns:

  • (String)

    the checksum



71
72
73
# File 'lib/philiprehberger/checksum.rb', line 71

# Compute the SHA-512 checksum of +string+ by delegating to digest_string.
#
# @param string [String] the input string
# @param format [Symbol] output format forwarded to digest_string
# @return [String] the checksum
def self.sha512(string, format: :hex)
  algorithm = Digest::SHA512
  digest_string(algorithm, string, format: format)
end

.verify?(path, format: :hex, **expected) ⇒ Boolean

Verify a file’s checksum against expected values

Parameters:

  • path (String)

    path to the file

  • format (Symbol) (defaults to: :hex)

    output format used for expected values

  • expected (Hash<Symbol, String>)

    algorithm => expected checksum pairs

Returns:

  • (Boolean)

    true if all checksums match

Raises:

  • (Error)

    if the file does not exist or an unknown algorithm is given



349
350
351
352
353
354
355
356
# File 'lib/philiprehberger/checksum.rb', line 349

# Check a file against one or more expected checksums.
#
# All requested algorithms are computed through a single file_multi call;
# each result is then compared with secure_compare, stopping at the first
# mismatch.
#
# @param path [String] path to the file
# @param format [Symbol] output format the expected values are written in
# @param expected [Hash<Symbol, String>] algorithm => expected checksum
# @return [Boolean] true if every checksum matches
# @raise [Error] if no expected checksum is given, or file_multi raises
def self.verify?(path, format: :hex, **expected)
  raise Error, 'at least one expected checksum is required' if expected.empty?

  computed = file_multi(path, *expected.keys, format: format)
  expected.each do |algo, wanted|
    return false unless secure_compare(computed[algo], wanted)
  end
  true
end

.verify_hmac?(string, expected, key:, algo: :sha256) ⇒ Boolean

Verify an HMAC with timing-safe comparison

Parameters:

  • string (String)

    the input string

  • expected (String)

    the expected HMAC hex digest

  • key (String)

    the HMAC key

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:sha1, :sha256, :sha384, or :sha512)

Returns:

  • (Boolean)

    true if the HMAC matches

Raises:

  • (Error)

    if the HMAC algorithm is unknown



334
335
336
337
338
339
340
# File 'lib/philiprehberger/checksum.rb', line 334

# Check a string's HMAC against an expected hex digest.
#
# The HMAC is always computed in :hex format and compared to +expected+
# through secure_compare.
#
# @param string [String] the input string
# @param expected [String] the expected HMAC hex digest
# @param key [String] the HMAC key
# @param algo [Symbol] a key of HMAC_ALGORITHMS
# @return [Boolean] true if the HMAC matches
# @raise [Error] if +algo+ is not a key of HMAC_ALGORITHMS
def self.verify_hmac?(string, expected, key:, algo: :sha256)
  digest_name = HMAC_ALGORITHMS.fetch(algo) { raise Error, "unknown HMAC algorithm: #{algo}" }
  computed = hmac_digest(digest_name, string, key, format: :hex)
  secure_compare(computed, expected)
end

.verify_string?(string, expected, algo: :sha256, format: :hex) ⇒ Boolean

Verify a string’s checksum with timing-safe comparison

Parameters:

  • string (String)

    the input string

  • expected (String)

    the expected checksum

  • algo (Symbol) (defaults to: :sha256)

    algorithm (:md5, :sha1, :sha256, :sha384, :sha512, :crc32)

  • format (Symbol) (defaults to: :hex)

    output format used for the expected value (:hex or :base64)

Returns:

  • (Boolean)

    true if the computed checksum matches

Raises:

  • (Error)

    if the algorithm is unknown



297
298
299
300
# File 'lib/philiprehberger/checksum.rb', line 297

# Check a string's checksum against an expected value.
#
# The checksum is computed with digest and compared through secure_compare.
#
# @param string [String] the input string
# @param expected [String] the expected checksum
# @param algo [Symbol] algorithm forwarded to digest
# @param format [Symbol] output format the expected value is written in
# @return [Boolean] true if the computed checksum matches
# @raise [Error] if digest rejects the algorithm
def self.verify_string?(string, expected, algo: :sha256, format: :hex)
  secure_compare(digest(string, algo: algo, format: format), expected)
end