Class: StorageGuardian::DuplicateDetector

Inherits:
Object
  • Object
show all
Defined in:
lib/storage_guardian/duplicate_detector.rb

Instance Method Summary collapse

Constructor Details

#initialize(entries) ⇒ DuplicateDetector

Returns a new instance of DuplicateDetector.



7
8
9
# File 'lib/storage_guardian/duplicate_detector.rb', line 7

# Builds a detector over the given collection of entries.
#
# The collection is stored by reference (not copied) in +@entries+.
#
# @param entries [Enumerable] the entries to scan; judging by how #detect
#   uses them, each element is expected to respond to +#size+ and +#path+
def initialize(entries)
  @entries = entries
end

Instance Method Details

#detect ⇒ Object

Returns groups of two or more non-empty entries that have the same size and the same content hash.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/storage_guardian/duplicate_detector.rb', line 12

# Finds sets of entries that look like duplicates of one another.
#
# Entries are first bucketed by +size+; buckets for size 0 are discarded.
# Only buckets with at least two entries are hashed (via +compute_hash+ on
# each entry's +path+), so entries with a unique size are never hashed.
# Within a bucket, entries sharing the same hash form one result group.
#
# NOTE(review): +compute_hash+ is defined outside this listing; presumably it
# returns a digest of the file contents at the given path -- confirm.
#
# @return [Array<Array>] one inner array per duplicate set, each holding two
#   or more entries; empty when no duplicates are found
def detect
  # `size == 0` (rather than `zero?`) so a nil size is tolerated, not raised on.
  by_size = @entries.group_by(&:size).reject { |size, _| size == 0 }

  by_size.each_value.flat_map do |same_size|
    next [] if same_size.size < 2

    same_size
      .group_by { |entry| compute_hash(entry.path) }
      .each_value
      .select { |matches| matches.size >= 2 }
  end
end