Class: Licensee::Matchers::Dice

Inherits:
Matcher
  • Object
show all
Defined in:
lib/licensee/matchers/dice.rb

Overview

Similarity matcher based on the Dice coefficient over wordsets.

Constant Summary

Constants inherited from Matcher

Matcher::HASH_METHODS

Instance Attribute Summary

Attributes inherited from Matcher

#file

Instance Method Summary collapse

Methods inherited from Matcher

#initialize, #name

Methods included from HashHelper

#serialize_hash_value, #to_h

Constructor Details

This class inherits a constructor from Licensee::Matchers::Matcher

Instance Method Details

#confidence ⇒ Object

Confidence that the matched license is a match



53
54
55
# File 'lib/licensee/matchers/dice.rb', line 53

# Similarity score of the current match against +file+, memoized in
# @confidence.
#
# Returns the value of match.similarity(file) when match is truthy, and 0
# when there is no match.
def confidence
  return @confidence if @confidence

  winner = match
  @confidence = winner ? winner.similarity(file) : 0
end

#match ⇒ Object

Returns the first potential license whose similarity meets or exceeds the confidence threshold, or nil if no license qualifies



9
10
11
12
13
14
15
# File 'lib/licensee/matchers/dice.rb', line 9

# First element of the leading entry in +matches+, memoized in @match.
#
# Returns nil when +matches+ is empty.
def match
  return @match if @match

  qualifying = matches
  @match = qualifying.empty? ? nil : qualifying.first[0]
end

#matches ⇒ Object



45
46
47
48
49
50
# File 'lib/licensee/matchers/dice.rb', line 45

# Entries of +matches_by_similarity+ that clear both thresholds, memoized in
# @matches.
#
# An entry [license, similarity] is kept when similarity is at least
# minimum_confidence and license.bigram_similarity(file) is at least
# minimum_bigram_confidence. Input order is preserved.
def matches
  return @matches if @matches

  @matches = matches_by_similarity.select do |candidate, score|
    # The bigram check only runs for entries that already passed the
    # primary similarity threshold.
    next false unless score >= minimum_confidence

    candidate.bigram_similarity(file) >= minimum_bigram_confidence
  end
end

#matches_by_similarity ⇒ Object Also known as: licenses_by_similarity



35
36
37
38
39
40
41
42
# File 'lib/licensee/matchers/dice.rb', line 35

# Pairs every entry of +potential_matches+ with its similarity(file) score
# and orders the pairs from highest score to lowest. Memoized in
# @matches_by_similarity.
#
# Returns an Array of [potential_match, similarity] pairs.
def matches_by_similarity
  return @matches_by_similarity if @matches_by_similarity

  scored = potential_matches.map { |candidate| [candidate, candidate.similarity(file)] }
  # Ascending sort on the score (last element of each pair), then flipped.
  @matches_by_similarity = scored.sort_by(&:last).reverse
end

#potential_matches ⇒ Object Also known as: potential_licenses

Licenses that may be a match for this file. To avoid false positives:

  1. Creative commons licenses cannot be matched against license files that begin with the title of a non-open source CC license variant

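  2. The percentage change in file length may not exceed the inverse of the confidence threshold (note: no file-length check is visible in the method body shown below, which only requires a candidate license to have a wordset; this rule may be stale or enforced elsewhere)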



24
25
26
27
28
29
30
31
32
# File 'lib/licensee/matchers/dice.rb', line 24

# Narrows the inherited candidate list (from +super+), memoized in
# @potential_matches.
#
# A candidate is dropped when it is a Creative Commons license and
# file.potential_false_positive? is truthy, or when its wordset is
# nil/false. Everything else is kept in the inherited order.
def potential_matches
  return @potential_matches if @potential_matches

  @potential_matches = super.reject do |candidate|
    risky_cc = candidate.creative_commons? && file.potential_false_positive?
    # wordset is only consulted when the Creative Commons guard did not fire.
    risky_cc || !candidate.wordset
  end
end