Class: Licensee::ProjectFiles::LicenseFile

Inherits:
ProjectFile
  • Object
show all
Includes:
ContentHelper
Defined in:
lib/licensee/project_files/license_file.rb

Overview

A project file that contains the license text (e.g., LICENSE, COPYING).

Direct Known Subclasses

ReadmeFile

Constant Summary collapse

PREFERRED_EXT =

List of extensions to give preference to

%w[md markdown txt html].freeze
PREFERRED_EXT_REGEX =
/\.#{Regexp.union(PREFERRED_EXT)}\z/
LICENSE_EXT_REGEX =

Regex to match any extension and periods in version numbers except .spdx or .header

%r{\.(?!spdx|header)([^./]|\.\d)+\z}i
OTHER_EXT_REGEX =

Regex to match any extension and periods in version numbers except a few unlikely as license texts with complex filenames

%r{\.(?!xml|sh|go|gemspec)([^./]|\.\d)+\z}i
ANY_EXT_REGEX =

Regex to match any extension and periods in version numbers

%r{\.([^./]|\.\d)+\z}i
LICENSE_REGEX =

Regex to match LICENSE, LICENCE, unlicense, etc.

/(un)?licen[sc]e/i
COPYING_REGEX =

Regex to match COPYING

/copying/i
COPYRIGHT_REGEX =

Regex to match COPYRIGHT

/copyright/i
OFL_REGEX =

Regex to match OFL.

/ofl/i
PATENTS_REGEX =

BSD + PATENTS patent file

/patents/i
FILENAME_REGEXES =

Hash of Regex => score with which to score potential license files

{
  # Entries are listed from the highest score to the lowest. name_score
  # returns the score of the FIRST pattern that matches the filename, so the
  # insertion order of this hash is significant.
  /\A#{LICENSE_REGEX}\z/                                => 1.00,  # LICENSE
  /\A#{LICENSE_REGEX}#{PREFERRED_EXT_REGEX}\z/          => 0.95,  # LICENSE.md
  /\A#{COPYING_REGEX}\z/                                => 0.90,  # COPYING
  /\A#{COPYING_REGEX}#{PREFERRED_EXT_REGEX}\z/          => 0.85,  # COPYING.md
  /\A#{LICENSE_REGEX}#{LICENSE_EXT_REGEX}\z/            => 0.80,  # LICENSE.textile
  /\A#{COPYING_REGEX}#{ANY_EXT_REGEX}\z/                => 0.75,  # COPYING.textile
  /\A#{LICENSE_REGEX}[-_][^.]*#{OTHER_EXT_REGEX}?\z/    => 0.70,  # LICENSE-MIT
  /\A#{COPYING_REGEX}[-_][^.]*#{OTHER_EXT_REGEX}?\z/    => 0.65,  # COPYING-MIT
  /\A\w+[-_]#{LICENSE_REGEX}[^.]*#{OTHER_EXT_REGEX}?\z/ => 0.60,  # MIT-LICENSE-MIT
  /\A\w+[-_]#{COPYING_REGEX}[^.]*#{OTHER_EXT_REGEX}?\z/ => 0.55,  # MIT-COPYING
  # The next two patterns have no explicit trailing \z of their own; the
  # interpolated extension regexes (PREFERRED_EXT_REGEX / OTHER_EXT_REGEX)
  # already end in \z, so the match is still anchored at the end.
  /\A#{OFL_REGEX}#{PREFERRED_EXT_REGEX}/                => 0.50,  # OFL.md
  /\A#{OFL_REGEX}#{OTHER_EXT_REGEX}/                    => 0.45,  # OFL.textile
  /\A#{OFL_REGEX}\z/                                    => 0.40,  # OFL
  /\A#{COPYRIGHT_REGEX}\z/                              => 0.35,  # COPYRIGHT
  /\A#{COPYRIGHT_REGEX}#{PREFERRED_EXT_REGEX}\z/        => 0.30,  # COPYRIGHT.txt
  /\A#{COPYRIGHT_REGEX}#{OTHER_EXT_REGEX}\z/            => 0.25,  # COPYRIGHT.textile
  /\A#{COPYRIGHT_REGEX}[-_][^.]*#{OTHER_EXT_REGEX}?\z/  => 0.20,  # COPYRIGHT-MIT
  /\A#{PATENTS_REGEX}\z/                                => 0.15,  # PATENTS
  /\A#{PATENTS_REGEX}#{OTHER_EXT_REGEX}\z/              => 0.10,  # PATENTS.txt
  # The empty regex matches every string, so a first-match lookup over this
  # hash always finds an entry.
  //                                                    => 0.00   # Catch all
}.freeze
CC_FALSE_POSITIVE_REGEX =

CC-NC and CC-ND are not open source licenses and should not be detected as CC-BY or CC-BY-SA which are 98%+ similar

/
  ^(creative\ commons\ )?Attribution-(NonCommercial|NoDerivatives)
/xi
LICENSES_FILENAME_REGEX =

SPDX license IDs allow letters/numbers with dashes/dots; also allow LicenseRef-* per the spec. Require a standard text-ish extension.

/
  \A
  (?:
    LicenseRef-[A-Za-z0-9](?:[-A-Za-z0-9.]*[A-Za-z0-9])?
    |
    [A-Za-z0-9](?:[A-Za-z0-9.-]*[A-Za-z0-9])?
  )
  #{PREFERRED_EXT_REGEX}
  \z
/ix

Constants included from ContentHelper::Constants

ContentHelper::Constants::DIGEST, ContentHelper::Constants::END_OF_TERMS_REGEX, ContentHelper::Constants::NORMALIZATIONS, ContentHelper::Constants::REGEXES, ContentHelper::Constants::START_REGEX, ContentHelper::Constants::STRIP_METHODS, ContentHelper::Constants::VARIETAL_WORDS

Constants inherited from ProjectFile

ProjectFile::ENCODING, ProjectFile::ENCODING_OPTIONS, ProjectFile::HASH_METHODS

Instance Attribute Summary

Attributes inherited from ProjectFile

#content

Class Method Summary collapse

Instance Method Summary collapse

Methods included from ContentHelper

#bigrams, const_missing, #content_hash, format_percent, #length, #length_delta, normalize_for_wrapping, title_regex, #wordset, wrap, wrap_line, wrap_lines

Methods included from ContentHelper::SimilarityMethods

#bigram_similarity, #similarity

Methods included from ContentHelper::NormalizationMethods

#content_normalized, #content_without_title_and_version, #normalize_content

Methods inherited from ProjectFile

#confidence, #content_hash, #content_normalized, #copyright?, #directory, #filename, #initialize, #matched_license, #matcher, #path, #path_relative_to_root

Methods included from HashHelper

#serialize_hash_value, #to_h

Constructor Details

This class inherits a constructor from Licensee::ProjectFiles::ProjectFile

Class Method Details

.lesser_gpl_score(filename) ⇒ Object

Case-insensitive check to determine whether the given file is COPYING.lesser



130
131
132
# File 'lib/licensee/project_files/license_file.rb', line 130

# Scores whether +filename+ is COPYING.lesser.
#
# The name is compared against 'copying.lesser' with String#casecmp, so
# letter case is ignored.
#
# @param filename [String] file name to test
# @return [Integer] 1 when the name is 'copying.lesser' in any letter case,
#   otherwise 0
def self.lesser_gpl_score(filename)
  return 1 if filename.casecmp('copying.lesser').zero?

  0
end

.name_score(dir, filename = nil) ⇒ Object



114
115
116
117
118
119
120
# File 'lib/licensee/project_files/license_file.rb', line 114

# Scores how strongly a file name suggests that the file holds a license.
#
# @param dir [Object] handed to normalize_name_score_args together with
#   +filename+; that helper is defined outside this excerpt (presumably it
#   lets callers pass the filename alone — confirm there)
# @param filename [String, nil]
# @return [Float] 0.0 when no filename is left after normalization; for a
#   'LICENSES' directory, 1.0 or 0.0 depending on LICENSES_FILENAME_REGEX;
#   otherwise the score of the first matching FILENAME_REGEXES entry
def self.name_score(dir, filename = nil)
  dir, filename = normalize_name_score_args(dir, filename)
  return 0.0 unless filename

  if dir == 'LICENSES'
    filename.match?(LICENSES_FILENAME_REGEX) ? 1.0 : 0.0
  else
    # Hash#find yields [regex, score] pairs in insertion order.
    FILENAME_REGEXES.find { |pattern, _score| filename.match?(pattern) }.last
  end
end

Instance Method Details

#attribution ⇒ Object



85
86
87
88
89
90
91
# File 'lib/licensee/project_files/license_file.rb', line 85

# Extracts the attribution text from this file's content.
#
# A lookup is attempted only when copyright? is true or the detected
# license's content contains the '[fullname]' placeholder. The result is
# stored in @attribution; because ||= is used, a nil result is not cached
# and is recomputed on the next call.
#
# @return [String, nil] the full text matched by Matchers::Copyright::REGEX
#   within content_without_title_and_version, or nil
def attribution
  @attribution ||= begin
    wants_attribution = copyright? || license.content&.include?('[fullname]')
    if wants_attribution
      found = Matchers::Copyright::REGEX.match(content_without_title_and_version)
      found && found[0]
    end
  end
end

#gpl? ⇒ Boolean

Returns:

  • (Boolean)


102
103
104
# File 'lib/licensee/project_files/license_file.rb', line 102

# Reports whether the detected license is a GPL license.
#
# @return [Boolean, nil] the value of gpl? on the object returned by
#   #license, or nil when #license returns nil
def gpl?
  detected = license
  return if detected.nil?

  detected.gpl?
end

#lgpl? ⇒ Boolean

Returns:

  • (Boolean)


98
99
100
# File 'lib/licensee/project_files/license_file.rb', line 98

# Reports whether this file is an LGPL license file.
#
# Both conditions must hold: LicenseFile.lesser_gpl_score gives 1 for this
# file's name, and the detected license answers true to lgpl?.
#
# @return [Boolean, nil] false when the filename score is not 1; otherwise
#   the value of lgpl? on the detected license (nil when #license is nil)
def lgpl?
  return false unless LicenseFile.lesser_gpl_score(filename) == 1

  license&.lgpl?
end

#license ⇒ Object



106
107
108
109
110
111
112
# File 'lib/licensee/project_files/license_file.rb', line 106

# Returns the license detected for this file.
#
# @return [Object] the matcher's match when a matcher exists and its match
#   is truthy; otherwise the result of License.find('other')
def license
  return matcher.match if matcher&.match

  License.find('other')
end

#possible_matchers ⇒ Object



81
82
83
# File 'lib/licensee/project_files/license_file.rb', line 81

# Lists the matcher classes that apply to a license file.
#
# @return [Array<Class>] Matchers::Copyright, Matchers::Exact and
#   Matchers::Dice, in that order (presumably the order in which they are
#   tried — confirm against the code that consumes this list)
def possible_matchers
  [
    Matchers::Copyright,
    Matchers::Exact,
    Matchers::Dice
  ]
end

#potential_false_positive? ⇒ Boolean

Is this file likely to result in a creative commons false positive?

Returns:

  • (Boolean)


94
95
96
# File 'lib/licensee/project_files/license_file.rb', line 94

# Is this file likely to result in a Creative Commons false positive?
#
# Tests the whitespace-stripped content against CC_FALSE_POSITIVE_REGEX.
# String#match? is used so the predicate returns a real boolean instead of
# the match offset (Integer) or nil that =~ yields; truthiness for callers
# is unchanged.
#
# @return [Boolean] true when the stripped content matches
#   CC_FALSE_POSITIVE_REGEX, false otherwise
def potential_false_positive?
  content.strip.match?(CC_FALSE_POSITIVE_REGEX)
end