Module: HEITT::Analyzer

Defined in:
lib/heitt.rb

Class Method Summary collapse

Class Method Details

.algorithm_scores(keyword_counts, database: HEITT::DATABASE) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/heitt.rb', line 130

# Totals, per mode, how often that mode's context keywords were seen.
#
# @param keyword_counts [Hash{String => Integer}, nil] occurrences keyed by
#   lower-cased keyword
# @param database [Enumerable<Hash>] entries whose mode list is resolved
#   through get_modes
# @return [Hash] mode name => summed keyword count; only modes with a
#   positive total are included, and the hash is empty when keyword_counts
#   is nil
def self.algorithm_scores(keyword_counts, database: HEITT::DATABASE)
  return {} if keyword_counts.nil?

  database.each_with_object({}) do |entry, totals|
    # Entries without a mode list contribute nothing.
    (get_modes(entry) || []).each do |mode|
      keywords = mode[:context] || []
      # Keywords are looked up lower-cased; a missing keyword counts as zero.
      hits = keywords.sum { |keyword| keyword_counts[keyword.downcase] || 0 }
      totals[mode[:name]] = hits if hits > 0
    end
  end
end

.analyze(text, database: HEITT::DATABASE) ⇒ Object



45
46
47
48
# File 'lib/heitt.rb', line 45

# Runs the keyword pipeline on +text+: lower-cases it, counts database
# keywords in it, then converts those counts into per-mode scores.
#
# @param text [String] raw text to analyse
# @param database [Enumerable<Hash>] passed through to keyword_counts and
#   algorithm_scores
# @return [Object] whatever algorithm_scores returns for the computed counts
def self.analyze(text, database: HEITT::DATABASE)
  lowered = text.downcase
  counts = keyword_counts(lowered, database: database)
  algorithm_scores(counts, database: database)
end

.assign_confidence(scores_hash, prefix_matched_mode = nil) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/heitt.rb', line 158

# Labels every mode with a confidence string based on how far its score
# sits above the average score of all candidates.
#
# A score of 0 is labelled "regex-match". Otherwise the relative deviation
# (score - average) / average selects the label, and the mode named by
# +prefix_matched_mode+ is promoted one level.
#
# @param scores_hash [Hash] mode name => numeric score
# @param prefix_matched_mode [Object, nil] name of the mode to promote
# @return [Hash] mode name => "regex-match", "medium-low", "medium-high"
#   or "high"; empty when scores_hash is empty
def self.assign_confidence(scores_hash, prefix_matched_mode = nil)
  return {} if scores_hash.empty?

  avg_score = scores_hash.values.sum.to_f / scores_hash.size

  # Iterate over name/score pairs: looking the name up by score would return
  # the first mode with that score and mislabel modes that tie.
  scores_hash.each_with_object({}) do |(mode_name, score), confidences|
    confidences[mode_name] =
      if score == 0
        "regex-match"
      else
        is_prefix_mode = (prefix_matched_mode == mode_name)
        deviation = (score - avg_score) / avg_score

        case deviation
        when 2.0..Float::INFINITY
          "high"
        when 0.5..2.0
          is_prefix_mode ? "high" : "medium-high"
        else
          is_prefix_mode ? "medium-high" : "medium-low"
        end
      end
  end
end

.entropy(text) ⇒ Object

This code is inspired by github.com/chrisjchandler/entropy/blob/main/entropy.go



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/heitt.rb', line 98

# Computes the Shannon entropy of +text+ in bits per character, using the
# relative frequency of each character as its probability.
#
# @param text [String] string whose character distribution is measured
# @return [Float] non-negative entropy; 0.0 for an empty string or a string
#   made of a single repeated character
def self.entropy(text)
  frequency = Hash.new(0)
  text.each_char { |ch| frequency[ch] += 1 }

  # Total number of characters, as a Float so the division below is exact.
  total = text.length.to_f

  # Each p * log2(p) term is <= 0, so subtracting it accumulates a positive
  # result directly. Negating a finished sum instead would turn a zero
  # entropy into -0.0.
  frequency.each_value.reduce(0.0) do |bits, count|
    probability = count / total
    bits - probability * Math.log2(probability)
  end
end

.extract_prefix(text, offset) ⇒ Object



50
51
52
53
# File 'lib/heitt.rb', line 50

# Returns the part of the current line that precedes +offset+.
#
# The line starts right after the closest "\n" located before +offset+, or
# at the beginning of +text+ when there is none. The newline itself is never
# part of the result.
#
# @param text [String] full text being scanned
# @param offset [Integer] index in +text+ where the prefix ends (exclusive)
# @return [String] text between the start of the line and +offset+
def self.extract_prefix(text, offset)
  # Nothing precedes index 0; also keeps the `offset - 1` below from turning
  # into -1, which rindex would treat as "search from the end".
  return "" if offset.zero?

  # Search strictly before offset so a newline sitting at offset is ignored.
  newline_at = text.rindex("\n", offset - 1)
  line_start = newline_at ? newline_at + 1 : 0
  text[line_start...offset]
end

.get_modes(entry) ⇒ Object



92
93
94
95
# File 'lib/heitt.rb', line 92

# Fetches the mode list of a database entry, trying each of the key names
# below in order and returning the first truthy value.
#
# @param entry [Hash] database entry
# @return [Object, nil] value stored under the first matching key; when no
#   key holds a truthy value, the value under :hashtypes (nil if absent)
def self.get_modes(entry)
  %i[modes algorithms hashes candidates types hashtypes].reduce(nil) do |found, key|
    # `||` short-circuits, so later keys are only read while nothing was found.
    found || entry[key]
  end
end

.high_entropy?(text, min_ent) ⇒ Boolean

Returns:

  • (Boolean)


55
56
57
# File 'lib/heitt.rb', line 55

# Tells whether the entropy of +text+ reaches the given threshold.
#
# @param text [String] string handed to entropy
# @param min_ent [Numeric] minimum entropy, inclusive
# @return [Boolean] true when entropy(text) is greater than or equal to
#   +min_ent+
def self.high_entropy?(text, min_ent)
  measured = entropy(text)
  measured >= min_ent
end

.keyword_counts(content_lower, database: HEITT::DATABASE) ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/heitt.rb', line 114

# Counts whole-word occurrences of every context keyword from the database
# in +content_lower+.
#
# @param content_lower [String] text to search; keywords are lower-cased
#   before matching, so the caller is expected to pass lower-cased text
# @param database [Enumerable<Hash>] entries whose mode list is resolved
#   through get_modes
# @return [Hash{String => Integer}] lower-cased keyword => occurrence count,
#   only for keywords found at least once
def self.keyword_counts(content_lower, database: HEITT::DATABASE)
  keywords = database.flat_map do |entry|
    (get_modes(entry) || []).flat_map { |mode| mode[:context] || [] }
  end

  # Fold case first, then dedupe, so "MD5" and "md5" lead to a single scan.
  keywords = keywords.map(&:downcase).uniq

  keywords.each_with_object({}) do |keyword, counts|
    # \b on both sides restricts matches to whole words.
    occurrences = content_lower.scan(/\b#{Regexp.escape(keyword)}\b/).size
    counts[keyword] = occurrences if occurrences > 0
  end
end

.prefix_match?(mode, delim_prefix) ⇒ Boolean

Returns:

  • (Boolean)


148
149
150
151
152
153
154
155
# File 'lib/heitt.rb', line 148

# Checks whether the last token of +delim_prefix+ is one of the mode's
# declared prefixes, ignoring case.
#
# The prefix text is stripped and split on "=", ":" and space; the final
# piece is the token that gets compared.
#
# @param mode [Hash] mode definition; reads the optional :prefixes list
# @param delim_prefix [String] text that precedes the candidate on its line
# @return [Boolean] true when the token equals one of the prefixes
def self.prefix_match?(mode, delim_prefix)
  known_prefixes = mode[:prefixes] || []
  return false if known_prefixes.empty?

  separator = /[#{Regexp.escape("= : ")}]/
  token = delim_prefix.strip.split(separator).last
  # split yields no pieces for an empty prefix, leaving the token nil.
  token = token.strip.downcase if token

  known_prefixes.any? { |prefix| prefix.downcase == token }
end

.score_candidates(modes, delim_prefix, context_scores) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/heitt.rb', line 60

# Scores candidate modes and returns them ranked best-first with a
# confidence label attached.
#
# Each mode starts from its entry in +context_scores+ (0 when absent) and
# gains 20 points when prefix_match? accepts +delim_prefix+ for it. The
# name of the last mode that matched by prefix is handed to
# assign_confidence together with all scores.
#
# @param modes [Array<Hash>] mode definitions; reads :name, :hashcat,
#   :john, :description and :extended
# @param delim_prefix [String] text preceding the candidate, forwarded to
#   prefix_match?
# @param context_scores [Hash] mode name => context score
# @return [Array<Hash>] one hash per mode with the keys above plus :score
#   and :confidence, ordered by descending score; modes with equal scores
#   keep the order they had in +modes+. Empty when +modes+ is empty.
def self.score_candidates(modes, delim_prefix, context_scores)
  prefix_matched_mode = nil

  # Context-based scoring.
  matches = modes.map do |mode|
    score = context_scores[mode[:name]] || 0

    if prefix_match?(mode, delim_prefix)
      # A matching prefix is strong evidence, so boost the score.
      prefix_matched_mode = mode[:name]
      score += 20
    end

    {
      name: mode[:name],
      hashcat: mode[:hashcat],
      john: mode[:john],
      description: mode[:description],
      extended: mode[:extended],
      score: score
    }
  end
  return [] if matches.empty?

  scores_hash = matches.map { |match| [match[:name], match[:score]] }.to_h
  confidences = assign_confidence(scores_hash, prefix_matched_mode)

  # sort_by is not stable; the input index breaks ties so equal-score
  # candidates come out in a deterministic order.
  matches
    .each_with_index
    .sort_by { |match, index| [-match[:score], index] }
    .map { |match, _index| match.merge(confidence: confidences[match[:name]]) }
end