Module: HEITT::Analyzer

Defined in:
lib/heitt/analyzer.rb

Class Method Summary collapse

Class Method Details

.algorithm_scores(keyword_counts, profiles: HEITT::PROFILES) ⇒ Object

database: HEITT::DATABASE)



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/heitt/analyzer.rb', line 97

# Scores every profile by how often its context keywords were seen.
#
# For each profile, the counts of its +:context+ keywords (looked up
# lowercased in +keyword_counts+) are added together. Profiles without
# context keywords, or whose total is zero, are left out of the result.
#
# @param keyword_counts [Hash, nil] lowercased keyword => occurrence count
# @param profiles [Hash] profile name => profile hash (reads +:context+)
# @return [Hash] profile name => summed keyword count (only totals > 0);
#   empty when +keyword_counts+ is nil
def self.algorithm_scores(keyword_counts, profiles: HEITT::PROFILES)
  return {} if keyword_counts.nil?

  profiles.each_with_object({}) do |(profile_name, profile), tally|
    keywords = profile[:context] || []
    # An empty keyword list sums to 0 and is therefore skipped below.
    hits = keywords.sum { |keyword| keyword_counts[keyword.downcase] || 0 }
    tally[profile_name] = hits if hits > 0
  end
end

.analyze(text, profiles: HEITT::PROFILES) ⇒ Object

database: HEITT::DATABASE)



6
7
8
9
10
11
# File 'lib/heitt/analyzer.rb', line 6

# Runs the keyword analysis pipeline on a piece of text.
#
# The text is lowercased, its profile keywords are counted with
# +keyword_counts+, and those counts are turned into per-profile scores
# with +algorithm_scores+. Progress is reported via HEITT::Logger.debug.
#
# @param text [String] text to analyse
# @param profiles [Hash] profile name => profile hash, forwarded to both helpers
# @return [Object] whatever +algorithm_scores+ returns for the counted keywords
def self.analyze(text, profiles: HEITT::PROFILES)
  HEITT::Logger.debug("Counting keywords...")
  lowered_text = text.downcase
  counts = keyword_counts(lowered_text, profiles: profiles)
  HEITT::Logger.debug("Counted keywords: #{counts}")
  algorithm_scores(counts, profiles: profiles)
end

.assign_confidence(scores_hash, prefix_matched_mode = nil) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/heitt/analyzer.rb', line 128

# Converts each mode's numeric score into a confidence label.
#
# A score of 0 is labelled "regex-match". Any other score is compared with
# the average of all scores via its relative deviation
# ((score - average) / average):
#
# * deviation >= 2.0        => "high"
# * 0.5 <= deviation < 2.0  => "medium-high" ("high" for the prefix-matched mode)
# * anything else           => "medium-low"  ("medium-high" for the prefix-matched mode)
#
# @param scores_hash [Hash] mode name => numeric score
# @param prefix_matched_mode [Object, nil] name of the mode whose prefix
#   matched; that mode is promoted one level in the two lower bands
# @return [Hash] mode name => confidence label; empty when +scores_hash+ is empty
def self.assign_confidence(scores_hash, prefix_matched_mode = nil)
  return {} if scores_hash.empty?

  avg_score = scores_hash.values.sum.to_f / scores_hash.size

  # Walk name/score pairs directly. Recovering the name from the score with
  # Hash#key returns the FIRST mode holding that score, so tied modes were
  # mislabelled (the prefix boost went to, or was withheld from, the wrong one).
  scores_hash.each_with_object({}) do |(mode_name, score), labels|
    labels[mode_name] =
      if score == 0
        "regex-match"
      else
        is_prefix_mode = (prefix_matched_mode == mode_name)
        deviation = (score - avg_score) / avg_score

        case deviation
        when 2.0..Float::INFINITY
          "high"
        when 0.5..2.0
          is_prefix_mode ? "high" : "medium-high"
        else
          is_prefix_mode ? "medium-high" : "medium-low"
        end
      end
  end
end

.entropy(text) ⇒ Object

This code was inspired by github.com/chrisjchandler/entropy/blob/main/entropy.go



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/heitt/analyzer.rb', line 64

# Computes the Shannon entropy (base 2) of the characters in +text+.
#
# Each distinct character contributes p * log2(p), where p is its share of
# the total character count; the negated sum is returned.
#
# @param text [String] text whose character distribution is measured
# @return [Float] entropy in bits per character
def self.entropy(text)
  char_total = text.length.to_f
  occurrences = text.each_char.each_with_object(Hash.new(0)) do |char, seen|
    seen[char] += 1
  end

  # Plain left-to-right accumulation; every term is <= 0, so the sum is
  # negated at the end to yield a non-negative entropy.
  weighted_log_sum = occurrences.values.inject(0.0) do |running, occurrence|
    share = occurrence.to_f / char_total
    running + share * Math.log2(share)
  end

  -weighted_log_sum
end

.extract_prefix(text, offset) ⇒ Object



13
14
15
16
# File 'lib/heitt/analyzer.rb', line 13

# Returns the part of +text+ that precedes +offset+ on the same line.
#
# The slice starts at the last newline found at or before +offset+ (that
# newline character is included in the result) or at the beginning of the
# text when there is none, and ends just before +offset+.
#
# @param text [String] full text being scanned
# @param offset [Integer] index marking the end (exclusive) of the prefix
# @return [String, nil] the slice as produced by String#[]
def self.extract_prefix(text, offset)
  newline_index = text.rindex("\n", offset)
  slice_from = newline_index || 0
  text.slice(slice_from...offset)
end

.high_entropy?(text, min_ent) ⇒ Boolean

Returns:

  • (Boolean)


18
19
20
# File 'lib/heitt/analyzer.rb', line 18

# Tells whether the entropy of +text+ reaches a minimum threshold.
#
# @param text [String] text passed to +entropy+
# @param min_ent [Numeric] threshold (inclusive)
# @return [Boolean] true when entropy(text) is greater than or equal to +min_ent+
def self.high_entropy?(text, min_ent)
  measured = entropy(text)
  measured >= min_ent
end

.keyword_counts(content_lower, profiles: HEITT::PROFILES) ⇒ Object

HEITT::DATABASE)



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/heitt/analyzer.rb', line 80

# Counts whole-word occurrences of every profile context keyword.
#
# Keywords are gathered from the +:context+ list of each profile,
# lowercased, and matched against +content_lower+ between word boundaries
# (\b). Keywords that never occur are omitted from the result.
#
# @param content_lower [String] text to search; the parameter name suggests
#   the caller passes it already lowercased (the method does not lowercase it)
# @param profiles [Hash] profile name => profile hash (reads +:context+)
# @return [Hash] lowercased keyword => number of matches (only counts > 0)
def self.keyword_counts(content_lower, profiles: HEITT::PROFILES)
  context_words = profiles.values.flat_map { |profile| profile[:context] || [] }

  context_words.uniq.map(&:downcase).each_with_object({}) do |word, found|
    occurrences = content_lower.scan(/\b#{Regexp.escape(word)}\b/).size
    found[word] = occurrences if occurrences > 0
  end
end

.prefix_match?(profile, delim_prefix) ⇒ Boolean

Returns:

  • (Boolean)


117
118
119
120
121
122
123
124
125
# File 'lib/heitt/analyzer.rb', line 117

# Checks whether the token right before a delimiter is one of the
# profile's known prefixes.
#
# +delim_prefix+ is stripped and split on '=', ':' and space; the last
# resulting token is compared case-insensitively with +profile[:prefixes]+.
#
# @param profile [Hash] profile hash (reads +:prefixes+)
# @param delim_prefix [String] text preceding the candidate value
# @return [Boolean] true when the last token equals a known prefix; false
#   when the profile has no prefixes or no token could be extracted
def self.prefix_match?(profile, delim_prefix)
  known_prefixes = profile[:prefixes] || []
  return false if known_prefixes.empty?

  separator = /[#{Regexp.escape("= : ")}]/
  last_token = delim_prefix.strip.split(separator).last
  # last_token is nil when the stripped text contains only delimiters or nothing.
  candidate = last_token&.strip&.downcase
  known_prefixes.map(&:downcase).include?(candidate)
end

.score_candidates(modes, delim_prefix, context_scores, profiles: HEITT::PROFILES) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/heitt/analyzer.rb', line 23

# Builds scored, confidence-labelled candidates for a list of modes.
#
# Every mode starts from its context score (0 when absent) and gains 20
# points when +prefix_match?+ accepts its profile for +delim_prefix+.
# Confidence labels come from +assign_confidence+; the result is ordered by
# descending score and reported through HEITT::Logger.debug.
#
# @param modes [Array<Hash>] modes with +:name+, +:hashcat+, +:john+, +:extended+
# @param delim_prefix [String] text preceding the candidate, used for prefix matching
# @param context_scores [Hash] mode name => context score
# @param profiles [Hash] mode name => profile hash (reads +:description+)
# @return [Array<Hash>] candidates with +:name+, +:hashcat+, +:john+,
#   +:extended+, +:description+, +:score+ and +:confidence+; empty when
#   +modes+ is empty
def self.score_candidates(modes, delim_prefix, context_scores, profiles: HEITT::PROFILES)
  prefix_hit = nil

  candidates = modes.map do |mode|
    mode_name = mode[:name]
    profile = profiles[mode_name] || {}
    points = context_scores[mode_name] || 0

    # A matching prefix is strong evidence, so it outweighs context keywords.
    if prefix_match?(profile, delim_prefix)
      prefix_hit = mode_name
      points += 20
    end

    {
      name: mode_name,
      hashcat: mode[:hashcat],
      john: mode[:john],
      extended: mode[:extended],
      description: profile[:description],
      score: points
    }
  end
  return [] if candidates.empty?

  score_by_name = candidates.each_with_object({}) do |candidate, acc|
    acc[candidate[:name]] = candidate[:score]
  end
  confidence_by_name = assign_confidence(score_by_name, prefix_hit)

  ranked = candidates
    .map { |candidate| candidate.merge(confidence: confidence_by_name[candidate[:name]]) }
    .sort_by { |candidate| -candidate[:score] }

  ranked_names = ranked.map { |candidate| candidate[:name] }
  ranked_labels = ranked.map { |candidate| candidate[:confidence] }
  HEITT::Logger.debug("Scored Algorithm: #{ranked_names}  =>   Calculated Confidence: #{ranked_labels}")
  ranked
end