Module: HEITT::Analyzer
- Defined in:
- lib/heitt.rb
Class Method Summary collapse
- .algorithm_scores(keyword_counts, database: HEITT::DATABASE) ⇒ Object
- .analyze(text, database: HEITT::DATABASE) ⇒ Object
- .assign_confidence(scores_hash, prefix_matched_mode = nil) ⇒ Object
-
.entropy(text) ⇒ Object
This code is inspired by “github.com/chrisjchandler/entropy/blob/main/entropy.go”.
- .extract_prefix(text, offset) ⇒ Object
- .get_modes(entry) ⇒ Object
- .high_entropy?(text, min_ent) ⇒ Boolean
- .keyword_counts(content_lower, database: HEITT::DATABASE) ⇒ Object
- .prefix_match?(mode, delim_prefix) ⇒ Boolean
- .score_candidates(modes, delim_prefix, context_scores) ⇒ Object
Class Method Details
.algorithm_scores(keyword_counts, database: HEITT::DATABASE) ⇒ Object
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/heitt.rb', line 130

# Totals, for every mode in the database, how many times that mode's
# context keywords were counted.
#
# @param keyword_counts [Hash, nil] keyword (lowercase) => occurrence count
# @param database [Enumerable] entries whose mode list is resolved via get_modes
# @return [Hash] mode name => summed keyword count; modes whose total is
#   zero are left out, and an empty hash is returned for nil counts
def self.algorithm_scores(keyword_counts, database: HEITT::DATABASE)
  return {} if keyword_counts.nil?

  database.each_with_object({}) do |entry, totals|
    mode_list = get_modes(entry)
    next unless mode_list

    mode_list.each do |mode|
      keywords = mode[:context] || []
      next if keywords.empty?

      # Counts are keyed by lowercase keyword, so fold case before the lookup.
      hits = keywords.sum { |keyword| keyword_counts[keyword.downcase] || 0 }
      totals[mode[:name]] = hits if hits > 0
    end
  end
end
.analyze(text, database: HEITT::DATABASE) ⇒ Object
45 46 47 48 |
# File 'lib/heitt.rb', line 45

# Scores every mode in the database against the keywords found in +text+.
#
# @param text [String] content to inspect; it is lowercased before counting
# @param database [Enumerable] forwarded to keyword_counts and algorithm_scores
# @return [Hash] the result of algorithm_scores for the counted keywords
def self.analyze(text, database: HEITT::DATABASE)
  lowered = text.downcase
  counts = keyword_counts(lowered, database: database)
  algorithm_scores(counts, database: database)
end
.assign_confidence(scores_hash, prefix_matched_mode = nil) ⇒ Object
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/heitt.rb', line 158

# Converts raw scores into confidence labels, relative to the average score.
#
# A score of 0 is labelled "regex-match". Otherwise the relative deviation
# from the average, (score - avg) / avg, selects the label:
#   >= 2.0  -> "high"
#   >= 0.5  -> "medium-high" ("high" for the prefix-matched mode)
#   else    -> "medium-low"  ("medium-high" for the prefix-matched mode)
#
# @param scores_hash [Hash] mode name => numeric score
# @param prefix_matched_mode [Object, nil] name of the mode whose prefix matched
# @return [Hash] mode name => confidence label; empty when scores_hash is empty
def self.assign_confidence(scores_hash, prefix_matched_mode = nil)
  return {} if scores_hash.empty?

  avg_score = scores_hash.values.sum.to_f / scores_hash.size

  # Iterate over name/score pairs: looking the name up by score (Hash#key)
  # returns the first mode with that score, which misidentifies the
  # prefix-matched mode whenever two modes are tied.
  scores_hash.each_with_object({}) do |(mode_name, score), confidences|
    confidences[mode_name] =
      if score == 0
        "regex-match"
      else
        is_prefix_mode = (prefix_matched_mode == mode_name)
        deviation = (score - avg_score) / avg_score

        if deviation >= 2.0
          "high"
        elsif deviation >= 0.5
          is_prefix_mode ? "high" : "medium-high"
        else
          is_prefix_mode ? "medium-high" : "medium-low"
        end
      end
  end
end
.entropy(text) ⇒ Object
This code is inspired by “github.com/chrisjchandler/entropy/blob/main/entropy.go”
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/heitt.rb', line 98

# Computes the Shannon entropy of +text+ in bits per character:
# the negated sum of p * log2(p) over each distinct character's frequency p.
#
# @param text [String] string to measure
# @return [Float] entropy value; a string with no or only one distinct
#   character yields negative zero, which compares equal to 0.0
def self.entropy(text)
  # Occurrences of each distinct character.
  char_counts = text.each_char.each_with_object(Hash.new(0)) do |ch, seen|
    seen[ch] += 1
  end

  # Total number of characters, as a float so the division below is exact.
  length = text.length.to_f

  weighted_log_sum = char_counts.each_value.reduce(0.0) do |acc, occurrences|
    share = occurrences.to_f / length
    acc + share * Math.log2(share)
  end

  # Each term above is <= 0, so negate to report a non-negative entropy.
  -weighted_log_sum
end
.extract_prefix(text, offset) ⇒ Object
50 51 52 53 |
# File 'lib/heitt.rb', line 50

# Returns the part of the current line that precedes +offset+.
#
# The search for the previous newline starts at offset - 1, so a newline
# sitting exactly at +offset+ is not mistaken for the start of the line, and
# the slice begins one past the newline so the result never carries the
# line break of the preceding line.
#
# @param text [String] full content being scanned
# @param offset [Integer] index in +text+ where the token of interest starts
# @return [String] text from the start of the line up to, but excluding, +offset+
def self.extract_prefix(text, offset)
  newline_at = offset.positive? ? text.rindex("\n", offset - 1) : nil
  line_start = newline_at ? newline_at + 1 : 0
  text[line_start...offset]
end
.get_modes(entry) ⇒ Object
92 93 94 95 |
# File 'lib/heitt.rb', line 92

# Fetches the mode list of a database entry, whichever key it is stored under.
#
# Keys are tried in order (:modes, :algorithms, :hashes, :candidates, :types,
# :hashtypes) and the first truthy value wins.
#
# @param entry [Hash] a database entry
# @return [Object, nil] the first truthy value among the keys above, or the
#   (falsy) value of the last key when none is set
def self.get_modes(entry)
  %i[modes algorithms hashes candidates types hashtypes].reduce(nil) do |found, key|
    found || entry[key]
  end
end
.high_entropy?(text, min_ent) ⇒ Boolean
55 56 57 |
# File 'lib/heitt.rb', line 55

# Tells whether the entropy of +text+ reaches a minimum threshold.
#
# @param text [String] string passed to entropy
# @param min_ent [Numeric] inclusive lower bound
# @return [Boolean] true when entropy(text) is at least +min_ent+
def self.high_entropy?(text, min_ent)
  measured = entropy(text)
  measured >= min_ent
end
.keyword_counts(content_lower, database: HEITT::DATABASE) ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/heitt.rb', line 114

# Counts whole-word occurrences of every context keyword in the content.
#
# Keywords are gathered from the :context list of each mode in the database
# and lowercased; each one is matched between word boundaries (\b).
#
# @param content_lower [String] content to scan; the parameter name suggests
#   the caller has already lowercased it, as keywords are matched in lowercase
# @param database [Enumerable] entries whose mode list is resolved via get_modes
# @return [Hash] lowercase keyword => occurrence count, only for keywords
#   that occur at least once
def self.keyword_counts(content_lower, database: HEITT::DATABASE)
  context_words = database.flat_map do |entry|
    mode_list = get_modes(entry)
    mode_list ? mode_list.flat_map { |mode| mode[:context] || [] } : []
  end
  normalized = context_words.uniq.map(&:downcase)

  normalized.each_with_object({}) do |keyword, found|
    hits = content_lower.scan(/\b#{Regexp.escape(keyword)}\b/).size
    found[keyword] = hits if hits > 0
  end
end
.prefix_match?(mode, delim_prefix) ⇒ Boolean
148 149 150 151 152 153 154 155 |
# File 'lib/heitt.rb', line 148

# Checks whether the last word before the delimiter names one of the mode's
# known prefixes (case-insensitively).
#
# The prefix text is stripped, split on '=', ':' and space, and the last
# non-empty piece is compared against the mode's :prefixes list.
#
# @param mode [Hash] mode definition; :prefixes may be absent
# @param delim_prefix [String] text preceding the candidate on its line
# @return [Boolean] true when the last piece equals one of the prefixes
def self.prefix_match?(mode, delim_prefix)
  known = mode[:prefixes] || []
  return false if known.empty?

  delimiters = "= : "
  splitter = /[#{Regexp.escape(delimiters)}]/

  # nil when the prefix text is empty or consists only of delimiters.
  label = delim_prefix.strip.split(splitter).last
  label = label&.strip&.downcase

  lowered = known.map(&:downcase)
  lowered.include?(label)
end
.score_candidates(modes, delim_prefix, context_scores) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
# File 'lib/heitt.rb', line 60

# Ranks candidate modes by context score, boosted when the line prefix
# names the mode, and attaches a confidence label to each.
#
# @param modes [Array<Hash>] candidate mode definitions
# @param delim_prefix [String] text preceding the candidate, given to prefix_match?
# @param context_scores [Hash] mode name => context score; a missing name counts as 0
# @return [Array<Hash>] one hash per mode with :name, :hashcat, :john,
#   :description, :extended, :score and :confidence, ordered by descending
#   score; empty when +modes+ is empty
def self.score_candidates(modes, delim_prefix, context_scores)
  boosted_name = nil

  # Context-based scoring.
  scored = modes.map do |mode|
    points = context_scores[mode[:name]] || 0

    if prefix_match?(mode, delim_prefix)
      # A matching prefix is strong evidence, so boost the score. When several
      # modes match, the last one is the one reported to assign_confidence.
      boosted_name = mode[:name]
      points += 20
    end

    {
      name: mode[:name],
      hashcat: mode[:hashcat],
      john: mode[:john],
      description: mode[:description],
      extended: mode[:extended],
      score: points
    }
  end
  return [] if scored.empty?

  # Confidence labels are derived from the scores keyed by mode name.
  score_by_name = scored.each_with_object({}) do |candidate, table|
    table[candidate[:name]] = candidate[:score]
  end
  confidences = assign_confidence(score_by_name, boosted_name)

  labelled = scored.map do |candidate|
    candidate.merge(confidence: confidences[candidate[:name]])
  end
  labelled.sort_by { |candidate| -candidate[:score] }
end