Module: HEITT::Analyzer
- Defined in:
- lib/heitt/analyzer.rb
Class Method Summary collapse
-
.algorithm_scores(keyword_counts, profiles: HEITT::PROFILES) ⇒ Object
database: HEITT::DATABASE).
-
.analyze(text, profiles: HEITT::PROFILES) ⇒ Object
database: HEITT::DATABASE).
- .assign_confidence(scores_hash, prefix_matched_mode = nil) ⇒ Object
-
.entropy(text) ⇒ Object
This code is inspired by “github.com/chrisjchandler/entropy/blob/main/entropy.go”.
- .extract_prefix(text, offset) ⇒ Object
- .high_entropy?(text, min_ent) ⇒ Boolean
-
.keyword_counts(content_lower, profiles: HEITT::PROFILES) ⇒ Object
HEITT::DATABASE).
- .prefix_match?(profile, delim_prefix) ⇒ Boolean
- .score_candidates(modes, delim_prefix, context_scores, profiles: HEITT::PROFILES) ⇒ Object
Class Method Details
.algorithm_scores(keyword_counts, profiles: HEITT::PROFILES) ⇒ Object
database: HEITT::DATABASE)
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/heitt/analyzer.rb', line 97

# Adds up, per profile, how often that profile's context keywords were seen.
#
# @param keyword_counts [Hash, nil] occurrence counts keyed by lower-cased keyword
# @param profiles [Hash] profile name => profile hash; only the :context list
#   of each profile is read here
# @return [Hash] profile name => summed keyword count, containing only
#   profiles whose sum is greater than zero; empty when keyword_counts is nil
def self.algorithm_scores(keyword_counts, profiles: HEITT::PROFILES)
  return {} if keyword_counts.nil?

  profiles.each_with_object({}) do |(profile_name, profile), totals|
    keywords = profile[:context] || []
    next if keywords.empty?

    # Keywords are looked up lower-cased; a keyword without an entry counts as 0.
    hits = keywords.sum { |keyword| keyword_counts[keyword.downcase] || 0 }
    totals[profile_name] = hits if hits > 0
  end
end
.analyze(text, profiles: HEITT::PROFILES) ⇒ Object
database: HEITT::DATABASE)
6 7 8 9 10 11 |
# File 'lib/heitt/analyzer.rb', line 6

# Counts profile keywords in the given text and converts the counts into
# per-profile scores.
#
# @param text [String] text to inspect; it is lower-cased before counting
# @param profiles [Hash] forwarded unchanged to keyword_counts and algorithm_scores
# @return [Hash] whatever algorithm_scores returns for the counted keywords
def self.analyze(text, profiles: HEITT::PROFILES)
  HEITT::Logger.debug("Counting keywords...")

  # A distinct local name avoids shadowing the keyword_counts method.
  counted = keyword_counts(text.downcase, profiles: profiles).tap do |found|
    HEITT::Logger.debug("Counted keywords: #{found}")
  end

  algorithm_scores(counted, profiles: profiles)
end
.assign_confidence(scores_hash, prefix_matched_mode = nil) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/heitt/analyzer.rb', line 128

# Maps every scored mode to a confidence label based on how far its score
# sits from the average of all scores.
#
# Labels:
# * score of 0                        -> "regex-match"
# * relative deviation >= 2.0         -> "high"
# * relative deviation in 0.5..2.0    -> "medium-high" ("high" for the prefix-matched mode)
# * anything else                     -> "medium-low" ("medium-high" for the prefix-matched mode)
#
# The mode name is taken from the hash entry being processed. Looking the
# name up by score would return the first mode with that score, so a
# prefix-matched mode tied with another mode could be misidentified.
#
# @param scores_hash [Hash] mode name => numeric score
# @param prefix_matched_mode [Object, nil] name of the mode whose prefix matched, if any
# @return [Hash] mode name => confidence label; empty when scores_hash is empty
def self.assign_confidence(scores_hash, prefix_matched_mode = nil)
  all_scores = scores_hash.values
  return {} if all_scores.empty?

  avg_score = all_scores.sum.to_f / all_scores.size

  scores_hash.each_with_object({}) do |(mode_name, score), confidences|
    confidences[mode_name] =
      if score == 0
        "regex-match"
      else
        is_prefix_mode = (prefix_matched_mode == mode_name)
        # Relative distance from the mean: 0.0 means "exactly average".
        deviation = (score - avg_score) / avg_score

        case deviation
        when 2.0..Float::INFINITY
          "high"
        when 0.5..2.0
          is_prefix_mode ? "high" : "medium-high"
        else
          is_prefix_mode ? "medium-high" : "medium-low"
        end
      end
  end
end
.entropy(text) ⇒ Object
This code is inspired by “github.com/chrisjchandler/entropy/blob/main/entropy.go”.
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/heitt/analyzer.rb', line 64

# Computes the Shannon entropy (base 2) of the characters in +text+.
#
# @param text [String] string whose characters are measured
# @return [Float] negated sum of p * log2(p) over the relative frequency p
#   of each distinct character
def self.entropy(text)
  length = text.length.to_f

  # Occurrences per distinct character, in order of first appearance.
  occurrences = text.each_char.each_with_object(Hash.new(0)) do |char, seen|
    seen[char] += 1
  end

  # Plain left-to-right accumulation of p * log2(p).
  weighted_logs = occurrences.values.inject(0.0) do |acc, hits|
    share = hits.to_f / length
    acc + share * Math.log2(share)
  end

  # Each term is <= 0, so the sign is flipped to report a non-negative value.
  -weighted_logs
end
.extract_prefix(text, offset) ⇒ Object
13 14 15 16 |
# File 'lib/heitt/analyzer.rb', line 13

# Returns the part of the current line that precedes +offset+.
#
# The search for the previous newline starts at offset - 1 and the slice
# begins one character after it. This keeps the line's own newline out of
# the result, and a newline located exactly at +offset+ is not mistaken
# for the start of the line.
#
# @param text [String] full text being inspected
# @param offset [Integer] index in +text+ where the prefix ends (exclusive)
# @return [String] text from the start of the line up to, but excluding,
#   +offset+; "" when offset is zero or negative
def self.extract_prefix(text, offset)
  # A negative position would make rindex count from the end of the string.
  return "" unless offset.positive?

  newline_at = text.rindex("\n", offset - 1)
  line_start = newline_at ? newline_at + 1 : 0
  text[line_start...offset]
end
.high_entropy?(text, min_ent) ⇒ Boolean
18 19 20 |
# File 'lib/heitt/analyzer.rb', line 18

# Tells whether the entropy of +text+ reaches the given threshold.
#
# @param text [String] string handed to entropy
# @param min_ent [Numeric] threshold the measured entropy is compared against (inclusive)
# @return [Boolean] true when entropy(text) is greater than or equal to min_ent
def self.high_entropy?(text, min_ent)
  measured = entropy(text)
  measured >= min_ent
end
.keyword_counts(content_lower, profiles: HEITT::PROFILES) ⇒ Object
HEITT::DATABASE)
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/heitt/analyzer.rb', line 80

# Counts whole-word occurrences of every profile context keyword in the text.
#
# Keywords are lower-cased before de-duplication, so variants that differ
# only by case (e.g. "MD5" and "md5") are scanned once instead of once per
# variant.
#
# @param content_lower [String] text to search; the name indicates callers
#   pass it already lower-cased, and it is matched against lower-cased keywords
# @param profiles [Hash] profile name => profile hash; only each profile's
#   :context list is read
# @return [Hash] lower-cased keyword => number of matches, containing only
#   keywords that matched at least once
def self.keyword_counts(content_lower, profiles: HEITT::PROFILES)
  keywords = profiles.values
                     .flat_map { |profile| profile[:context] || [] }
                     .map(&:downcase)
                     .uniq

  keywords.each_with_object({}) do |keyword, counts|
    # \b on both sides restricts matches to whole words.
    hits = content_lower.scan(/\b#{Regexp.escape(keyword)}\b/).size
    counts[keyword] = hits if hits > 0
  end
end
.prefix_match?(profile, delim_prefix) ⇒ Boolean
117 118 119 120 121 122 123 124 125 |
# File 'lib/heitt/analyzer.rb', line 117

# Checks whether the last token in front of a value is one of the
# profile's known prefixes, ignoring case.
#
# The prefix text is stripped and split on "=", ":" and space; the last
# resulting token is the candidate, e.g. "db password = " yields "password".
#
# @param profile [Hash] profile hash; only :prefixes is read
# @param delim_prefix [String, nil] text preceding the value on its line
# @return [Boolean] true when the candidate token equals one of the profile's
#   prefixes case-insensitively; false when the profile has no prefixes,
#   delim_prefix is nil, or no token is left after splitting
def self.prefix_match?(profile, delim_prefix)
  prefixes = profile[:prefixes] || []
  # A missing prefix string cannot match anything; avoid calling strip on nil.
  return false if prefixes.empty? || delim_prefix.nil?

  # Literal character class: same delimiters as before ("=", ":", " ")
  # without rebuilding the pattern from an escaped string on every call.
  candidate = delim_prefix.strip.split(/[=: ]/).last&.strip&.downcase
  return false if candidate.nil?

  prefixes.map(&:downcase).include?(candidate)
end
.score_candidates(modes, delim_prefix, context_scores, profiles: HEITT::PROFILES) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/heitt/analyzer.rb', line 23

# Builds a scored, confidence-labelled candidate list for the given modes.
#
# Each mode starts from its context score (0 when absent) and gains 20
# points when prefix_match? accepts the profile for +delim_prefix+. If
# several modes match the prefix, the last one in +modes+ is the one
# reported to assign_confidence as the prefix-matched mode.
#
# @param modes [Array<Hash>] mode hashes; :name, :hashcat, :john and :extended are read
# @param delim_prefix [String] text preceding the value, forwarded to prefix_match?
# @param context_scores [Hash] mode name => context score
# @param profiles [Hash] mode name => profile hash (:description is read here)
# @return [Array<Hash>] candidates with :name, :hashcat, :john, :extended,
#   :description, :score and :confidence, ordered by descending score;
#   empty when +modes+ is empty
def self.score_candidates(modes, delim_prefix, context_scores, profiles: HEITT::PROFILES)
  prefix_hit = nil

  candidates = modes.map do |mode|
    mode_name = mode[:name]
    profile = profiles[mode_name] || {}
    points = context_scores[mode_name] || 0

    # A matching prefix is strong evidence, so the score gets a fixed boost.
    if prefix_match?(profile, delim_prefix)
      prefix_hit = mode_name
      points += 20
    end

    {
      name: mode_name,
      hashcat: mode[:hashcat],
      john: mode[:john],
      extended: mode[:extended],
      description: profile[:description],
      score: points
    }
  end
  return [] if candidates.empty?

  # Confidence is derived from the name => score view of all candidates.
  score_by_name = candidates.each_with_object({}) do |candidate, acc|
    acc[candidate[:name]] = candidate[:score]
  end
  confidence_by_name = assign_confidence(score_by_name, prefix_hit)

  labelled = candidates.map do |candidate|
    candidate.merge(confidence: confidence_by_name[candidate[:name]])
  end
  scored_candidates = labelled.sort_by { |candidate| -candidate[:score] }

  HEITT::Logger.debug("Scored Algorithm: #{scored_candidates.map{|s| s[:name] }} => Calculated Confidence: #{scored_candidates.map{|s| s[:confidence] }}")
  scored_candidates
end