Class: Legion::Extensions::Apollo::Actor::EntityWatchdog

Inherits:
Legion::Extensions::Actors::Every
  • Object
show all
Includes:
Runners::EntityExtractor, Runners::Knowledge, Settings::Helper
Defined in:
lib/legion/extensions/apollo/actors/entity_watchdog.rb

Constant Summary collapse

DEDUP_THRESHOLD_DEFAULT =
0.92
TASK_LOG_LOOKBACK_SECONDS =
300
TASK_LOG_LIMIT =
50

Constants included from Runners::EntityExtractor

Runners::EntityExtractor::DEFAULT_ENTITY_TYPES, Runners::EntityExtractor::DEFAULT_MIN_CONFIDENCE

Constants included from Runners::Knowledge

Runners::Knowledge::CONFLICT_CHECK_MAX_CHARS, Runners::Knowledge::CONTENT_TYPE_ALIASES, Runners::Knowledge::DEFAULT_QUERY_STATUS, Runners::Knowledge::DOMAIN_ISOLATION, Runners::Knowledge::UNSET

Instance Method Summary collapse

Methods included from Runners::EntityExtractor

#entity_extraction_prompt, #entity_schema, #extract_entities

Methods included from Runners::Knowledge

#deprecate_entry, #handle_erasure_request, #handle_ingest, #handle_query, #handle_traverse, #prepare_mesh_export, #query_knowledge, #redistribute_knowledge, #related_entries, #retrieve_relevant, #store_knowledge

Instance Method Details

#check_subtask? ⇒ Boolean

Returns:

  • (Boolean)


26
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 26

# Always answers false.
# NOTE(review): presumably read by the actor base class to decide whether
# subtasks are checked — confirm against Legion::Extensions::Actors::Every.
#
# @return [Boolean] false
def check_subtask?
  false
end

#dedup_similarity_threshold ⇒ Object



128
129
130
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 128

# Similarity threshold used by entity_exists_in_apollo? to decide whether the
# nearest Apollo entry counts as a duplicate.
#
# Reads settings[:entity_watchdog][:dedup_threshold]. When the value (or the
# whole :entity_watchdog section) is missing, DEDUP_THRESHOLD_DEFAULT is used:
# nil.to_f would otherwise yield 0.0, and a zero threshold makes
# `distance <= 1.0 - threshold` accept practically any nearest entry as a
# duplicate, so no entity would ever be ingested.
#
# @return [Float] the configured threshold, or DEDUP_THRESHOLD_DEFAULT when unset
def dedup_similarity_threshold
  section = settings[:entity_watchdog] || {}
  configured = section[:dedup_threshold]
  # Only nil triggers the fallback; an explicit 0 is still honoured.
  (configured.nil? ? DEDUP_THRESHOLD_DEFAULT : configured).to_f
end

#enabled? ⇒ Boolean

rubocop:disable Legion/Extension/ActorEnabledSideEffects

Returns:

  • (Boolean)


29
30
31
32
33
34
35
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 29

# Reports whether this actor can run: the EntityExtractor runner constant must
# be defined and Legion must directly define a Transport constant.
#
# Any StandardError raised while checking is passed to handle_exception and
# the actor is reported as disabled.
#
# @return [Boolean, nil] true when both are present; false or nil otherwise
def enabled? # rubocop:disable Legion/Extension/ActorEnabledSideEffects
  extractor_loaded = defined?(Legion::Extensions::Apollo::Runners::EntityExtractor)
  # `false` restricts the lookup to Legion itself (no ancestor lookup).
  extractor_loaded && Legion.const_defined?(:Transport, false)
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'apollo.entity_watchdog.enabled')
  false
end

#entity_exists_in_apollo?(entity) ⇒ Boolean

Returns:

  • (Boolean)


88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 88

# Checks whether an entity is already represented in Apollo.
#
# Asks retrieve_relevant for the single closest entry matching the entity's
# name and tagged with its type. The entity counts as existing when that
# entry's distance is at most `1.0 - dedup_similarity_threshold`.
#
# Any StandardError is passed to handle_exception and treated as "not found".
#
# @param entity [Hash] read via entity[:name] and entity[:type]
# @return [Boolean]
def entity_exists_in_apollo?(entity)
  name = entity[:name].to_s
  confidence_floor = Helpers::Confidence.apollo_setting(:entity_watchdog, :exists_min_confidence, default: 0.1)
  type_tag = entity[:type].to_s

  lookup = retrieve_relevant(query: name, limit: 1, min_confidence: confidence_floor, tags: [type_tag])
  found = lookup[:success] && lookup[:count].positive?
  return false unless found

  nearest_distance = lookup[:entries].first[:distance].to_f
  max_distance = 1.0 - dedup_similarity_threshold
  nearest_distance <= max_distance
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'apollo.entity_watchdog.entity_exists_in_apollo')
  false
end

#entity_types ⇒ Object



120
121
122
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 120

# Entity types taken from settings[:entity_watchdog][:types], as strings.
# A nil setting yields an empty list and a single value yields a one-element list.
#
# @return [Array<String>]
def entity_types
  configured = settings[:entity_watchdog][:types]
  Array(configured).map { |type| type.to_s }
end

#generate_task? ⇒ Boolean

Returns:

  • (Boolean)


27
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 27

# Always answers false.
# NOTE(review): presumably tells the actor framework not to create a task
# record for each run — confirm against the base actor.
#
# @return [Boolean] false
def generate_task?
  false
end

#mark_task_log_text_processed(text) ⇒ Object



136
137
138
139
140
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 136

# Records a log text as processed, keyed by task_log_text_hash(text), then
# drops the oldest entries until the cache holds no more than
# processed_task_log_hash_limit hashes.
#
# @param text [Object] the log text to remember
# @return [nil]
def mark_task_log_text_processed(text)
  seen = processed_task_log_hashes
  seen.store(task_log_text_hash(text), true)
  # Hash#shift removes the first pair in insertion order, i.e. the oldest.
  seen.shift until seen.size <= processed_task_log_hash_limit
end

#min_entity_confidence ⇒ Object



124
125
126
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 124

# Minimum confidence handed to extract_entities, read from
# settings[:entity_watchdog][:min_confidence] and coerced with #to_f.
#
# @return [Float]
def min_entity_confidence
  watchdog_settings = settings[:entity_watchdog]
  watchdog_settings[:min_confidence].to_f
end

#processed_task_log_hash_limit ⇒ Object



146
147
148
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 146

# Maximum number of processed-text hashes to keep: four times
# settings[:entity_watchdog][:log_limit], but never fewer than 100.
#
# @return [Integer]
def processed_task_log_hash_limit
  floor = 100
  scaled = settings[:entity_watchdog][:log_limit].to_i * 4
  scaled > floor ? scaled : floor
end

#processed_task_log_hashes ⇒ Object



142
143
144
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 142

# Per-instance cache of processed log-text hashes, created empty on first use
# and held in @processed_task_log_hashes.
#
# @return [Hash] the same hash object on every call
def processed_task_log_hashes
  return @processed_task_log_hashes if @processed_task_log_hashes

  @processed_task_log_hashes = {}
end

#publish_entity_ingest(entity) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 105

# Publishes an Ingest message describing the entity, then logs the publication.
#
# Does nothing when the Ingest message class is not loaded. Any StandardError
# raised while building, publishing or logging is passed to handle_exception.
#
# @param entity [Hash] read via entity[:type] and entity[:name]
# @return [Object, nil] nil when Ingest is unavailable; otherwise the result
#   of log.info, or of handle_exception on failure
def publish_entity_ingest(entity)
  return unless defined?(Legion::Extensions::Apollo::Transport::Messages::Ingest)

  type_tag = entity[:type].to_s
  message = Legion::Extensions::Apollo::Transport::Messages::Ingest.new(
    content:      "#{type_tag.capitalize}: #{entity[:name]}",
    content_type: 'concept',
    tags:         [type_tag, 'entity_watchdog'],
    source_agent: 'lex-apollo:entity_watchdog',
    context:      { entity_type: entity[:type], original_name: entity[:name] }
  )
  message.publish
  log.info("EntityWatchdog published entity type=#{entity[:type]} name=#{entity[:name]}")
rescue StandardError => e
  handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.publish_entity_ingest')
end

#recent_task_log_texts ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 69

# Collects the distinct, non-empty messages of recent TaskLog rows.
#
# Rows created within the last `lookback_seconds` are read newest first and
# capped at `log_limit`; both values come from settings[:entity_watchdog].
# When a value (or the whole section) is missing, the class defaults
# TASK_LOG_LOOKBACK_SECONDS and TASK_LOG_LIMIT are used. Without the fallback a
# nil lookback raises inside `Time.now - nil` (the scan then silently sees no
# logs), and a nil limit would leave the query uncapped.
#
# Returns [] when the TaskLog model is not loaded. Any StandardError is passed
# to handle_exception and also yields [].
#
# @return [Array<String>]
def recent_task_log_texts
  return [] unless defined?(Legion::Data) && defined?(Legion::Data::Model::TaskLog)

  watchdog = settings[:entity_watchdog] || {}
  lookback = watchdog[:lookback_seconds] || TASK_LOG_LOOKBACK_SECONDS
  log_limit = watchdog[:log_limit] || TASK_LOG_LIMIT
  cutoff = Time.now - lookback
  logs = Legion::Data::Model::TaskLog
         .where { created_at >= cutoff }
         .order(Sequel.desc(:created_at))
         .limit(log_limit)
         .select_map(:message)
  # nil messages become "" through to_s and are dropped with the other blanks.
  texts = logs.map(&:to_s).reject(&:empty?).uniq
  log.debug("EntityWatchdog recent_task_log_texts lookback=#{lookback} limit=#{log_limit} raw=#{logs.size} unique=#{texts.size}")
  texts
rescue StandardError => e
  handle_exception(e, level: :warn, operation: 'apollo.entity_watchdog.recent_task_log_texts')
  []
end

#run_now? ⇒ Boolean

Returns:

  • (Boolean)


24
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 24

# Always answers false.
# NOTE(review): presumably means the first run waits for the interval rather
# than firing at startup — confirm against the base actor.
#
# @return [Boolean] false
def run_now?
  false
end

#runner_class ⇒ Object



21
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 21

# The class of the receiver; the actor names its own class as the runner.
#
# @return [Class]
def runner_class
  self.class
end

#runner_function ⇒ Object



22
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 22

# Name of the runner method, as a string; it matches scan_and_ingest.
#
# @return [String] 'scan_and_ingest'
def runner_function
  'scan_and_ingest'
end

#scan_and_ingest ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 37

# Scans recent task-log texts for entities and publishes the ones Apollo does
# not already hold.
#
# Texts already marked as processed are skipped. A text is marked processed
# after a successful extraction unless the result's :source is :unavailable.
# Each extracted entity is checked with entity_exists_in_apollo? and, when
# absent, published and counted.
#
# @return [Hash] { success: true, ingested: 0, reason: :no_logs } when there
#   is nothing to scan; { success: true, ingested:, logs_scanned: } after a
#   scan; { success: false, error: } when a StandardError was raised
def scan_and_ingest
  log_texts = recent_task_log_texts
  log.debug("EntityWatchdog scan_and_ingest log_texts=#{log_texts.size}")
  return { success: true, ingested: 0, reason: :no_logs } if log_texts.empty?

  published = log_texts.sum do |log_text|
    next 0 if task_log_text_processed?(log_text)

    extraction = extract_entities(text: log_text, entity_types: entity_types, min_confidence: min_entity_confidence)
    next 0 unless extraction[:success]

    mark_task_log_text_processed(log_text) if extraction[:source] != :unavailable

    # Check and publish stay interleaved per entity; every publish attempt is counted.
    extraction[:entities].count do |entity|
      next false if entity_exists_in_apollo?(entity)

      publish_entity_ingest(entity)
      true
    end
  end

  log.info("EntityWatchdog ingested=#{published} logs_scanned=#{log_texts.size}")
  { success: true, ingested: published, logs_scanned: log_texts.size }
rescue StandardError => e
  handle_exception(e, level: :error, operation: 'apollo.entity_watchdog.scan_and_ingest')
  { success: false, error: e.message }
end

#task_log_text_hash(text) ⇒ Object



150
151
152
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 150

# Hex-encoded SHA-256 digest of the text's string form, used as the cache key
# for processed log texts.
#
# @param text [Object] converted with #to_s before hashing
# @return [String] 64-character lowercase hex digest
def task_log_text_hash(text)
  hasher = Digest::SHA256.new
  hasher.update(text.to_s)
  hasher.hexdigest
end

#task_log_text_processed?(text) ⇒ Boolean

Returns:

  • (Boolean)


132
133
134
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 132

# Tells whether the text's hash is already in the processed-text cache.
#
# @param text [Object] the log text to look up
# @return [Boolean]
def task_log_text_processed?(text)
  digest = task_log_text_hash(text)
  processed_task_log_hashes.include?(digest)
end

#time ⇒ Object



23
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 23

# Run interval taken from settings[:actors][:entity_watchdog_interval].
# NOTE(review): the unit is not visible here (presumably seconds) — confirm
# against the Every actor.
#
# @return [Object] the configured value, unmodified
def time
  actor_settings = settings[:actors]
  actor_settings[:entity_watchdog_interval]
end

#use_runner? ⇒ Boolean

Returns:

  • (Boolean)


25
# File 'lib/legion/extensions/apollo/actors/entity_watchdog.rb', line 25

# Always answers false.
# NOTE(review): presumably makes the framework call the actor's own method
# directly instead of dispatching through a runner — confirm against the base actor.
#
# @return [Boolean] false
def use_runner?
  false
end