Module: Legion::Telemetry::SafetyMetrics

Defined in:
lib/legion/telemetry/safety_metrics.rb

Constant Summary collapse

# Length of the sliding window kept for each tracked series.
# init_windows passes every value to SlidingWindow.new; the values are
# presumably seconds (60 s for actions, 5 min for the rest) — confirm against
# SlidingWindow.
WINDOWS =
{
  actions:    60,
  failures:   300,
  successes:  300,
  confidence: 300
}.freeze

Class Method Summary collapse

Class Method Details

.actions_per_minute(agent_id) ⇒ Object



110
111
112
# File 'lib/legion/telemetry/safety_metrics.rb', line 110

# Number of entries the :actions window currently reports for an agent.
#
# Safe to call before the windows have been built: when @windows is nil
# (or has no :actions window) the result is 0 instead of a NoMethodError.
#
# @param agent_id [Object] agent identifier, passed to the window as `agent:`
# @return [Integer] 0 when no window is available, otherwise the value
#   returned by the window's `count_for`
def actions_per_minute(agent_id)
  window = @windows&.dig(:actions)
  window&.count_for(agent: agent_id) || 0
end

.confidence_drift(agent_id) ⇒ Object



121
122
123
124
125
126
# File 'lib/legion/telemetry/safety_metrics.rb', line 121

# Mean of the `:delta` values recorded for an agent in the :confidence window.
#
# Entries without a delta count as 0.0. The sum is seeded with 0.0 so that
# Integer deltas are averaged with float division rather than truncated.
# Safe to call before the windows have been built (returns 0.0).
#
# @param agent_id [Object] agent identifier, passed to the window as `agent:`
# @return [Float] average delta, or 0.0 when there are no matching entries
def confidence_drift(agent_id)
  entries = @windows&.dig(:confidence)&.entries_matching(agent: agent_id) || []
  return 0.0 if entries.empty?

  entries.sum(0.0) { |entry| entry[:delta] || 0.0 } / entries.size
end

.governance_override_total ⇒ Object



132
133
134
# File 'lib/legion/telemetry/safety_metrics.rb', line 132

# Running total kept in @governance_count (incremented by record_governance).
#
# @return [Integer] 0 until the first increment
def governance_override_total
  @governance_count.to_i
end

.init_windows ⇒ Object



66
67
68
# File 'lib/legion/telemetry/safety_metrics.rb', line 66

# Builds one SlidingWindow per WINDOWS entry and stores the resulting hash
# (same keys as WINDOWS) in @windows, replacing any previous windows.
#
# @return [Hash] the newly built window hash
def init_windows
  @windows = WINDOWS.each_with_object({}) do |(series, length), built|
    built[series] = SlidingWindow.new(length)
  end
end

.probe_detection_total ⇒ Object



136
137
138
# File 'lib/legion/telemetry/safety_metrics.rb', line 136

# Running total kept in @probe_count (incremented by record_probe).
#
# @return [Integer] 0 until the first increment
def probe_detection_total
  @probe_count.to_i
end

.record_action(agent_id: 'unknown') ⇒ Object



82
83
84
# File 'lib/legion/telemetry/safety_metrics.rb', line 82

# Pushes an `{ agent: agent_id }` entry onto the :actions window.
#
# Extra keyword arguments are accepted and ignored so an event payload can be
# splatted straight in. A no-op (returns nil) when @windows has not been built
# yet, instead of raising NoMethodError on nil.
#
# @param agent_id [Object] agent identifier (defaults to 'unknown')
# @return [Object, nil] whatever the window's `push` returns, or nil
def record_action(agent_id: 'unknown', **)
  window = @windows&.dig(:actions)
  window&.push(agent: agent_id)
end

.record_confidence(agent_id: 'unknown', delta: 0.0) ⇒ Object



106
107
108
# File 'lib/legion/telemetry/safety_metrics.rb', line 106

# Pushes an `{ agent:, delta: }` entry onto the :confidence window.
#
# Extra keyword arguments are accepted and ignored so an event payload can be
# splatted straight in. A no-op (returns nil) when @windows has not been built
# yet, instead of raising NoMethodError on nil.
#
# @param agent_id [Object] agent identifier (defaults to 'unknown')
# @param delta [Numeric] confidence change to record (defaults to 0.0)
# @return [Object, nil] whatever the window's `push` returns, or nil
def record_confidence(agent_id: 'unknown', delta: 0.0, **)
  window = @windows&.dig(:confidence)
  window&.push(agent: agent_id, delta: delta)
end

.record_escalation(agent_id: 'unknown') ⇒ Object

rubocop:disable Lint/UnusedMethodArgument



94
95
96
# File 'lib/legion/telemetry/safety_metrics.rb', line 94

# Bumps the counter held in @escalation_count by one, starting from zero on
# first use. `agent_id` and any extra keywords are accepted but not used.
#
# @return [Integer] the counter value after the increment
def record_escalation(agent_id: 'unknown', **) # rubocop:disable Lint/UnusedMethodArgument
  @escalation_count ||= 0
  @escalation_count += 1
end

.record_failure(agent_id: 'unknown') ⇒ Object



86
87
88
# File 'lib/legion/telemetry/safety_metrics.rb', line 86

# Pushes an `{ agent:, type: :failure }` entry onto the :failures window.
#
# Extra keyword arguments are accepted and ignored so an event payload can be
# splatted straight in. A no-op (returns nil) when @windows has not been built
# yet, instead of raising NoMethodError on nil.
#
# @param agent_id [Object] agent identifier (defaults to 'unknown')
# @return [Object, nil] whatever the window's `push` returns, or nil
def record_failure(agent_id: 'unknown', **)
  window = @windows&.dig(:failures)
  window&.push(agent: agent_id, type: :failure)
end

.record_governance ⇒ Object



98
99
100
# File 'lib/legion/telemetry/safety_metrics.rb', line 98

# Bumps the counter held in @governance_count by one, starting from zero on
# first use. Any keyword arguments are accepted and ignored.
#
# @return [Integer] the counter value after the increment
def record_governance(**)
  @governance_count = @governance_count.to_i.succ
end

.record_probe ⇒ Object



102
103
104
# File 'lib/legion/telemetry/safety_metrics.rb', line 102

# Bumps the counter held in @probe_count by one, starting from zero on first
# use. Any keyword arguments are accepted and ignored.
#
# @return [Integer] the counter value after the increment
def record_probe(**)
  @probe_count ||= 0
  @probe_count += 1
end

.record_success(agent_id: 'unknown') ⇒ Object



90
91
92
# File 'lib/legion/telemetry/safety_metrics.rb', line 90

# Pushes an `{ agent:, type: :success }` entry onto the :successes window.
#
# Extra keyword arguments are accepted and ignored so an event payload can be
# splatted straight in. A no-op (returns nil) when @windows has not been built
# yet, instead of raising NoMethodError on nil.
#
# @param agent_id [Object] agent identifier (defaults to 'unknown')
# @return [Object, nil] whatever the window's `push` returns, or nil
def record_success(agent_id: 'unknown', **)
  window = @windows&.dig(:successes)
  window&.push(agent: agent_id, type: :success)
end

.register_prometheus_metrics ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/legion/telemetry/safety_metrics.rb', line 147

# Registers the safety gauges and counters with Legion::Metrics.
#
# Does nothing when Legion::Metrics is not defined or does not respond to
# `register_gauge`. Gauges are registered first, then counters, in the order
# listed below. Any StandardError is logged at debug level (when
# Legion::Logging is defined) and swallowed.
#
# @return [Object, nil] result of the last counter registration, or nil when
#   skipped or when an error was rescued
def register_prometheus_metrics
  return unless defined?(Legion::Metrics) && Legion::Metrics.respond_to?(:register_gauge)

  gauges = {
    legion_safety_actions_per_minute: 'Runner invocations per agent per minute',
    legion_safety_tool_failure_ratio: 'Tool failure percentage over 5m window',
    legion_safety_confidence_drift:   'Rate of confidence decrease across synapses'
  }
  counters = {
    legion_safety_scope_escalation_total:    'Denied access attempts',
    legion_safety_governance_override_total: 'Governance constraint violations',
    legion_safety_probe_detection_total:     'Detected prompt injection probes'
  }

  gauges.each { |metric, help| Legion::Metrics.register_gauge(metric, help) }
  # `.last` keeps the return value equal to the final register_counter result.
  counters.map { |metric, help| Legion::Metrics.register_counter(metric, help) }.last
rescue StandardError => error
  if defined?(Legion::Logging)
    Legion::Logging.debug "SafetyMetrics#register_prometheus_metrics failed: #{error.message}"
  end
  nil
end

.safety_enabled? ⇒ Boolean

Returns:

  • (Boolean)


140
141
142
143
144
145
# File 'lib/legion/telemetry/safety_metrics.rb', line 140

# Reads the `telemetry -> safety -> enabled` setting from Legion::Settings.
#
# Any StandardError raised while reading it is logged at debug level (when
# Legion::Logging is defined) and treated as "disabled".
#
# @return [Object, false] the raw setting value (may be nil when unset), or
#   false when the lookup raised
def safety_enabled?
  setting_path = %i[telemetry safety enabled]
  Legion::Settings.dig(*setting_path)
rescue StandardError => error
  if defined?(Legion::Logging)
    Legion::Logging.debug "SafetyMetrics#safety_enabled? failed: #{error.message}"
  end
  false
end

.scope_escalation_total ⇒ Object



128
129
130
# File 'lib/legion/telemetry/safety_metrics.rb', line 128

# Running total kept in @escalation_count (incremented by record_escalation).
#
# @return [Integer] 0 until the first increment
def scope_escalation_total
  @escalation_count.to_i
end

.start ⇒ Object



58
59
60
61
62
63
64
# File 'lib/legion/telemetry/safety_metrics.rb', line 58

# Entry point: when safety_enabled? is truthy, builds the windows, registers
# the metrics and subscribes to events, in that order. Otherwise does nothing.
#
# @return [Object, nil] result of subscribe_events, or nil when disabled
def start
  if safety_enabled?
    init_windows
    register_prometheus_metrics
    subscribe_events
  end
end

.subscribe_events ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/legion/telemetry/safety_metrics.rb', line 70

# Wires each Legion event to the recorder that handles it. Every handler
# splats the event payload into the recorder as keyword arguments.
# Does nothing when Legion::Events is not defined.
#
# @return [Object, nil] result of the last `Legion::Events.on` call, or nil
#   when the event bus is unavailable
def subscribe_events
  return unless defined?(Legion::Events)

  recorders = {
    'ingress.received'             => :record_action,
    'runner.failure'               => :record_failure,
    'runner.success'               => :record_success,
    'rbac.deny'                    => :record_escalation,
    'governance.consent_violation' => :record_governance,
    'privatecore.probe_detected'   => :record_probe,
    'synapse.confidence_update'    => :record_confidence
  }

  subscriptions = recorders.map do |event_name, recorder|
    Legion::Events.on(event_name) { |payload| __send__(recorder, **payload) }
  end
  # Keep the return value equal to that of the final subscription call.
  subscriptions.last
end

.tool_failure_ratio(agent_id) ⇒ Object



114
115
116
117
118
119
# File 'lib/legion/telemetry/safety_metrics.rb', line 114

# Fraction of an agent's recorded runs that failed:
# failures / (failures + successes), using the counts reported by the
# :failures and :successes windows.
#
# Safe to call before the windows have been built: a nil @windows (or a
# missing window) counts as zero instead of raising NoMethodError.
#
# @param agent_id [Object] agent identifier, passed to the windows as `agent:`
# @return [Float] ratio in 0.0..1.0; 0.0 when nothing has been recorded
def tool_failure_ratio(agent_id)
  fails = @windows&.dig(:failures)&.count_for(agent: agent_id) || 0
  successes = @windows&.dig(:successes)&.count_for(agent: agent_id) || 0
  total = fails + successes
  total.zero? ? 0.0 : fails.to_f / total
end