Class: AgentHarness::Orchestration::HealthMonitor

Inherits:
Object
  • Object
show all
Defined in:
lib/agent_harness/orchestration/health_monitor.rb

Overview

Monitors provider health based on success/failure metrics

Tracks success and failure rates to determine provider health status. Uses a sliding window approach to focus on recent performance.

Examples:

monitor = HealthMonitor.new
monitor.record_success(:claude)
monitor.healthy?(:claude) # => true

Constant Summary collapse

DEFAULT_WINDOW_SIZE =
100
DEFAULT_HEALTH_THRESHOLD =
0.5

Instance Method Summary collapse

Constructor Details

#initialize(config = nil, window_size: nil, health_threshold: nil) ⇒ HealthMonitor

Create a new health monitor

Parameters:

  • config (HealthCheckConfig, nil) (defaults to: nil)

    configuration object

  • window_size (Integer) (defaults to: nil)

    number of events to track

  • health_threshold (Float) (defaults to: nil)

    minimum success rate for healthy



23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 23

# Builds a monitor with no recorded provider metrics.
#
# @param config [#enabled, #failure_threshold, nil] optional settings source;
#   when omitted the monitor is enabled with a failure threshold of 3
# @param window_size [Integer, nil] handed to each ProviderHealthMetrics;
#   falls back to DEFAULT_WINDOW_SIZE
# @param health_threshold [Float, nil] minimum success rate treated as
#   healthy; falls back to DEFAULT_HEALTH_THRESHOLD
def initialize(config = nil, window_size: nil, health_threshold: nil)
  @enabled = config ? config.enabled : true
  @failure_threshold = config ? config.failure_threshold : 3

  @window_size = window_size || DEFAULT_WINDOW_SIZE
  @health_threshold = health_threshold || DEFAULT_HEALTH_THRESHOLD
  @mutex = Mutex.new
  # Metrics objects are created lazily the first time a provider key is looked up.
  @provider_metrics = Hash.new do |store, provider|
    store[provider] = ProviderHealthMetrics.new(@window_size)
  end
end

Instance Method Details

#all_metrics ⇒ Hash<Symbol, Hash>

Get health status for all tracked providers

Returns:

  • (Hash<Symbol, Hash>)

    health status by provider



89
90
91
92
93
94
95
96
97
98
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 89

# Get health status for all tracked providers.
#
# The snapshot is taken while holding @mutex so that it cannot race with
# record_success / record_failure / reset!, which mutate the same hash under
# that lock (adding a key while the hash is being iterated raises in Ruby).
#
# @return [Hash<Symbol, Hash>] per-provider hash with :success_rate,
#   :total_calls, :recent_successes and :recent_failures
def all_metrics
  @mutex.synchronize do
    @provider_metrics.transform_values do |metrics|
      {
        success_rate: metrics.success_rate,
        total_calls: metrics.total_calls,
        recent_successes: metrics.recent_successes,
        recent_failures: metrics.recent_failures
      }
    end
  end
end

#healthy?(provider_name) ⇒ Boolean

Check if a provider is healthy

Parameters:

  • provider_name (Symbol, String)

    the provider name

Returns:

  • (Boolean)

    true if healthy



62
63
64
65
66
67
68
69
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 62

# Check if a provider is healthy.
#
# A provider counts as healthy when monitoring is disabled, when it has no
# recorded calls, or when its success rate is at least @health_threshold.
#
# The lookup runs under @mutex: indexing @provider_metrics may insert a new
# entry through the hash's default block, so it is a write to shared state
# just like record_success / record_failure.
#
# @param provider_name [Symbol, String] the provider name (converted with #to_sym)
# @return [Boolean] true if healthy
def healthy?(provider_name)
  return true unless @enabled

  @mutex.synchronize do
    metrics = @provider_metrics[provider_name.to_sym]
    # Short-circuit so success_rate is never consulted for a provider with no calls.
    metrics.total_calls.zero? || metrics.success_rate >= @health_threshold
  end
end

#metrics_for(provider_name) ⇒ Hash

Get health metrics for a provider

Parameters:

  • provider_name (Symbol, String)

    the provider name

Returns:

  • (Hash)

    health metrics



75
76
77
78
79
80
81
82
83
84
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 75

# Get health metrics for a provider.
#
# The counters are read under @mutex because indexing @provider_metrics may
# insert a new entry through the hash's default block, and because writers
# update the same metrics object under that lock.
#
# @param provider_name [Symbol, String] the provider name (converted with #to_sym)
# @return [Hash] :success_rate, :total_calls, :recent_successes,
#   :recent_failures and :healthy
def metrics_for(provider_name)
  snapshot = @mutex.synchronize do
    metrics = @provider_metrics[provider_name.to_sym]
    {
      success_rate: metrics.success_rate,
      total_calls: metrics.total_calls,
      recent_successes: metrics.recent_successes,
      recent_failures: metrics.recent_failures
    }
  end

  # Ruby's Mutex is not reentrant, so healthy? is called only after the lock
  # has been released; it must never be invoked from inside the block above.
  snapshot[:healthy] = healthy?(provider_name)
  snapshot
end

#record_failure(provider_name) ⇒ void

This method returns an undefined value.

Record a failed call for a provider

Parameters:

  • provider_name (Symbol, String)

    the provider name



52
53
54
55
56
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 52

# Record a failed call for a provider.
#
# @param provider_name [Symbol, String] the provider name (converted with #to_sym)
# @return [void]
def record_failure(provider_name)
  key = provider_name.to_sym
  @mutex.synchronize { @provider_metrics[key].record_failure }
end

#record_success(provider_name) ⇒ void

This method returns an undefined value.

Record a successful call for a provider

Parameters:

  • provider_name (Symbol, String)

    the provider name



42
43
44
45
46
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 42

# Record a successful call for a provider.
#
# @param provider_name [Symbol, String] the provider name (converted with #to_sym)
# @return [void]
def record_success(provider_name)
  key = provider_name.to_sym
  @mutex.synchronize { @provider_metrics[key].record_success }
end

#reset! ⇒ void

This method returns an undefined value.

Reset all health metrics



103
104
105
106
107
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 103

# Reset all health metrics by emptying the per-provider store under the lock.
#
# @return [void]
def reset!
  @mutex.synchronize { @provider_metrics.clear }
end

#reset_provider!(provider_name) ⇒ void

This method returns an undefined value.

Reset metrics for a specific provider

Parameters:

  • provider_name (Symbol, String)

    the provider name



113
114
115
116
117
# File 'lib/agent_harness/orchestration/health_monitor.rb', line 113

# Reset metrics for a specific provider by removing its entry under the lock.
#
# @param provider_name [Symbol, String] the provider name (converted with #to_sym)
# @return [void]
def reset_provider!(provider_name)
  key = provider_name.to_sym
  @mutex.synchronize { @provider_metrics.delete(key) }
end