Class: Legion::Extensions::Agentic::Affect::Reward::Helpers::RewardSignal

Inherits:
Object
  • Object
show all
Defined in:
lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ RewardSignal

Returns a new instance of RewardSignal.



13
14
15
16
17
18
19
20
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 13

# Builds a tracker with no accumulated state.
#
# The running average, predicted reward and last reward-prediction error all
# start at 0.0, both history containers start empty, and the tick counter
# starts at 0.
def initialize
  @running_average = 0.0
  @predicted_reward = 0.0
  @last_rpe = 0.0 # replaced by #compute with reward - predicted_reward
  @history = []
  @domain_history = {} # domain => Array of { reward:, at: }, filled by #record_domain_reward
  @tick_count = 0 # incremented once per #compute call
end

Instance Attribute Details

#domain_history ⇒ Object (readonly)

Returns the value of attribute domain_history.



10
11
12
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Reader for the per-domain reward log.
#
# @return [Hash] the internal hash itself (not a copy), keyed by domain; each
#   value is the Array of `{ reward:, at: }` entries appended by
#   #record_domain_reward
def domain_history
  @domain_history
end

#history ⇒ Object (readonly)

Returns the value of attribute history.



10
11
12
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Reader for the reward history.
#
# @return [Array] the internal array itself (not a copy); #discounted_return
#   and #reward_volatility read a :reward key from each of its entries
def history
  @history
end

#last_rpe ⇒ Object (readonly)

Returns the value of attribute last_rpe.



10
11
12
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Reader for the most recent reward-prediction error.
#
# @return [Numeric] 0.0 until #compute runs; afterwards the clamped reward of
#   the latest #compute call minus the prediction held before that call
def last_rpe
  @last_rpe
end

#predicted_reward ⇒ Object (readonly)

Returns the value of attribute predicted_reward.



10
11
12
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Reader for the current reward prediction.
#
# @return [Numeric] 0.0 until #compute runs; each #compute call replaces it
#   with ema(previous, reward, Constants::PREDICTION_ALPHA)
def predicted_reward
  @predicted_reward
end

#running_average ⇒ Object (readonly)

Returns the value of attribute running_average.



10
11
12
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Reader for the smoothed reward level.
#
# @return [Numeric] 0.0 until #compute runs; each #compute call replaces it
#   with ema(previous, reward, Constants::REWARD_ALPHA). This is the value
#   compared by #anhedonic? and #euphoric?
def running_average
  @running_average
end

#tick_count ⇒ Object (readonly)

Returns the value of attribute tick_count.



10
11
12
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Reader for the tick counter.
#
# @return [Integer] starts at 0 and grows by one on every #compute call
def tick_count
  @tick_count
end

Instance Method Details

#anhedonic? ⇒ Boolean

Returns:

  • (Boolean)


76
77
78
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 76

# Reports whether the smoothed reward level has dropped under the anhedonia
# cutoff.
#
# @return [Boolean] true when the running average is strictly below
#   Constants::ANHEDONIA_THRESHOLD
def anhedonic?
  cutoff = Constants::ANHEDONIA_THRESHOLD
  @running_average < cutoff
end

#compute(source_signals) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 22

# Folds one tick of source signals into the reward state and reports the
# outcome.
#
# The weighted sum of the signals is clamped into Constants::REWARD_RANGE, the
# reward-prediction error is measured against the prediction held *before*
# this tick, and only then are the running average and the prediction advanced.
#
# @param source_signals handed unchanged to weighted_sum and record, and
#   echoed back under :sources
#   NOTE(review): the expected shape is defined by weighted_sum, which is not
#   visible here — confirm against that helper
# @return [Hash] :reward, :rpe, :rpe_class, :running_average,
#   :predicted_reward, :sources and :learning_signal; the numeric entries are
#   rounded to 4 decimal places
def compute(source_signals)
  @tick_count += 1

  unclamped = weighted_sum(source_signals)
  lower_bound = Constants::REWARD_RANGE[:min]
  upper_bound = Constants::REWARD_RANGE[:max]
  reward = unclamped.clamp(lower_bound, upper_bound)

  # Order matters: the error must use the prediction from before this update.
  @last_rpe = reward - @predicted_reward
  @running_average = ema(@running_average, reward, Constants::REWARD_ALPHA)
  @predicted_reward = ema(@predicted_reward, reward, Constants::PREDICTION_ALPHA)

  record(reward, source_signals)

  {
    reward:           reward.round(4),
    rpe:              @last_rpe.round(4),
    rpe_class:        classify_rpe(@last_rpe),
    running_average:  @running_average.round(4),
    predicted_reward: @predicted_reward.round(4),
    sources:          source_signals,
    learning_signal:  learning_signal?
  }
end

#discounted_return(window = nil) ⇒ Object



92
93
94
95
96
97
98
99
100
101
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 92

# Sums history rewards with an exponential discount applied by distance from
# the end of the history: the last entry is weighted by
# Constants::TEMPORAL_DISCOUNT**0, the one before it by **1, and so on.
#
# @param window [Integer, nil] when given, only the last `window` history
#   entries are considered; otherwise the whole history is used
# @return [Float] the discounted total, or 0.0 when there are no entries
def discounted_return(window = nil)
  sample = window ? @history.last(window) : @history
  return 0.0 if sample.empty?

  gamma = Constants::TEMPORAL_DISCOUNT
  sample.reverse_each.with_index.inject(0.0) do |running_total, (entry, age)|
    running_total + (entry[:reward] * (gamma**age))
  end
end

#domain_average(domain) ⇒ Object



50
51
52
53
54
55
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 50

# Arithmetic mean of every reward currently stored for one domain.
#
# @param domain key into the per-domain history
# @return [Float] the mean of the stored :reward values, or 0.0 when the
#   domain is unknown or has no entries
def domain_average(domain)
  samples = @domain_history[domain]
  return 0.0 if samples.nil? || samples.empty?

  total = samples.sum { |sample| sample[:reward] }
  total / samples.size.to_f
end

#domain_trend(domain) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 57

# Classifies the direction of a domain's recent rewards.
#
# Looks at up to the 10 newest entries for the domain, splits them into an
# older and a newer half (the newer half gets the extra item when the count is
# odd) and compares the means of the two halves.
#
# @param domain key into the per-domain history
# @return [Symbol] :no_data when the domain is unknown or has fewer than 5
#   entries; :improving when the newer mean exceeds the older by more than
#   0.05; :declining when it trails by more than 0.05; :stable otherwise
def domain_trend(domain)
  samples = @domain_history[domain]
  return :no_data if samples.nil? || samples.size < 5

  rewards = samples.last(10).map { |sample| sample[:reward] }
  midpoint = rewards.size / 2
  older = rewards.take(midpoint)
  newer = rewards.drop(midpoint)
  delta = mean(newer) - mean(older)

  return :improving if delta > 0.05
  return :declining if delta < -0.05

  :stable
end

#euphoric? ⇒ Boolean

Returns:

  • (Boolean)


80
81
82
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 80

# Reports whether the smoothed reward level has risen past the euphoria
# cutoff.
#
# @return [Boolean] true when the running average is strictly above
#   Constants::EUPHORIA_THRESHOLD
def euphoric?
  cutoff = Constants::EUPHORIA_THRESHOLD
  @running_average > cutoff
end

#learning_signal? ⇒ Boolean

Returns:

  • (Boolean)


84
85
86
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 84

# Reports whether the latest reward-prediction error is large enough, in
# either direction, to reach the configured threshold.
#
# @return [Boolean] true when the absolute value of the last error is at
#   least Constants::RPE_THRESHOLD
def learning_signal?
  surprise = @last_rpe.abs
  surprise >= Constants::RPE_THRESHOLD
end

#recent_rewards(limit = 20) ⇒ Object



88
89
90
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 88

# Returns the tail of the reward history.
#
# @param limit [Integer] maximum number of entries to return (default 20)
# @return [Array] a new array with up to `limit` entries taken from the end of
#   the history, in stored order; the entries themselves are not copied
def recent_rewards(limit = 20)
  @history.last(limit)
end

#record_domain_reward(domain, reward) ⇒ Object



44
45
46
47
48
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 44

# Appends a timestamped reward to a domain's log, creating the log on first
# use, and drops the oldest entries so the log never holds more than
# Constants::MAX_DOMAIN_HISTORY items.
#
# @param domain key identifying the log to append to
# @param reward value stored under :reward, unmodified
# @return [nil]
def record_domain_reward(domain, reward)
  bucket = (@domain_history[domain] ||= [])
  bucket << { reward: reward, at: Time.now.utc }
  bucket.shift until bucket.size <= Constants::MAX_DOMAIN_HISTORY
end

#reward_volatility ⇒ Object



103
104
105
106
107
108
109
110
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 103

# Population standard deviation of the rewards in the last
# Constants::MOMENTUM_WINDOW history entries.
#
# @return [Float] the standard deviation, or 0.0 while the history holds
#   fewer than 3 entries
def reward_volatility
  return 0.0 if @history.size < 3

  rewards = @history.last(Constants::MOMENTUM_WINDOW).map { |entry| entry[:reward] }
  center = mean(rewards)
  squared_deviations = rewards.map { |reward| (reward - center)**2 }
  Math.sqrt(squared_deviations.sum / rewards.size.to_f)
end

#to_h ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 112

# Snapshot of the tracker's current state as a plain hash.
#
# Floating-point figures are rounded to 4 decimal places; the history and the
# per-domain log are reported only by their sizes.
#
# @return [Hash] :running_average, :predicted_reward, :last_rpe, :rpe_class,
#   :tick_count, :learning_signal, :anhedonic, :euphoric, :volatility,
#   :domains_tracked and :history_size, in that order
def to_h
  estimates = {
    running_average:  @running_average.round(4),
    predicted_reward: @predicted_reward.round(4),
    last_rpe:         @last_rpe.round(4),
    rpe_class:        classify_rpe(@last_rpe),
    tick_count:       @tick_count
  }
  flags = {
    learning_signal: learning_signal?,
    anhedonic:       anhedonic?,
    euphoric:        euphoric?
  }
  spread = {
    volatility:      reward_volatility.round(4),
    domains_tracked: @domain_history.size,
    history_size:    @history.size
  }

  # Merging in this sequence keeps the key order of the combined hash stable.
  estimates.merge(flags).merge(spread)
end