Class: Legion::Extensions::Agentic::Affect::Reward::Helpers::RewardSignal
- Inherits: Object
- Inheritance hierarchy: Object → Legion::Extensions::Agentic::Affect::Reward::Helpers::RewardSignal
- Defined in: lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb
Instance Attribute Summary
- #domain_history ⇒ Object (readonly): Returns the value of attribute domain_history.
- #history ⇒ Object (readonly): Returns the value of attribute history.
- #last_rpe ⇒ Object (readonly): Returns the value of attribute last_rpe.
- #predicted_reward ⇒ Object (readonly): Returns the value of attribute predicted_reward.
- #running_average ⇒ Object (readonly): Returns the value of attribute running_average.
- #tick_count ⇒ Object (readonly): Returns the value of attribute tick_count.
Instance Method Summary
- #anhedonic? ⇒ Boolean
- #compute(source_signals) ⇒ Object
- #discounted_return(window = nil) ⇒ Object
- #domain_average(domain) ⇒ Object
- #domain_trend(domain) ⇒ Object
- #euphoric? ⇒ Boolean
- #initialize ⇒ RewardSignal (constructor): A new instance of RewardSignal.
- #learning_signal? ⇒ Boolean
- #recent_rewards(limit = 20) ⇒ Object
- #record_domain_reward(domain, reward) ⇒ Object
- #reward_volatility ⇒ Object
- #to_h ⇒ Object
Constructor Details
#initialize ⇒ RewardSignal
Returns a new instance of RewardSignal.
13 14 15 16 17 18 19 20 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 13

# Sets up a fresh tracker: every scalar starts at zero and both history
# containers start empty.
#
# @return [RewardSignal]
def initialize
  @running_average = 0.0
  @predicted_reward = 0.0
  @last_rpe = 0.0
  @history = []
  @domain_history = {}
  @tick_count = 0
end
Instance Attribute Details
#domain_history ⇒ Object (readonly)
Returns the value of attribute domain_history.
10 11 12 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Read-only accessor for the per-domain reward log.
#
# The constructor initialises this to an empty Hash; #record_domain_reward
# stores, per domain key, an Array of `{ reward:, at: }` hashes.
# The live Hash is returned, not a copy.
#
# @return [Hash]
def domain_history
  @domain_history
end
#history ⇒ Object (readonly)
Returns the value of attribute history.
10 11 12 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Read-only accessor for the global reward history.
#
# The constructor initialises this to an empty Array. Other methods of this
# class read a `:reward` key from each entry. The live Array is returned,
# not a copy.
#
# @return [Array]
def history
  @history
end
#last_rpe ⇒ Object (readonly)
Returns the value of attribute last_rpe.
10 11 12 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Read-only accessor for the most recent reward prediction error.
#
# Starts at 0.0 and is overwritten by #compute with
# `reward - predicted_reward` (using the prediction held before that call).
#
# @return [Float]
def last_rpe
  @last_rpe
end
#predicted_reward ⇒ Object (readonly)
Returns the value of attribute predicted_reward.
10 11 12 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Read-only accessor for the current reward prediction.
#
# Starts at 0.0 and is updated by #compute through the `ema` helper using
# `Constants::PREDICTION_ALPHA`.
#
# @return [Float]
def predicted_reward
  @predicted_reward
end
#running_average ⇒ Object (readonly)
Returns the value of attribute running_average.
10 11 12 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Read-only accessor for the running reward average.
#
# Starts at 0.0 and is updated by #compute through the `ema` helper using
# `Constants::REWARD_ALPHA`. #anhedonic? and #euphoric? compare against it.
#
# @return [Float]
def running_average
  @running_average
end
#tick_count ⇒ Object (readonly)
Returns the value of attribute tick_count.
10 11 12 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 10

# Read-only accessor for the number of #compute calls made so far.
#
# Starts at 0 and is incremented by one at the start of every #compute.
#
# @return [Integer]
def tick_count
  @tick_count
end
Instance Method Details
#anhedonic? ⇒ Boolean
76 77 78 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 76

# Reports whether the running average has dropped strictly below
# `Constants::ANHEDONIA_THRESHOLD`.
#
# @return [Boolean]
def anhedonic?
  @running_average < Constants::ANHEDONIA_THRESHOLD
end
#compute(source_signals) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 22

# Processes one tick of source signals and updates all running state.
#
# Steps, in order:
# 1. increment the tick counter;
# 2. combine the signals with the `weighted_sum` helper and clamp the result
#    to `Constants::REWARD_RANGE[:min]..Constants::REWARD_RANGE[:max]`;
# 3. store the prediction error (clamped reward minus the prior prediction);
# 4. update the running average and the prediction through the `ema` helper
#    (with `Constants::REWARD_ALPHA` and `Constants::PREDICTION_ALPHA`);
# 5. hand the reward and signals to the `record` helper.
#
# `weighted_sum`, `ema`, `record` and `classify_rpe` are defined outside this
# listing.
#
# @param source_signals [Object] passed unchanged to `weighted_sum` and
#   `record`, and echoed back under `:sources`
# @return [Hash] `:reward`, `:rpe`, `:running_average` and `:predicted_reward`
#   rounded to 4 decimals, plus `:rpe_class`, `:sources` and `:learning_signal`
def compute(source_signals)
  @tick_count += 1

  raw_reward = weighted_sum(source_signals)
  reward = raw_reward.clamp(Constants::REWARD_RANGE[:min], Constants::REWARD_RANGE[:max])

  # The error is measured against the prediction held *before* this tick's
  # update, so it must be computed ahead of the ema calls below.
  @last_rpe = reward - @predicted_reward
  @running_average = ema(@running_average, reward, Constants::REWARD_ALPHA)
  @predicted_reward = ema(@predicted_reward, reward, Constants::PREDICTION_ALPHA)

  record(reward, source_signals)

  {
    reward: reward.round(4),
    rpe: @last_rpe.round(4),
    rpe_class: classify_rpe(@last_rpe),
    running_average: @running_average.round(4),
    predicted_reward: @predicted_reward.round(4),
    sources: source_signals,
    learning_signal: learning_signal?
  }
end
#discounted_return(window = nil) ⇒ Object
92 93 94 95 96 97 98 99 100 101 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 92

# Sums historical rewards, weighting each by
# `Constants::TEMPORAL_DISCOUNT**age`, where the newest entry has age 0.
#
# @param window [Integer, nil] when given, only the last `window` history
#   entries are considered; when nil the whole history is used
# @return [Float] the discounted sum, or 0.0 when there are no entries
def discounted_return(window = nil)
  entries = window ? @history.last(window) : @history
  return 0.0 if entries.empty?

  total = 0.0
  # Walk newest-to-oldest so the index doubles as the entry's age.
  entries.reverse_each.with_index do |entry, idx|
    total += entry[:reward] * (Constants::TEMPORAL_DISCOUNT**idx)
  end
  total
end
#domain_average(domain) ⇒ Object
50 51 52 53 54 55 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 50

# Arithmetic mean of the rewards stored for one domain.
#
# @param domain [Object] key into the per-domain history
# @return [Float] the mean reward, or 0.0 when the domain is unknown or has
#   no entries
def domain_average(domain)
  entries = @domain_history[domain]
  return 0.0 if entries.nil? || entries.empty?

  entries.sum { |e| e[:reward] } / entries.size.to_f
end
#domain_trend(domain) ⇒ Object
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 57

# Classifies the recent reward direction for one domain.
#
# Takes up to the last 10 entries, splits them into an older and a newer half
# (the newer half gets the extra element when the count is odd) and compares
# the halves' means, computed with the `mean` helper defined outside this
# listing.
#
# @param domain [Object] key into the per-domain history
# @return [Symbol] `:no_data` when the domain has fewer than 5 entries,
#   `:improving` when the newer mean exceeds the older by more than 0.05,
#   `:declining` when it is lower by more than 0.05, otherwise `:stable`
def domain_trend(domain)
  entries = @domain_history[domain]
  return :no_data if entries.nil? || entries.size < 5

  recent = entries.last(10)
  values = recent.map { |e| e[:reward] }
  first_half = values[0...(values.size / 2)]
  second_half = values[(values.size / 2)..]

  diff = mean(second_half) - mean(first_half)
  if diff > 0.05
    :improving
  elsif diff < -0.05
    :declining
  else
    :stable
  end
end
#euphoric? ⇒ Boolean
80 81 82 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 80

# Reports whether the running average has risen strictly above
# `Constants::EUPHORIA_THRESHOLD`.
#
# @return [Boolean]
def euphoric?
  @running_average > Constants::EUPHORIA_THRESHOLD
end
#learning_signal? ⇒ Boolean
84 85 86 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 84

# Reports whether the magnitude of the last prediction error is at least
# `Constants::RPE_THRESHOLD` (sign is ignored; the bound is inclusive).
#
# @return [Boolean]
def learning_signal?
  @last_rpe.abs >= Constants::RPE_THRESHOLD
end
#recent_rewards(limit = 20) ⇒ Object
88 89 90 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 88

# Returns the newest entries of the reward history, oldest first.
#
# @param limit [Integer] maximum number of entries to return (default 20)
# @return [Array] a new Array holding at most `limit` trailing history entries
def recent_rewards(limit = 20)
  @history.last(limit)
end
#record_domain_reward(domain, reward) ⇒ Object
44 45 46 47 48 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 44

# Appends a reward observation to one domain's log, creating the log on
# first use, and drops the oldest entries once the log holds more than
# `Constants::MAX_DOMAIN_HISTORY` items.
#
# @param domain [Object] key under which the reward is filed
# @param reward [Numeric] value stored under `:reward`
# @return [nil]
def record_domain_reward(domain, reward)
  entries = (@domain_history[domain] ||= [])
  # Each entry is stamped with the current UTC time.
  entries << { reward: reward, at: Time.now.utc }
  entries.shift while entries.size > Constants::MAX_DOMAIN_HISTORY
end
#reward_volatility ⇒ Object
103 104 105 106 107 108 109 110 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 103

# Population standard deviation of the rewards in the last
# `Constants::MOMENTUM_WINDOW` history entries.
#
# The average is obtained from the `mean` helper defined outside this listing.
#
# @return [Float] the standard deviation, or 0.0 when the history holds fewer
#   than 3 entries
def reward_volatility
  return 0.0 if @history.size < 3

  recent = @history.last(Constants::MOMENTUM_WINDOW).map { |h| h[:reward] }
  avg = mean(recent)
  # Divides by N (not N - 1): population variance.
  variance = recent.sum { |r| (r - avg)**2 } / recent.size.to_f
  Math.sqrt(variance)
end
#to_h ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/legion/extensions/agentic/affect/reward/helpers/reward_signal.rb', line 112

# Snapshot of the tracker's current state as a plain Hash.
#
# Float fields (`:running_average`, `:predicted_reward`, `:last_rpe`,
# `:volatility`) are rounded to 4 decimals. `:rpe_class` comes from the
# `classify_rpe` helper defined outside this listing.
#
# @return [Hash] the rounded floats plus `:rpe_class`, `:tick_count`,
#   `:learning_signal`, `:anhedonic`, `:euphoric`, `:domains_tracked` (number
#   of domain keys) and `:history_size`
def to_h
  {
    running_average: @running_average.round(4),
    predicted_reward: @predicted_reward.round(4),
    last_rpe: @last_rpe.round(4),
    rpe_class: classify_rpe(@last_rpe),
    tick_count: @tick_count,
    learning_signal: learning_signal?,
    anhedonic: anhedonic?,
    euphoric: euphoric?,
    volatility: reward_volatility.round(4),
    domains_tracked: @domain_history.keys.size,
    history_size: @history.size
  }
end