Module: Legion::Extensions::Agentic::Affect::Reward::Runners::Reward
- Includes:
- Helpers::Lex
- Included in:
- Client
- Defined in:
- lib/legion/extensions/agentic/affect/reward/runners/reward.rb
Instance Method Summary
- #compute_reward(tick_results: {}) ⇒ Object
- #domain_rewards ⇒ Object
- #reward_for(domain:) ⇒ Object
- #reward_history(limit: 20) ⇒ Object
- #reward_stats ⇒ Object
- #reward_status ⇒ Object
Instance Method Details
#compute_reward(tick_results: {}) ⇒ Object
13 14 15 16 17 18 19 20 |
# File 'lib/legion/extensions/agentic/affect/reward/runners/reward.rb', line 13
#
# Hands one tick's results to the reward store, logs the headline fields of
# the outcome at debug level, and returns the outcome untouched.
#
# Extra keyword arguments are accepted and ignored.
#
# @param tick_results [Hash] forwarded as-is to +reward_store.process_tick+
# @return [Object] whatever +process_tick+ returned; it is read with the keys
#   +:reward+, +:rpe+, +:rpe_class+ and +:learning_signal+ for the log line
def compute_reward(tick_results: {}, **)
  outcome = reward_store.process_tick(tick_results)
  summary = "[reward] reward=#{outcome[:reward]} rpe=#{outcome[:rpe]} " \
            "class=#{outcome[:rpe_class]} learning=#{outcome[:learning_signal]}"
  log.debug(summary)
  outcome
end
#domain_rewards ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/legion/extensions/agentic/affect/reward/runners/reward.rb', line 49
#
# Reports the per-domain averages held by the reward store together with the
# entries that have the highest and lowest value.
#
# Extra keyword arguments are accepted and ignored.
#
# @return [Hash] with keys
#   +:domains+ (the averages exactly as the store returned them),
#   +:domain_count+ (their size),
#   +:best_domain+ / +:worst_domain+ (first element of the max / min entry,
#   or nil when there are no entries)
def domain_rewards(**)
  averages = reward_store.all_domain_averages
  tracked = averages.size
  log.debug("[reward] domains: #{tracked} tracked")

  top_entry = averages.max_by { |_domain, average| average }
  bottom_entry = averages.min_by { |_domain, average| average }

  {
    domains: averages,
    domain_count: tracked,
    best_domain: top_entry&.first,
    worst_domain: bottom_entry&.first
  }
end
#reward_for(domain:) ⇒ Object
32 33 34 35 36 |
# File 'lib/legion/extensions/agentic/affect/reward/runners/reward.rb', line 32
#
# Fetches the reward store's report for a single domain, logs its average and
# trend at debug level, and returns the report unchanged.
#
# Extra keyword arguments are accepted and ignored.
#
# @param domain [Object] forwarded as-is to +reward_store.domain_report+
# @return [Object] whatever +domain_report+ returned; it is read with the keys
#   +:average+ and +:trend+ for the log line
def reward_for(domain:, **)
  report = reward_store.domain_report(domain)
  # A debug line must never be the reason the lookup fails: only round when
  # the report actually carries an average, otherwise log a placeholder.
  shown_average = report[:average]&.round(3) || 'n/a'
  log.debug("[reward] domain=#{domain} avg=#{shown_average} trend=#{report[:trend]}")
  report
end
#reward_history(limit: 20) ⇒ Object
38 39 40 41 42 43 44 45 46 47 |
# File 'lib/legion/extensions/agentic/affect/reward/runners/reward.rb', line 38
#
# Returns the most recent reward entries from the store's signal, along with
# the total history size and the discounted return over the same window.
#
# Extra keyword arguments are accepted and ignored.
#
# @param limit [Integer] passed to both +recent_rewards+ and
#   +discounted_return+ on the signal (default 20)
# @return [Hash] with keys +:history+ (result of +recent_rewards+),
#   +:total+ (size of the signal's full history) and
#   +:discounted_return+ (rounded to 4 decimal places)
def reward_history(limit: 20, **)
  # Look the signal up once, matching reward_stats / reward_status.
  sig = reward_store.signal
  recent = sig.recent_rewards(limit)
  log.debug("[reward] history: #{recent.size} entries")
  {
    history: recent,
    total: sig.history.size,
    discounted_return: sig.discounted_return(limit).round(4)
  }
end
#reward_stats ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/legion/extensions/agentic/affect/reward/runners/reward.rb', line 61
#
# Builds a flat summary of the reward signal plus the store's health
# assessment. Float-valued fields are rounded to 4 decimal places.
#
# Extra keyword arguments are accepted and ignored.
#
# @return [Hash] with keys +:running_average+, +:predicted_reward+,
#   +:volatility+, +:tick_count+, +:health+, +:domains_tracked+,
#   +:history_size+, +:discounted_return+, +:anhedonic+ and +:euphoric+
def reward_stats(**)
  sig = reward_store.signal
  health = reward_store.health_assessment
  log.debug('[reward] stats')
  {
    running_average: sig.running_average.round(4),
    predicted_reward: sig.predicted_reward.round(4),
    volatility: sig.reward_volatility.round(4),
    tick_count: sig.tick_count,
    health: health,
    # Count entries directly instead of building a throwaway key array.
    # NOTE(review): assumes domain_history is a Hash (or hash-like) — confirm.
    domains_tracked: sig.domain_history.size,
    history_size: sig.history.size,
    discounted_return: sig.discounted_return.round(4),
    anhedonic: sig.anhedonic?,
    euphoric: sig.euphoric?
  }
end
#reward_status ⇒ Object
22 23 24 25 26 27 28 29 30 |
# File 'lib/legion/extensions/agentic/affect/reward/runners/reward.rb', line 22
#
# Returns the reward signal's hash form with the store's health assessment
# merged in under +:health+, after logging a one-line status at debug level.
#
# Extra keyword arguments are accepted and ignored.
#
# @return [Hash] +signal.to_h+ merged with +{ health: <health assessment> }+
def reward_status(**)
  signal = reward_store.signal
  assessment = reward_store.health_assessment

  average = signal.running_average.round(3)
  predicted = signal.predicted_reward.round(3)
  log.debug("[reward] status: avg=#{average} " \
            "predicted=#{predicted} health=#{assessment[:status]}")

  signal.to_h.merge(health: assessment)
end