Module: Legion::Extensions::Agentic::Affect::Reward::Helpers::Constants

Defined in:
lib/legion/extensions/agentic/affect/reward/helpers/constants.rb

Constant Summary

REWARD_SOURCES =

Reward sources with weights (summing to 1.0). Each source contributes independently to the composite reward signal.

{
  prediction_accuracy: { weight: 0.20, description: 'Correct predictions reinforced' },
  curiosity_resolved:  { weight: 0.15, description: 'Wonder resolution satisfaction' },
  goal_achieved:       { weight: 0.20, description: 'Intention completion reward' },
  social_approval:     { weight: 0.10, description: 'Trust increase from peers' },
  flow_state:          { weight: 0.10, description: 'Intrinsic flow motivation' },
  error_avoidance:     { weight: 0.10, description: 'Low error rate maintenance' },
  novelty_encounter:   { weight: 0.10, description: 'Novel experience exploration' },
  homeostatic_balance: { weight: 0.05, description: 'System stability maintenance' }
}.freeze
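As a sketch of how these weights might be applied, the helper below takes per-source scores (each in -1.0..1.0) and forms their weighted sum, clamped to the signal range. `compose_reward` and the sample scores are illustrative assumptions, not part of this module.

```ruby
# Per-source weights from REWARD_SOURCES (descriptions omitted for brevity).
WEIGHTS = {
  prediction_accuracy: 0.20, curiosity_resolved: 0.15, goal_achieved: 0.20,
  social_approval: 0.10, flow_state: 0.10, error_avoidance: 0.10,
  novelty_encounter: 0.10, homeostatic_balance: 0.05
}.freeze

# Hypothetical helper: weighted sum of per-source scores, clamped to
# REWARD_RANGE. Sources absent from `scores` contribute nothing.
def compose_reward(scores)
  WEIGHTS.sum { |source, weight| weight * scores.fetch(source, 0.0) }
         .clamp(-1.0, 1.0)
end
```

Because the weights sum to 1.0, a composite built from in-range scores stays in range even before clamping; the clamp is a safety net.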
REWARD_ALPHA =

EMA alpha for running reward average

0.15
PREDICTION_ALPHA =

EMA alpha for reward prediction (expected reward baseline)

0.1
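Both alphas presumably drive standard exponential-moving-average updates, with the larger REWARD_ALPHA tracking recent rewards faster than the prediction baseline. The `ema` helper is a sketch under that assumption, not an API of this module.

```ruby
REWARD_ALPHA     = 0.15 # running reward average (faster tracking)
PREDICTION_ALPHA = 0.1  # expected-reward baseline (slower, more stable)

# One EMA step: pull the previous estimate toward the new value by alpha.
def ema(previous, value, alpha)
  previous + alpha * (value - previous)
end

running_avg = 0.0
prediction  = 0.0
3.times do
  running_avg = ema(running_avg, 0.4, REWARD_ALPHA)
  prediction  = ema(prediction, 0.4, PREDICTION_ALPHA)
end
# After the same reward stream, running_avg sits closer to 0.4 than
# prediction does, because 0.15 > 0.1.
```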
RPE_THRESHOLD =

Minimum RPE magnitude to trigger learning signal

0.05
REWARD_RANGE =

Reward signal range

{ min: -1.0, max: 1.0 }.freeze
RPE_LEVELS =

RPE classification thresholds

{
  large_positive: 0.3,   # "Way better than expected!" — strong reinforcement
  positive:       0.1,   # "Better than expected" — moderate reinforcement
  neutral:        0.05,  # "About as expected" — maintenance
  negative:       -0.1,  # "Worse than expected" — mild suppression
  large_negative: -0.3   # "Way worse than expected!" — strong suppression
}.freeze
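A hypothetical classifier for these bands is sketched below, computing RPE as actual minus predicted reward. Treating values between the `negative` and `positive` thresholds as neutral is an assumption about how the module reads the table.

```ruby
RPE_LEVELS = {
  large_positive: 0.3, positive: 0.1, neutral: 0.05,
  negative: -0.1, large_negative: -0.3
}.freeze

# Hypothetical classifier: RPE = actual reward - predicted reward.
# Magnitudes below the neutral band carry no learning signal.
def classify_rpe(rpe)
  return :large_positive if rpe >= RPE_LEVELS[:large_positive]
  return :positive       if rpe >= RPE_LEVELS[:positive]
  return :large_negative if rpe <= RPE_LEVELS[:large_negative]
  return :negative       if rpe <= RPE_LEVELS[:negative]
  :neutral
end
```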
TEMPORAL_DISCOUNT =

Temporal discount factor (per tick, for weighted history)

0.95
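One plausible reading of a per-tick discount is exponential down-weighting of older history entries, one tick of age per entry. `discounted_history_sum` is an illustrative helper under that assumption, not module API.

```ruby
TEMPORAL_DISCOUNT = 0.95

# Weight a reward history so earlier (older) entries count exponentially
# less; the last element is the most recent and gets full weight.
def discounted_history_sum(history)
  history.each_with_index.sum do |reward, i|
    age = history.length - 1 - i # 0 for the most recent entry
    (TEMPORAL_DISCOUNT**age) * reward
  end
end
```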
MAX_REWARD_HISTORY =

Overall reward history cap (maximum entries retained)

200
MAX_DOMAIN_HISTORY =

Domain-specific reward history cap

50
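The two caps suggest bounded histories that drop their oldest entries on overflow; a minimal sketch of that behavior follows (the `push_reward` helper is hypothetical, and the same pattern would apply per-domain with MAX_DOMAIN_HISTORY).

```ruby
MAX_REWARD_HISTORY = 200

# Append a reward, then discard the oldest entries beyond the cap so the
# array never grows past MAX_REWARD_HISTORY.
def push_reward(history, reward)
  history << reward
  overflow = history.length - MAX_REWARD_HISTORY
  history.shift(overflow) if overflow.positive?
  history
end
```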
ANHEDONIA_THRESHOLD =

Anhedonia threshold — running average below this triggers concern

-0.3
EUPHORIA_THRESHOLD =

Euphoria threshold — running average above this triggers concern

0.7
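Together these thresholds bound the healthy band for the running reward average, flagging both a chronically low and a chronically high average. A hypothetical check (`reward_tone` is not part of the module):

```ruby
ANHEDONIA_THRESHOLD = -0.3
EUPHORIA_THRESHOLD  = 0.7

# Flag a running reward average that has drifted out of the healthy band.
def reward_tone(running_avg)
  return :anhedonia if running_avg < ANHEDONIA_THRESHOLD
  return :euphoria  if running_avg > EUPHORIA_THRESHOLD
  :normal
end
```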
MOMENTUM_WINDOW =

Reward momentum window — number of recent rewards considered when estimating how much prior reward influences the next prediction

10
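Assuming momentum is a simple trailing mean over the last MOMENTUM_WINDOW rewards (an assumption; the module may weight the window differently, e.g. with TEMPORAL_DISCOUNT), a sketch:

```ruby
MOMENTUM_WINDOW = 10

# Hypothetical momentum estimate: mean of the most recent rewards, usable
# as a prior when forming the next reward prediction.
def reward_momentum(history)
  recent = history.last(MOMENTUM_WINDOW)
  return 0.0 if recent.empty?
  recent.sum / recent.length.to_f
end
```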