Class: Legion::CLI::Chat::Session

Inherits:
Object
  • Object
show all
Defined in:
lib/legion/cli/chat/session.rb

Defined Under Namespace

Classes: BudgetExceeded

Constant Summary collapse

INPUT_RATE =

Conservative per-token rates (USD) — roughly Sonnet-class pricing. Used as a safety cap, not a billing system.

0.003 / 1000.0
OUTPUT_RATE =

$15 per million output tokens (INPUT_RATE above is $3 per million input tokens)

0.015 / 1000.0

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(chat:, system_prompt: nil, budget_usd: nil) ⇒ Session

Returns a new instance of Session.



19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/legion/cli/chat/session.rb', line 19

# Wraps an existing chat object and sets up the session's bookkeeping state.
#
# @param chat [Object] chat client; must respond to #with_instructions when
#   a system prompt is supplied
# @param system_prompt [Object, nil] forwarded to chat.with_instructions
#   when truthy; skipped otherwise
# @param budget_usd [Object, nil] stored unchanged and exposed via #budget_usd
def initialize(chat:, system_prompt: nil, budget_usd: nil)
  @chat = chat
  chat.with_instructions(system_prompt) if system_prompt
  @budget_usd = budget_usd

  # Token counters are added to @stats lazily by #send_message.
  @stats = { messages_sent: 0, messages_received: 0, started_at: Time.now }

  # Per-model accumulator; an unseen model key starts from zeroed counters.
  @model_usage = Hash.new do |usage_by_model, model|
    usage_by_model[model] = { input_tokens: 0, output_tokens: 0, requests: 0 }
  end
  @cache_hits_tokens = 0

  # Event name => list of callbacks, created on first access.
  @callbacks = Hash.new { |registry, event| registry[event] = [] }
  @turn = 0
end

Instance Attribute Details

#budget_usd ⇒ Object

Returns the value of attribute budget_usd.



17
18
19
# File 'lib/legion/cli/chat/session.rb', line 17

# Returns the budget value stored in @budget_usd (assigned from the
# constructor's +budget_usd:+ argument; nil when none was given).
def budget_usd = @budget_usd

#cache_hits_tokens ⇒ Object (readonly)

Returns the running total of cache-read input tokens accumulated by #send_message.



16
17
18
# File 'lib/legion/cli/chat/session.rb', line 16

# Returns the running total held in @cache_hits_tokens, which #send_message
# increases by each response's cache_read_input_tokens when that is present.
def cache_hits_tokens = @cache_hits_tokens

#chat ⇒ Object (readonly)

Returns the chat object this session was constructed with.



16
17
18
# File 'lib/legion/cli/chat/session.rb', line 16

# Returns the underlying chat object held in @chat (the one handed to the
# constructor).
def chat = @chat

#stats ⇒ Object (readonly)

Returns the session statistics hash (message counters, start time, and accumulated token counts).



16
17
18
# File 'lib/legion/cli/chat/session.rb', line 16

# Returns the live @stats hash (not a copy): message counters, :started_at,
# and the token totals that #send_message adds once a response reports them.
def stats = @stats

Instance Method Details

#cost_breakdown ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/legion/cli/chat/session.rb', line 107

# Builds one summary row per model recorded in @model_usage.
#
# The cost comes from Legion::LLM::CostEstimator when
# cost_estimator_available? is true, and from the flat INPUT_RATE /
# OUTPUT_RATE constants otherwise.
#
# @return [Array<Hash>] rows with :model, :input_tokens, :output_tokens,
#   :requests and :cost keys
def cost_breakdown
  @model_usage.map do |model, usage|
    tokens_in, tokens_out, request_count = usage.values_at(:input_tokens, :output_tokens, :requests)

    cost =
      if cost_estimator_available?
        Legion::LLM::CostEstimator.estimate(model_id: model, input_tokens: tokens_in, output_tokens: tokens_out)
      else
        (tokens_in * INPUT_RATE) + (tokens_out * OUTPUT_RATE)
      end

    {
      model:         model,
      input_tokens:  tokens_in,
      output_tokens: tokens_out,
      requests:      request_count,
      cost:          cost
    }
  end
end

#elapsed ⇒ Object



128
129
130
# File 'lib/legion/cli/chat/session.rb', line 128

# Time elapsed since the :started_at timestamp recorded in @stats, computed
# as the difference between the current wall-clock time and that timestamp.
def elapsed
  now = Time.now
  now - @stats[:started_at]
end

#emit(event, payload = {}) ⇒ Object



38
39
40
# File 'lib/legion/cli/chat/session.rb', line 38

# Calls every callback registered for +event+, in registration order,
# handing each one +payload+.
#
# @param event [Object] key into the callback registry
# @param payload [Hash] argument passed to each callback (defaults to {})
# @return [Array] the list of callbacks registered for +event+
def emit(event, payload = {})
  @callbacks[event].each do |listener|
    listener.call(payload)
  end
end

#estimated_cost ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/legion/cli/chat/session.rb', line 89

# Estimated total cost of the session so far.
#
# When cost_estimator_available? is true and at least one model has recorded
# usage, the per-model estimates from Legion::LLM::CostEstimator are summed.
# Otherwise the aggregate token counts in @stats are priced with the flat
# INPUT_RATE / OUTPUT_RATE constants (missing counts are treated as 0).
def estimated_cost
  unless cost_estimator_available? && @model_usage.any?
    prompt_cost     = (@stats[:input_tokens] || 0) * INPUT_RATE
    completion_cost = (@stats[:output_tokens] || 0) * OUTPUT_RATE
    return prompt_cost + completion_cost
  end

  @model_usage.sum do |model, usage|
    Legion::LLM::CostEstimator.estimate(
      model_id:      model,
      input_tokens:  usage[:input_tokens],
      output_tokens: usage[:output_tokens]
    )
  end
end

#model_id ⇒ Object



121
122
123
124
125
126
# File 'lib/legion/cli/chat/session.rb', line 121

# Identifier of the chat's current model.
#
# Returns @chat.model.id, or nil when @chat.model is nil. If the lookup
# raises a StandardError, the failure is logged at debug level (only when
# Legion::Logging is defined) and the string 'unknown' is returned.
def model_id
  begin
    active_model = @chat.model
    active_model&.id
  rescue StandardError => e
    Legion::Logging.debug("Session#model_id failed: #{e.message}") if defined?(Legion::Logging)
    'unknown'
  end
end

#model_usage ⇒ Object



103
104
105
# File 'lib/legion/cli/chat/session.rb', line 103

# Snapshot of the per-model usage counters.
#
# Each per-model hash is duplicated, so changing the returned structure does
# not touch the session's own @model_usage.
#
# @return [Hash] model key => copy of that model's usage hash
def model_usage
  @model_usage.each_with_object({}) do |(model, usage), snapshot|
    snapshot[model] = usage.dup
  end
end

#on(event, &block) ⇒ Object



34
35
36
# File 'lib/legion/cli/chat/session.rb', line 34

# Registers +block+ as a callback for +event+; #emit later calls it with the
# event payload.
#
# A call without a block is a no-op. Previously it appended nil to the
# listener list, which made every later #emit for that event fail with
# NoMethodError when it tried to call nil.
#
# @param event [Object] key into the callback registry
# @return [Array] the list of callbacks currently registered for +event+
def on(event, &block)
  listeners = @callbacks[event]
  listeners << block if block
  listeners
end

#send_message(message, on_tool_call: nil, on_tool_result: nil, &block) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/legion/cli/chat/session.rb', line 42

# Sends +message+ through the chat and records usage for the turn.
#
# Steps, in order: budget and URL pre-checks, counter/turn bump, tool
# callback wiring, :llm_start, the chat request (streaming chunks to the
# caller's block when one is given), token accounting, :llm_complete.
#
# @param message [Object] the user message handed to @chat.ask
# @param on_tool_call [#call, nil] invoked with each tool call, if given
# @param on_tool_result [#call, nil] invoked with each tool result, if given
# @yield [chunk] each streamed chunk; :llm_first_token is emitted just
#   before the first one is yielded
# @return [Object] whatever @chat.ask returned
def send_message(message, on_tool_call: nil, on_tool_result: nil, &block)
  check_budget!
  check_for_absorbable_urls(message)

  @stats[:messages_sent] += 1
  current_turn = (@turn += 1)

  @chat.on_tool_call { |tool_call| on_tool_call&.call(tool_call) }
  @chat.on_tool_result { |tool_result| on_tool_result&.call(tool_result) }

  emit(:llm_start, { turn: current_turn })

  # Only wrap when the caller streams; otherwise no block reaches @chat.ask.
  stream_handler = nil
  if block
    awaiting_first_token = true
    stream_handler = proc do |chunk|
      if awaiting_first_token
        awaiting_first_token = false
        emit(:llm_first_token, { turn: current_turn })
      end
      block.call(chunk)
    end
  end

  reply = @chat.ask(message, &stream_handler)
  @stats[:messages_received] += 1

  # Responses that do not expose token counts are skipped entirely.
  if reply.respond_to?(:input_tokens)
    prompt_tokens     = reply.input_tokens || 0
    completion_tokens = reply.output_tokens || 0

    @stats[:input_tokens]  = (@stats[:input_tokens] || 0) + prompt_tokens
    @stats[:output_tokens] = (@stats[:output_tokens] || 0) + completion_tokens

    # Prefer the model reported by the response; fall back to the chat's model.
    used_model = reply.respond_to?(:model_id) ? reply.model_id : model_id
    usage = @model_usage[used_model.to_s]
    usage[:input_tokens]  += prompt_tokens
    usage[:output_tokens] += completion_tokens
    usage[:requests]      += 1

    if reply.respond_to?(:cache_read_input_tokens) && reply.cache_read_input_tokens
      @cache_hits_tokens += reply.cache_read_input_tokens.to_i
    end
  end

  emit(:llm_complete, { turn: current_turn, user_message: message })

  reply
end