Class: Legion::CLI::Chat::Session
- Inherits:
-
Object
- Object
- Legion::CLI::Chat::Session
- Defined in:
- lib/legion/cli/chat/session.rb
Defined Under Namespace
Classes: BudgetExceeded
Constant Summary collapse
- INPUT_RATE =
Conservative per-token rates (USD) — roughly Sonnet-class pricing. Used as a safety cap, not a billing system. Input rate: $3 per million input tokens.
0.003 / 1000.0
- OUTPUT_RATE =
Conservative per-token output rate (USD): $15 per million output tokens.
0.015 / 1000.0
Instance Attribute Summary collapse
-
#budget_usd ⇒ Object
Returns the value of attribute budget_usd.
-
#cache_hits_tokens ⇒ Object
readonly
Running total of cache-read input tokens reported by responses (starts at 0).
-
#chat ⇒ Object
readonly
The chat object supplied to the constructor; messages are sent through it.
-
#stats ⇒ Object
readonly
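Hash of session counters: messages_sent, messages_received, started_at, plus cumulative input_tokens / output_tokens once a response reports them.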
Instance Method Summary collapse
- #cost_breakdown ⇒ Object
- #elapsed ⇒ Object
- #emit(event, payload = {}) ⇒ Object
- #estimated_cost ⇒ Object
-
#initialize(chat:, system_prompt: nil, budget_usd: nil) ⇒ Session
constructor
A new instance of Session.
- #model_id ⇒ Object
- #model_usage ⇒ Object
- #on(event, &block) ⇒ Object
- #send_message(message, on_tool_call: nil, on_tool_result: nil, &block) ⇒ Object
Constructor Details
#initialize(chat:, system_prompt: nil, budget_usd: nil) ⇒ Session
Returns a new instance of Session.
19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/legion/cli/chat/session.rb', line 19

# Wraps an existing chat object and resets every per-session counter.
#
# @param chat [Object] chat backend; must respond to +with_instructions+
#   whenever a system prompt is supplied
# @param system_prompt [String, nil] forwarded to +chat.with_instructions+
#   only when truthy
# @param budget_usd [Numeric, nil] stored as-is (presumably a spending cap in
#   USD, going by the name); nothing in this method enforces it
def initialize(chat:, system_prompt: nil, budget_usd: nil)
  @chat = chat
  @chat.with_instructions(system_prompt) if system_prompt

  @budget_usd = budget_usd
  @stats = {
    messages_sent: 0,
    messages_received: 0,
    started_at: Time.now
  }

  # Block-form defaults: every new key gets its own fresh container instead
  # of sharing a single default object.
  @model_usage = Hash.new do |usage, model|
    usage[model] = { input_tokens: 0, output_tokens: 0, requests: 0 }
  end
  @cache_hits_tokens = 0
  @callbacks = Hash.new { |registry, event| registry[event] = [] }
  @turn = 0
end
Instance Attribute Details
#budget_usd ⇒ Object
Returns the value of attribute budget_usd.
17 18 19 |
# File 'lib/legion/cli/chat/session.rb', line 17

# Reader for the budget value handed to the constructor.
#
# @return [Object, nil] whatever is currently stored in +@budget_usd+
def budget_usd
  @budget_usd
end
#cache_hits_tokens ⇒ Object (readonly)
Running total of cache-read input tokens reported by responses (starts at 0).
16 17 18 |
# File 'lib/legion/cli/chat/session.rb', line 16

# Reader for the accumulated cache-read token counter.
#
# @return [Integer] current value of +@cache_hits_tokens+
def cache_hits_tokens
  @cache_hits_tokens
end
#chat ⇒ Object (readonly)
The chat object supplied to the constructor; messages are sent through it.
16 17 18 |
# File 'lib/legion/cli/chat/session.rb', line 16

# Reader for the wrapped chat object.
#
# @return [Object] the object stored in +@chat+
def chat
  @chat
end
#stats ⇒ Object (readonly)
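Hash of session counters: messages_sent, messages_received, started_at, plus cumulative input_tokens / output_tokens once a response reports them.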
16 17 18 |
# File 'lib/legion/cli/chat/session.rb', line 16

# Reader for the session counters hash. The live hash is returned, not a
# copy, so caller mutations are visible to the session.
#
# @return [Hash] the object stored in +@stats+
def stats
  @stats
end
Instance Method Details
#cost_breakdown ⇒ Object
107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/legion/cli/chat/session.rb', line 107

# Per-model usage report with a cost figure for each model seen so far.
#
# The cost comes from Legion::LLM::CostEstimator when
# +cost_estimator_available?+ says so; otherwise it falls back to the flat
# INPUT_RATE / OUTPUT_RATE constants.
#
# @return [Array<Hash>] one hash per model with the keys +:model+,
#   +:input_tokens+, +:output_tokens+, +:requests+ and +:cost+
def cost_breakdown
  @model_usage.map do |model_name, counts|
    tokens_in = counts[:input_tokens]
    tokens_out = counts[:output_tokens]

    cost =
      if cost_estimator_available?
        Legion::LLM::CostEstimator.estimate(
          model_id: model_name,
          input_tokens: tokens_in,
          output_tokens: tokens_out
        )
      else
        (tokens_in * INPUT_RATE) + (tokens_out * OUTPUT_RATE)
      end

    {
      model: model_name,
      input_tokens: tokens_in,
      output_tokens: tokens_out,
      requests: counts[:requests],
      cost: cost
    }
  end
end
#elapsed ⇒ Object
128 129 130 |
# File 'lib/legion/cli/chat/session.rb', line 128

# Wall-clock time since the +:started_at+ timestamp recorded in the stats.
#
# @return [Float] seconds between +@stats[:started_at]+ and now
def elapsed
  current_time = Time.now
  current_time - @stats[:started_at]
end
#emit(event, payload = {}) ⇒ Object
38 39 40 |
# File 'lib/legion/cli/chat/session.rb', line 38

# Invokes every callback registered for +event+, in registration order,
# handing each one the same payload.
#
# @param event [Object] key used to look up callbacks (symbols elsewhere in
#   this class)
# @param payload [Object] argument passed to each callback; defaults to +{}+
# @return [Array] the list of callbacks registered for +event+
def emit(event, payload = {})
  listeners = @callbacks[event]
  listeners.each do |listener|
    listener.call(payload)
  end
end
#estimated_cost ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/legion/cli/chat/session.rb', line 89

# Estimated total spend for the session so far.
#
# When the cost estimator is available and at least one model has recorded
# usage, the per-model estimates are summed. Otherwise the session-wide
# token totals in +@stats+ are priced with the flat INPUT_RATE / OUTPUT_RATE
# constants (missing totals count as zero).
#
# @return [Numeric] estimated cost
def estimated_cost
  if cost_estimator_available? && @model_usage.any?
    return @model_usage.sum do |model_name, counts|
      Legion::LLM::CostEstimator.estimate(
        model_id: model_name,
        input_tokens: counts[:input_tokens],
        output_tokens: counts[:output_tokens]
      )
    end
  end

  input_cost = (@stats[:input_tokens] || 0) * INPUT_RATE
  output_cost = (@stats[:output_tokens] || 0) * OUTPUT_RATE
  input_cost + output_cost
end
#model_id ⇒ Object
121 122 123 124 125 126 |
# File 'lib/legion/cli/chat/session.rb', line 121

# Identifier of the model currently attached to the chat.
#
# @return [Object, nil] +@chat.model.id+, or +nil+ when the chat reports no
#   model; the literal 'unknown' when the lookup raises a StandardError
def model_id
  @chat.model&.id
rescue StandardError => e
  # Best-effort lookup: log only if the logging module is loaded, then fall
  # back to a placeholder instead of propagating the error.
  Legion::Logging.debug("Session#model_id failed: #{e.message}") if defined?(Legion::Logging)
  'unknown'
end
#model_usage ⇒ Object
103 104 105 |
# File 'lib/legion/cli/chat/session.rb', line 103

# Snapshot of the per-model usage counters.
#
# Each per-model hash is duplicated, so callers can modify the result
# without touching the session's own bookkeeping.
#
# @return [Hash{Object => Hash}] model key => copy of its counters
def model_usage
  @model_usage.transform_values { |counts| counts.dup }
end
#on(event, &block) ⇒ Object
34 35 36 |
# File 'lib/legion/cli/chat/session.rb', line 34

# Registers a callback to be run whenever +event+ is emitted.
#
# Calling this without a block registers nothing: storing +nil+ would make
# every later +emit+ for the event fail when it tries to call the entry.
#
# @param event [Object] key the callback is filed under
# @yield [payload] invoked by +emit+ with the emitted payload
# @return [Array] the list of callbacks registered for +event+
def on(event, &block)
  listeners = @callbacks[event]
  listeners << block if block
  listeners
end
#send_message(message, on_tool_call: nil, on_tool_result: nil, &block) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/legion/cli/chat/session.rb', line 42

# Sends one user message through the chat, tracking usage and emitting
# lifecycle events (+:llm_start+, +:llm_first_token+, +:llm_complete+).
#
# @param message [Object] user message passed to +@chat.ask+
# @param on_tool_call [#call, nil] invoked with each tool call the chat reports
# @param on_tool_result [#call, nil] invoked with each tool result the chat reports
# @yield [chunk] optional streaming block; receives every chunk from the chat
# @return [Object] the response returned by +@chat.ask+
# @raise whatever +check_budget!+ raises (presumably BudgetExceeded — confirm
#   against its definition, which is outside this listing)
def send_message(message, on_tool_call: nil, on_tool_result: nil, &block)
  check_budget!
  check_for_absorbable_urls(message)

  @stats[:messages_sent] += 1
  @turn += 1
  # Pin the turn number so events from this call stay labelled consistently
  # even if @turn advances before they fire.
  current_turn = @turn

  @chat.on_tool_call { |tc| on_tool_call&.call(tc) }
  @chat.on_tool_result { |tr| on_tool_result&.call(tr) }

  emit(:llm_start, { turn: current_turn })

  # Wrap the caller's streaming block so the first chunk also triggers
  # :llm_first_token; without a block nothing is passed to the chat.
  first_token_emitted = false
  wrapped_block = if block
                    proc do |chunk|
                      unless first_token_emitted
                        first_token_emitted = true
                        emit(:llm_first_token, { turn: current_turn })
                      end
                      block.call(chunk)
                    end
                  end

  response = @chat.ask(message, &wrapped_block)
  @stats[:messages_received] += 1

  if response.respond_to?(:input_tokens)
    in_tok = response.input_tokens || 0
    out_tok = response.output_tokens || 0
    @stats[:input_tokens] = (@stats[:input_tokens] || 0) + in_tok
    @stats[:output_tokens] = (@stats[:output_tokens] || 0) + out_tok

    # Attribute usage to the model the response names, falling back to the
    # chat's current model when the response does not expose one.
    resp_model = response.respond_to?(:model_id) ? response.model_id : model_id
    entry = @model_usage[resp_model.to_s]
    entry[:input_tokens] += in_tok
    entry[:output_tokens] += out_tok
    entry[:requests] += 1

    if response.respond_to?(:cache_read_input_tokens)
      cached = response.cache_read_input_tokens
      @cache_hits_tokens += cached.to_i if cached
    end
  end

  emit(:llm_complete, { turn: current_turn, user_message: message })
  response
end