Module: Legion::LLM::Inference::Steps::Metering

Extended by:
Legion::Logging::Helper
Included in:
Executor
Defined in:
lib/legion/llm/inference/steps/metering.rb

Class Method Summary collapse

Class Method Details

.build_event(**opts) ⇒ Object



14
15
16
17
# File 'lib/legion/llm/inference/steps/metering.rb', line 14

# Assembles a single metering event hash from the supplied keyword options.
#
# Emits a debug log line naming the provider and model, then combines the
# hashes produced by {identity_fields}, {token_fields} and
# {timing_and_context}. On a key collision the later hash wins (standard
# Hash#merge semantics).
#
# @param opts [Hash] keyword options forwarded unchanged to each helper
# @return [Hash] the merged event
def build_event(**opts)
  log.debug("[metering][build_event] action=build provider=#{opts[:provider]} model=#{opts[:model_id]}")

  identity = identity_fields(opts)
  tokens   = token_fields(opts)
  with_tokens = identity.merge(tokens)
  timing   = timing_and_context(opts)
  with_tokens.merge(timing)
end

.flush_spool ⇒ Object



23
24
25
# File 'lib/legion/llm/inference/steps/metering.rb', line 23

# Delegates to Legion::LLM::Metering.flush_spool and returns whatever that
# call returns. What flushing entails is defined by that module, not here.
#
# @return [Object] the delegate's return value
def flush_spool
  metering = Legion::LLM::Metering
  metering.flush_spool
end

.identity_fields(opts) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/legion/llm/inference/steps/metering.rb', line 27

# Extracts the "who / what" portion of a metering event.
#
# Copies exactly seven keys out of +opts+ via +opts[key]+, in a fixed order;
# any other keys in +opts+ are ignored. For a plain Hash, a key that is not
# present comes back as nil.
#
# @param opts [Hash] event options
# @return [Hash] node_id, worker_id, agent_id, request_type, tier, provider
#   and model_id, in that order
def identity_fields(opts)
  wanted = %i[node_id worker_id agent_id request_type tier provider model_id]
  wanted.each_with_object({}) do |field, picked|
    picked[field] = opts[field]
  end
end

.publish_event(event) ⇒ Object



56
57
58
# File 'lib/legion/llm/inference/steps/metering.rb', line 56

# Hands +event+ to Legion::LLM::Metering.emit and returns that call's result.
#
# @param event [Object] the event to emit (build_event produces a Hash)
# @return [Object] whatever Legion::LLM::Metering.emit returns
def publish_event(event)
  emitter = Legion::LLM::Metering
  emitter.emit(event)
end

.publish_or_spool(event) ⇒ Object



19
20
21
# File 'lib/legion/llm/inference/steps/metering.rb', line 19

# Forwards +event+ to {publish_event} and returns its result.
#
# NOTE(review): despite the name, this method contains no spooling branch;
# it only calls publish_event. Any fall-back-to-spool behaviour presumably
# lives behind Legion::LLM::Metering.emit — confirm before relying on it.
#
# @param event [Object] the event to publish
# @return [Object] whatever publish_event returns
def publish_or_spool(event)
  publish_event(event)
end

.timing_and_context(opts) ⇒ Object



47
48
49
50
51
52
53
54
# File 'lib/legion/llm/inference/steps/metering.rb', line 47

# Builds the timing / routing portion of a metering event.
#
# +latency_ms+ and +wall_clock_ms+ fall back to 0 only when the key is
# absent from +opts+ (Hash#fetch semantics); +routing_reason+ is read with
# +opts[]+. +recorded_at+ is stamped at call time as the current UTC time
# in ISO 8601 form.
#
# @param opts [Hash] event options
# @return [Hash] latency_ms, wall_clock_ms, routing_reason and recorded_at
def timing_and_context(opts)
  latency    = opts.fetch(:latency_ms, 0)
  wall_clock = opts.fetch(:wall_clock_ms, 0)
  reason     = opts[:routing_reason]
  stamped_at = Time.now.utc.iso8601

  {
    latency_ms:     latency,
    wall_clock_ms:  wall_clock,
    routing_reason: reason,
    recorded_at:    stamped_at
  }
end

.token_fields(opts) ⇒ Object



39
40
41
42
43
44
45
# File 'lib/legion/llm/inference/steps/metering.rb', line 39

# Builds the token-count portion of a metering event.
#
# Each count is read from +opts+ and treated as 0 when the key is absent
# *or* explicitly nil. The nil case matters: a caller that forwards an
# unset count (e.g. +thinking_tokens: nil+) would otherwise make the sum
# below raise, because Hash#fetch only applies its default to missing keys.
#
# @param opts [Hash] event options
# @return [Hash] input_tokens, output_tokens, thinking_tokens and their sum
#   as total_tokens
def token_fields(opts)
  input, output, thinking = %i[input_tokens output_tokens thinking_tokens].map do |key|
    opts[key] || 0
  end

  { input_tokens: input, output_tokens: output, thinking_tokens: thinking,
    total_tokens: input + output + thinking }
end