19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/legion/llm/api/native/chat.rb', line 19
# Registers the native chat endpoint, POST /api/llm/chat, on +app+.
# NOTE(review): the name/signature suggests a Sinatra extension hook — confirm against the caller.
#
# Each request is handled in this order:
# 1. The body is parsed and must contain a :message key.
# 2. If Legion::MCP::TierRouter is loaded it is asked to route the message;
#    a tier-0 result halts the request with the router's own response.
# 3. Otherwise the message is dispatched to the LLM:
#    * asynchronously through Chat::ASYNC_POOL when +cache_available?+ is truthy
#      and the HTTP_X_LEGION_SYNC env entry is not 'true' (202 plus a poll key);
#    * synchronously through Legion::LLM.chat in every other case (201).
#
# @param app [#post] application object the route is defined on
# @return [Object] whatever the closing +log.debug+ call returns
def self.registered(app)
  log.debug('[llm][api][chat] registering POST /api/llm/chat')

  app.post '/api/llm/chat' do
    log.debug("[llm][api][chat] action=received params=#{params.keys}")
    require_llm!
    payload = parse_request_body
    validate_required!(payload, :message)
    prompt = payload[:message]

    # Optional fast path: only taken when the tier router constant is loaded.
    if defined?(Legion::MCP::TierRouter)
      log.debug('[llm][api][chat] action=tier_routing_check')
      routed = Legion::MCP::TierRouter.route(
        intent:  prompt,
        params:  payload.except(:message, :model, :provider, :request_id),
        context: {}
      )
      if routed[:tier]&.zero?
        # No id has been generated yet at this point, hence the literal placeholder.
        logged_id = payload[:request_id] || 'generated'
        log.info("[llm][api][chat] action=tier0_response request_id=#{logged_id} latency_ms=#{routed[:latency_ms]}")
        tier0_payload = {
          response:           routed[:response],
          tier:               0,
          latency_ms:         routed[:latency_ms],
          pattern_confidence: routed[:pattern_confidence]
        }
        halt json_response(tier0_payload)
      end
    end

    request_id    = payload[:request_id] || SecureRandom.uuid
    model_name    = payload[:model]
    provider_name = payload[:provider]
    log.debug("[llm][api][chat] action=dispatch request_id=#{request_id} model=#{model_name || 'auto'} provider=#{provider_name || 'auto'}")

    # Async needs the cache for status polling; the sync header opts out of it.
    use_async = cache_available? && env['HTTP_X_LEGION_SYNC'] != 'true'

    if use_async
      log.debug("[llm][api][chat] action=async_dispatch request_id=#{request_id}")
      llm_api        = Legion::LLM
      response_cache = Legion::LLM::Cache::Response
      response_cache.init_request(request_id)

      Chat::ASYNC_POOL.post do
        chat_session = llm_api.chat_direct(model: model_name, provider: provider_name)
        reply        = chat_session.ask(prompt)
        answer_text  = reply.content
        async_meta   = {
          model:      chat_session.model.to_s,
          tokens_in:  (reply.input_tokens if reply.respond_to?(:input_tokens)),
          tokens_out: (reply.output_tokens if reply.respond_to?(:output_tokens))
        }
        response_cache.complete(request_id, response: answer_text, meta: async_meta)
        log.debug("[llm][api][chat] action=async_complete request_id=#{request_id}")
      rescue StandardError => e
        # Report the failure, then mark the cached request as failed for pollers.
        handle_exception(e, level: :error, handled: true, operation: 'llm.api.chat.async', request_id: request_id)
        response_cache.fail_request(request_id, code: 'llm_error', message: e.message)
      end

      log.info("[llm][api][chat] action=queued request_id=#{request_id}")
      queued = { request_id: request_id, poll_key: "llm:#{request_id}:status" }
      json_response(queued, status_code: 202)
    else
      log.debug("[llm][api][chat] action=sync_dispatch request_id=#{request_id}")
      caller_info = build_server_caller(source: 'api', path: request.path, env: env)
      outcome = Legion::LLM.chat(
        message:  prompt,
        model:    model_name,
        provider: provider_name,
        caller:   caller_info
      )

      if outcome.is_a?(Legion::LLM::Inference::Response)
        # Inference responses: message, routing and tokens may use symbol or string keys.
        raw_message = outcome.message
        text =
          if raw_message.is_a?(Hash)
            raw_message[:content] || raw_message['content']
          else
            raw_message.to_s
          end
        route_info     = outcome.routing || {}
        resolved_model = route_info[:model] || route_info['model']
        usage          = outcome.tokens || {}
        log.info("[llm][api][chat] action=completed request_id=#{request_id} model=#{resolved_model}")
        meta = {
          model:      resolved_model.to_s,
          tokens_in:  token_value(usage, :input),
          tokens_out: token_value(usage, :output)
        }
      else
        # Any other result object is probed for the accessors it happens to offer.
        log.info("[llm][api][chat] action=completed request_id=#{request_id} result_class=#{outcome.class}")
        text =
          if outcome.respond_to?(:content)
            outcome.content
          else
            outcome.to_s
          end
        reported_model =
          if outcome.respond_to?(:model_id)
            outcome.model_id.to_s
          else
            model_name.to_s
          end
        meta = {
          model:      reported_model,
          tokens_in:  (outcome.input_tokens if outcome.respond_to?(:input_tokens)),
          tokens_out: (outcome.output_tokens if outcome.respond_to?(:output_tokens))
        }
      end

      json_response({ response: text, meta: meta }, status_code: 201)
    end
  end

  log.debug('[llm][api][chat] POST /api/llm/chat registered')
end
|