15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
# File 'lib/legion/llm/api/namespaces/openai/completions.rb', line 15
# Registers the OpenAI-compatible text-completions route on +app+.
#
# Adds `POST /v1/completions`. The handler wraps the request's `prompt` in a
# single user message, runs it (non-streaming) through
# Legion::LLM::Inference::Executor and renders the result as an OpenAI-style
# `text_completion` JSON document. Failures raised while handling a request
# are mapped to OpenAI-style error responses:
#
# * missing/empty prompt              -> 400 invalid_request_error
# * Legion::LLM::AuthError            -> 401 authentication_error
# * Legion::LLM::RateLimitError       -> 429 rate_limit_error
# * Legion::LLM::ProviderDown/Error   -> 502 server_error
# * any other StandardError           -> 500 server_error
#
# An error raised while registering the route itself is passed to
# +handle_exception+ and is not re-raised from here.
#
# @param app [#post] object the route is registered on
def self.registered(app)
  log.debug('[llm][api][namespaces][openai][completions] registering routes')
  app.post '/v1/completions' do
    require_llm!
    # Monotonic clock: the value is handed to the completion summary as the start mark.
    request_started_at = ::Process.clock_gettime(::Process::CLOCK_MONOTONIC)
    body = parse_request_body
    prompt = body[:prompt]
    if prompt.nil? || (prompt.respond_to?(:empty?) && prompt.empty?)
      return openai_error('prompt is required', type: 'invalid_request_error',
                                                code: nil, status_code: 400)
    end
    request_id = SecureRandom.uuid

    # Nil-safe lookup: without the guard a missing :llm settings section raised
    # NoMethodError and the request ended as a 500 instead of using 'default'.
    llm_settings = Legion::Settings[:llm]
    default_model = llm_settings && llm_settings[:default_model]
    model = body[:model] || default_model || 'default'

    # The OpenAI API accepts `prompt` as a string or an array. Array#to_s would
    # send Ruby's inspect form (e.g. ["Hello"]) to the model, so array elements
    # are joined into one text instead.
    # NOTE(review): one choice per prompt (batched prompts) is not implemented here.
    prompt_text = prompt.is_a?(Array) ? prompt.join("\n") : prompt.to_s
    messages = [{ role: 'user', content: prompt_text }]

    log.info("[llm][api][namespaces][openai][completions] action=accepted request_id=#{request_id} model=#{model}")
    inference_request = Legion::LLM::Inference::Request.build(
      id: request_id,
      messages: messages,
      routing: { model: model },
      tools: [],
      caller: build_server_caller(source: 'openai_completions', path: request.path, env: env),
      stream: false,
      cache: { strategy: :default, cacheable: true }
    )
    pipeline_response = Legion::LLM::Inference::Executor.new(inference_request).call

    routing = pipeline_response.routing || {}
    tokens = pipeline_response.tokens || {}
    raw_msg = pipeline_response.message
    # The message may be a Hash (symbol or string keys) or any other object.
    text = raw_msg.is_a?(Hash) ? (raw_msg[:content] || raw_msg['content']).to_s : raw_msg.to_s
    # Prefer the model reported by the pipeline; fall back to the requested one.
    resolved_model = (routing[:model] || routing['model'] || model).to_s
    # NOTE(review): `Completions.(...)` is Ruby shorthand for `Completions.call(...)`;
    # confirm this is the intended token-count helper (a method name may be missing).
    input_tokens = Completions.(tokens, :input_tokens)
    output_tokens = Completions.(tokens, :output_tokens)

    log_api_completion_summary(
      namespace: 'namespaces][openai][completions',
      request_id: request_id,
      pipeline_response: pipeline_response,
      stream: false,
      started_at: request_started_at,
      tool_calls: [],
      stop_reason: 'stop'
    )

    content_type :json
    status 200
    Legion::JSON.dump({
                        id: "cmpl-#{request_id.delete('-')}",
                        object: 'text_completion',
                        created: Time.now.to_i,
                        model: resolved_model,
                        choices: [{ text: text, index: 0, finish_reason: 'stop' }],
                        usage: {
                          prompt_tokens: input_tokens,
                          completion_tokens: output_tokens,
                          total_tokens: input_tokens.to_i + output_tokens.to_i
                        }
                      })
  rescue Legion::LLM::AuthError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.namespaces.openai.completions.auth')
    openai_error(e.message, type: 'authentication_error', status_code: 401)
  rescue Legion::LLM::RateLimitError => e
    handle_exception(e, level: :warn, handled: true, operation: 'llm.api.namespaces.openai.completions.rate_limit')
    openai_error(e.message, type: 'rate_limit_error', code: 'rate_limit_exceeded', status_code: 429)
  rescue Legion::LLM::ProviderDown, Legion::LLM::ProviderError => e
    handle_exception(e, level: :error, handled: true, operation: 'llm.api.namespaces.openai.completions.provider')
    openai_error(e.message, type: 'server_error', status_code: 502)
  rescue StandardError => e
    # Catch-all for anything not classified above; reported as unhandled.
    handle_exception(e, level: :error, handled: false, operation: 'llm.api.namespaces.openai.completions')
    openai_error(e.message, type: 'server_error', status_code: 500)
  end
  log.debug('[llm][api][namespaces][openai][completions] routes registered')
rescue StandardError => e
  handle_exception(e, level: :error, handled: false, operation: 'llm.api.namespaces.openai.completions.register')
end
|