44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
# File 'lib/toy/serve/openai/handlers.rb', line 44
# Handles a text-completion request whose prompt is an array of integer
# token IDs, and returns the JSON response body as a String.
#
# The prompt is read from the "prompt" key of the request body, falling back
# to "prompt_ids" when "prompt" yields an empty array. If both are empty the
# response status is set to 400 and an `invalid_request_error` JSON string is
# returned. Otherwise up to `max_tokens` new IDs are generated via
# `api_generate_ids` and returned in a `text_completion`-shaped JSON object
# with the generated IDs under `choices[0].ids`.
#
# When `TinyNN.tnn_events_active == 1`, a "request" event carrying token
# counts and latency is also emitted, and `STATE.req_seq` is incremented.
#
# @param req request object; only `req.body` is read here
# @param res response object; the content type is set on it, and
#   `set_status(400)` is called on the validation-error path
# @return [String] newline-terminated JSON text (error or completion)
#
# NOTE(review): presumably the caller writes the returned String as the HTTP
# response body; that is not visible in this method -- confirm.
def handle(req, res)
# NOTE(review): as shown, the next line has no method name between `.` and
# `[` and is not valid Ruby; an accessor name (possibly a headers accessor)
# appears to have been lost in extraction -- confirm against the original file.
res.["Content-Type"] = "application/json"
body = req.body
# Accept the token IDs under either "prompt" or, as a fallback, "prompt_ids".
prompt_ids = SpinelKit::Json.get_int_array(body, "prompt")
if prompt_ids.length == 0
prompt_ids = SpinelKit::Json.get_int_array(body, "prompt_ids")
end
# Neither key produced any IDs: reject with 400 and an error payload.
if prompt_ids.length == 0
res.set_status(400)
return "{\"error\":{\"message\":\"prompt must be a non-empty int array " +
"(this server speaks IDs only; tokenize client-side)\"," +
"\"type\":\"invalid_request_error\"}}\n"
end
# Number of IDs to generate: defaults to 16, taken from "max_tokens" when
# that key is present, reset to 16 when non-positive, and capped at 256.
n_new = 16
if SpinelKit::Json.has_key?(body, "max_tokens")
n_new = SpinelKit::Json.get_int(body, "max_tokens")
end
if n_new <= 0; n_new = 16; end
if n_new > 256; n_new = 256; end
# Timestamps taken immediately around generation so the latency covers only
# the `api_generate_ids` call.
t_start = TinyNN.tnn_events_now_seconds
new_ids = api_generate_ids(prompt_ids, n_new)
t_end = TinyNN.tnn_events_now_seconds
prompt_len = prompt_ids.length
completion_len = new_ids.length
# Scaled by 1e6 and truncated to an integer; presumably the clock returns
# seconds, making this microseconds -- confirm against TinyNN.
latency_us = ((t_end - t_start) * 1.0e6).to_i
# Event emission is optional: only when the events subsystem reports active.
if TinyNN.tnn_events_active == 1
# Per-request sequence number, used to build the "req-N" request id.
STATE.req_seq = STATE.req_seq + 1
req_id = "req-" + STATE.req_seq.to_s
# The event JSON is assembled by plain string concatenation.
ev = "{\"kind\":\"eval\",\"phase\":\"serve\""
ev = ev + ",\"t\":" + TinyNN.tnn_events_now_seconds.to_s
ev = ev + ",\"name\":\"request\""
ev = ev + ",\"extra\":{"
# NOTE(review): the model name is inserted here without going through
# SpinelKit::Json.encode_pair_str (which the response below uses); if that
# helper escapes strings, this path does not -- confirm model names are safe.
ev = ev + "\"model\":\"" + STATE.model_name + "\""
ev = ev + ",\"prompt_tokens\":" + prompt_len.to_s
ev = ev + ",\"completion_tokens\":" + completion_len.to_s
ev = ev + ",\"latency_us\":" + latency_us.to_s
# Records the effective (defaulted/capped) value, not the raw request value.
ev = ev + ",\"sampling\":{\"max_tokens\":" + n_new.to_s + "}"
ev = ev + ",\"request_id\":\"" + req_id + "\""
ev = ev + "}}"
TinyNN.tnn_events_emit(ev)
end
# Success payload (the method's return value). "text" is always the empty
# string and the generated IDs are returned under "ids"; "finish_reason" is
# always "length", regardless of how many IDs were actually produced.
"{" +
SpinelKit::Json.encode_pair_str("id", api_gen_id("cmpl")) + "," +
SpinelKit::Json.encode_pair_str("object", "text_completion") + "," +
SpinelKit::Json.encode_pair_int("created", api_now_unix) + "," +
SpinelKit::Json.encode_pair_str("model", STATE.model_name) + "," +
"\"choices\":[{\"index\":0," +
SpinelKit::Json.encode_pair_str("text", "") + "," +
"\"ids\":" + SpinelKit::Json.from_int_array(new_ids) + "," +
"\"finish_reason\":\"length\"}]," +
"\"usage\":{" +
SpinelKit::Json.encode_pair_int("prompt_tokens", prompt_len) + "," +
SpinelKit::Json.encode_pair_int("completion_tokens", completion_len) + "," +
SpinelKit::Json.encode_pair_int("total_tokens", prompt_len + completion_len) +
"}}\n"
end
|