Class: Hyperion::CParser
- Inherits:
-
Object
- Object
- Hyperion::CParser
- Defined in:
- ext/hyperion_http/parser.c
Constant Summary collapse
- PREINTERNED_HEADERS =
rb_aHeaderTable
Class Method Summary collapse
-
.build_access_line(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line(format, ts, method, path, query, status, duration_ms, remote_addr, http_version) -> String.
-
.build_access_line_colored(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line_colored(format, ts, method, path, query, status, duration_ms, remote_addr, http_version) -> String.
-
.build_env(env, request) ⇒ Object
Hyperion::CParser.build_env(env, request) -> env.
-
.build_response_head(rb_status, rb_reason, rb_headers, rb_body_size, rb_keep_alive, rb_date) ⇒ Object
Hyperion::CParser.build_response_head(status, reason, headers, body_size, keep_alive, date_str) -> String.
-
.chunked_body_complete?(rb_buffer, rb_body_start) ⇒ Boolean
Hyperion::CParser.chunked_body_complete?(buffer, body_start) -> [complete?, end_offset].
-
.parse_cookie_header(rb_cookie) ⇒ Object
Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash.
-
.upcase_underscore(rb_name) ⇒ Object
Hyperion::CParser.upcase_underscore(name) -> “HTTP_<UPCASED_UNDERSCORED>”.
Instance Method Summary collapse
-
#parse(buffer) ⇒ Object
parse(buffer) -> [Request, end_offset].
Class Method Details
.build_access_line(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line(format, ts, method, path, query,
status, duration_ms, remote_addr,
http_version) -> String
Hand-rolled access-log line builder used by Hyperion::Logger#access on the hot path. The Ruby version allocates 1-2 throwaway Strings per line; this builds the line directly into one pre-sized Ruby String buffer (rb_str_buf_new, grown by rb_str_cat on overflow for extreme cases) and returns that single String. ~10× faster on the common case, which closes the perf gap between log_requests on/off.
`format` is :text or :json (Symbol); any value other than :json selects the text format. The format strings here mirror Logger#build_access_text / #build_access_json byte-for-byte (no colour — the C builder is only used when @colorize is false, i.e. non-TTY production deployments where access logs are the highest-volume log line).
String inputs are passed through verbatim. Access logs are best-effort structured output, not a security boundary; CRLF in path/remote_addr would be a log-injection nuisance but cannot escalate. Status (int) and duration_ms (double/Numeric) go through snprintf, which is type-safe.
1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 |
# File 'ext/hyperion_http/parser.c', line 1073
/*
 * Hyperion::CParser.build_access_line(format, ts, method, path, query,
 *                                     status, duration_ms, remote_addr,
 *                                     http_version) -> String
 *
 * Builds one newline-terminated access-log line into a single Ruby String.
 *
 *   format_sym      - the Symbol :json selects the JSON record; any other
 *                     value selects the key=value text line.
 *   rb_ts, rb_method, rb_path, rb_http_version
 *                   - Strings, appended verbatim (no escaping).
 *   rb_query        - String or nil; left out of the line when nil or empty.
 *   rb_status       - converted with NUM2INT.
 *   rb_duration     - converted with NUM2DBL and printed with %g.
 *   rb_remote       - String or nil; nil/empty renders as null (JSON) or
 *                     nil (text).
 *
 * Raises TypeError (via Check_Type) when any String argument is of another
 * type; conversion errors from NUM2INT / NUM2DBL propagate unchanged.
 */
static VALUE cbuild_access_line(VALUE self,
                                VALUE format_sym, VALUE rb_ts, VALUE rb_method,
                                VALUE rb_path, VALUE rb_query, VALUE rb_status,
                                VALUE rb_duration, VALUE rb_remote,
                                VALUE rb_http_version) {
    (void)self;
    Check_Type(rb_ts, T_STRING);
    Check_Type(rb_method, T_STRING);
    Check_Type(rb_path, T_STRING);
    Check_Type(rb_http_version, T_STRING);
    /* query and remote_addr are optional (nil), but anything non-nil must
     * be a String: RSTRING_LEN / RSTRING_PTR on any other object type reads
     * unrelated memory. Fail with TypeError instead. */
    if (!NIL_P(rb_query)) {
        Check_Type(rb_query, T_STRING);
    }
    if (!NIL_P(rb_remote)) {
        Check_Type(rb_remote, T_STRING);
    }

    int is_json = (TYPE(format_sym) == T_SYMBOL) &&
                  (SYM2ID(format_sym) == rb_intern("json"));
    int status = NUM2INT(rb_status);
    double dur_ms = NUM2DBL(rb_duration);
    int has_query = !NIL_P(rb_query) && RSTRING_LEN(rb_query) > 0;
    int has_remote = !NIL_P(rb_remote) && RSTRING_LEN(rb_remote) > 0;

    /* 512-byte initial capacity covers the vast majority of access-log
     * lines (timestamp + level + path + status + addr ~= 200 bytes).
     * rb_str_cat grows the buffer on overflow.
     *
     * We use a CAT_LIT macro for literal-string appends so the compiler
     * computes length via sizeof — manual byte counts on hand-rolled
     * literal lengths are an off-by-one waiting to happen. */
#define CAT_LIT(b, s) rb_str_cat((b), (s), (long)(sizeof(s) - 1))
    VALUE buf = rb_str_buf_new(512);

    if (is_json) {
        /* Prefix: {"ts":"...","level":"info","source":"hyperion","message":"request", */
        CAT_LIT(buf, "{\"ts\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, "\",\"level\":\"info\",\"source\":\"hyperion\",\"message\":\"request\",");
        CAT_LIT(buf, "\"method\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, "\",\"path\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        CAT_LIT(buf, "\"");
        if (has_query) {
            CAT_LIT(buf, ",\"query\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_query), RSTRING_LEN(rb_query));
            CAT_LIT(buf, "\"");
        }
        /* Worst case is 50 bytes (11-char %d, 13-char %g), so 64 suffices. */
        char num[64];
        int n = snprintf(num, sizeof(num), ",\"status\":%d,\"duration_ms\":%g,",
                         status, dur_ms);
        rb_str_cat(buf, num, n);
        if (has_remote) {
            CAT_LIT(buf, "\"remote_addr\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
            CAT_LIT(buf, "\",");
        } else {
            CAT_LIT(buf, "\"remote_addr\":null,");
        }
        CAT_LIT(buf, "\"http_version\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\"}\n");
    } else {
        /* text: "<ts> INFO [hyperion] message=request method=... path=... [query=...] status=... duration_ms=... remote_addr=... http_version=...\n" */
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, " INFO [hyperion] message=request method=");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, " path=");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        if (has_query) {
            /* Mirror Logger#quote_if_needed: quote if value contains
             * whitespace, '"', or '='. Hot path skips quoting. */
            const char *q_ptr = RSTRING_PTR(rb_query);
            long q_len = RSTRING_LEN(rb_query);
            int need_quote = 0;
            for (long j = 0; j < q_len; j++) {
                char c = q_ptr[j];
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                    c == '"' || c == '=') {
                    need_quote = 1;
                    break;
                }
            }
            if (need_quote) {
                /* Defer to Ruby's String#inspect for correct quoting. */
                VALUE quoted = rb_funcall(rb_query, rb_intern("inspect"), 0);
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, RSTRING_PTR(quoted), RSTRING_LEN(quoted));
                /* `quoted` lives only in this frame; rb_str_cat may
                 * allocate (and so run GC) while still reading from its
                 * interior pointer. Keep the VALUE visibly live. */
                RB_GC_GUARD(quoted);
            } else {
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, q_ptr, q_len);
            }
        }
        /* Worst case is 58 bytes, so 80 suffices. */
        char num[80];
        /* Use %g to match the existing Ruby format which interpolates
         * Float#to_s (no fixed precision). Status is an int. */
        int n = snprintf(num, sizeof(num), " status=%d duration_ms=%g remote_addr=",
                         status, dur_ms);
        rb_str_cat(buf, num, n);
        if (has_remote) {
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
        } else {
            CAT_LIT(buf, "nil");
        }
        CAT_LIT(buf, " http_version=");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\n");
    }
    return buf;
}
|
.build_access_line_colored(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line_colored(format, ts, method, path, query,
status, duration_ms, remote_addr,
http_version) -> String
TTY-coloured variant of build_access_line. The text path wraps the level label in ANSI escapes, “\e[32mINFO \e[0m”, so a developer running Hyperion in a terminal sees a green INFO tag. The :json branch is identical to the non-coloured builder — JSON access lines are machine-readable and never carry ANSI escapes.
Lifted from cbuild_access_line above; the only divergence is the level label injection in the text branch. We deliberately duplicate the text format rather than templating, because the text body is short and a single function with a colour flag would compile to the same code with an extra branch in the hot loop.
1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 |
# File 'ext/hyperion_http/parser.c', line 1208
/*
 * Hyperion::CParser.build_access_line_colored(format, ts, method, path, query,
 *                                             status, duration_ms, remote_addr,
 *                                             http_version) -> String
 *
 * Builds one newline-terminated access-log line. The text format wraps the
 * INFO level label in the ANSI sequence ESC[32m ... ESC[0m; the :json format
 * carries no escapes.
 *
 *   format_sym      - the Symbol :json selects the JSON record; any other
 *                     value selects the coloured key=value text line.
 *   rb_ts, rb_method, rb_path, rb_http_version
 *                   - Strings, appended verbatim (no escaping).
 *   rb_query        - String or nil; left out of the line when nil or empty.
 *   rb_status       - converted with NUM2INT.
 *   rb_duration     - converted with NUM2DBL and printed with %g.
 *   rb_remote       - String or nil; nil/empty renders as null (JSON) or
 *                     nil (text).
 *
 * Raises TypeError (via Check_Type) when any String argument is of another
 * type; conversion errors from NUM2INT / NUM2DBL propagate unchanged.
 */
static VALUE cbuild_access_line_colored(VALUE self,
                                        VALUE format_sym, VALUE rb_ts,
                                        VALUE rb_method, VALUE rb_path,
                                        VALUE rb_query, VALUE rb_status,
                                        VALUE rb_duration, VALUE rb_remote,
                                        VALUE rb_http_version) {
    (void)self;
    Check_Type(rb_ts, T_STRING);
    Check_Type(rb_method, T_STRING);
    Check_Type(rb_path, T_STRING);
    Check_Type(rb_http_version, T_STRING);
    /* query and remote_addr are optional (nil), but anything non-nil must
     * be a String: RSTRING_LEN / RSTRING_PTR on any other object type reads
     * unrelated memory. Fail with TypeError instead. */
    if (!NIL_P(rb_query)) {
        Check_Type(rb_query, T_STRING);
    }
    if (!NIL_P(rb_remote)) {
        Check_Type(rb_remote, T_STRING);
    }

    int is_json = (TYPE(format_sym) == T_SYMBOL) &&
                  (SYM2ID(format_sym) == rb_intern("json"));
    int status = NUM2INT(rb_status);
    double dur_ms = NUM2DBL(rb_duration);
    int has_query = !NIL_P(rb_query) && RSTRING_LEN(rb_query) > 0;
    int has_remote = !NIL_P(rb_remote) && RSTRING_LEN(rb_remote) > 0;

    /* Literal appends take their length from sizeof, not a hand count. */
#define CAT_LIT(b, s) rb_str_cat((b), (s), (long)(sizeof(s) - 1))
    VALUE buf = rb_str_buf_new(512);

    if (is_json) {
        /* JSON output is identical to the non-coloured path — ANSI escapes
         * have no place in a structured log record. */
        CAT_LIT(buf, "{\"ts\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, "\",\"level\":\"info\",\"source\":\"hyperion\",\"message\":\"request\",");
        CAT_LIT(buf, "\"method\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, "\",\"path\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        CAT_LIT(buf, "\"");
        if (has_query) {
            CAT_LIT(buf, ",\"query\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_query), RSTRING_LEN(rb_query));
            CAT_LIT(buf, "\"");
        }
        /* Worst case is 50 bytes (11-char %d, 13-char %g), so 64 suffices. */
        char num[64];
        int n = snprintf(num, sizeof(num), ",\"status\":%d,\"duration_ms\":%g,",
                         status, dur_ms);
        rb_str_cat(buf, num, n);
        if (has_remote) {
            CAT_LIT(buf, "\"remote_addr\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
            CAT_LIT(buf, "\",");
        } else {
            CAT_LIT(buf, "\"remote_addr\":null,");
        }
        CAT_LIT(buf, "\"http_version\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\"}\n");
    } else {
        /* text: "<ts> \e[32mINFO \e[0m [hyperion] message=request method=..." */
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, " \x1b[32mINFO \x1b[0m [hyperion] message=request method=");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, " path=");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        if (has_query) {
            /* Quote the query only when it contains whitespace, '"' or '='. */
            const char *q_ptr = RSTRING_PTR(rb_query);
            long q_len = RSTRING_LEN(rb_query);
            int need_quote = 0;
            for (long j = 0; j < q_len; j++) {
                char c = q_ptr[j];
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                    c == '"' || c == '=') {
                    need_quote = 1;
                    break;
                }
            }
            if (need_quote) {
                /* String#inspect supplies the quoting and escaping. */
                VALUE quoted = rb_funcall(rb_query, rb_intern("inspect"), 0);
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, RSTRING_PTR(quoted), RSTRING_LEN(quoted));
                /* `quoted` lives only in this frame; rb_str_cat may
                 * allocate (and so run GC) while still reading from its
                 * interior pointer. Keep the VALUE visibly live. */
                RB_GC_GUARD(quoted);
            } else {
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, q_ptr, q_len);
            }
        }
        /* Worst case is 58 bytes, so 80 suffices. */
        char num[80];
        int n = snprintf(num, sizeof(num), " status=%d duration_ms=%g remote_addr=",
                         status, dur_ms);
        rb_str_cat(buf, num, n);
        if (has_remote) {
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
        } else {
            CAT_LIT(buf, "nil");
        }
        CAT_LIT(buf, " http_version=");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\n");
    }
    return buf;
}
|
.build_env(env, request) ⇒ Object
Hyperion::CParser.build_env(env, request) -> env
Phase 3a (1.7.1) — populate the Rack env hash with REQUEST_METHOD, PATH_INFO, QUERY_STRING, HTTP_VERSION, SERVER_PROTOCOL, CONTENT_TYPE, CONTENT_LENGTH, and HTTP_<UPCASED_UNDERSCORED> for every parsed header.
The Ruby caller (Hyperion::Adapter::Rack#build_env) sets the rest of the Rack-required keys (rack.input, REMOTE_ADDR, SERVER_NAME/PORT, …) since those need a StringIO from a pool and a peer-address split. The header loop is the bytewise-bound piece and the only thing worth pulling into C — moving the full env build would mean threading the pool, host splitter, and version constant through the FFI boundary for ~no extra win.
Returns the same env Hash (callers can either chain or ignore).
1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 |
# File 'ext/hyperion_http/parser.c', line 1598
/*
 * Hyperion::CParser.build_env(env, request) -> env
 *
 * Copies the request line fields stored as instance variables on `request`
 * into the Rack env Hash, then hands every entry of the request's headers
 * Hash to build_env_iter (defined elsewhere in this file; presumably it
 * writes the HTTP_* / CONTENT_* keys — confirm there).
 *
 * Raises TypeError when `env` is not a Hash. Returns the same `env` object.
 */
static VALUE cbuild_env(VALUE self, VALUE env, VALUE request) {
    (void)self;
    Check_Type(env, T_HASH);

    /* Instance variables are read straight off the object: no method
     * dispatch. Insertion order into env is REQUEST_METHOD, PATH_INFO,
     * QUERY_STRING, SERVER_PROTOCOL, HTTP_VERSION. */
    rb_hash_aset(env, rb_kREQUEST_METHOD, rb_ivar_get(request, id_iv_method));
    rb_hash_aset(env, rb_kPATH_INFO, rb_ivar_get(request, id_iv_path));
    rb_hash_aset(env, rb_kQUERY_STRING, rb_ivar_get(request, id_iv_query_string));

    /* The same version String backs both protocol keys. */
    VALUE proto = rb_ivar_get(request, id_iv_http_version);
    rb_hash_aset(env, rb_kSERVER_PROTOCOL, proto);
    rb_hash_aset(env, rb_kHTTP_VERSION, proto);

    /* Anything other than a Hash in @headers is silently ignored. */
    VALUE hdrs = rb_ivar_get(request, id_iv_headers);
    if (TYPE(hdrs) != T_HASH) {
        return env;
    }
    rb_hash_foreach(hdrs, build_env_iter, env);
    return env;
}
|
.build_response_head(rb_status, rb_reason, rb_headers, rb_body_size, rb_keep_alive, rb_date) ⇒ Object
Hyperion::CParser.build_response_head(status, reason, headers, body_size,
keep_alive, date_str) -> String
Builds the HTTP/1.1 response head:
"HTTP/1.1 <status> <reason>\r\n"
"<lowercased-key>: <value>\r\n" for each user header (except
content-length / connection — we always set these from the framing
args below, mirroring the rc16 Ruby behaviour where the normalized
hash is overridden in place).
"content-length: <body_size>\r\n"
"connection: <close|keep-alive>\r\n"
"date: <date_str>\r\n" (only if user headers didn't include 'date')
"\r\n"
Header values containing CR/LF raise ArgumentError (response-splitting guard). Bypasses Ruby Hash#each + per-line String#<< allocation; the status line, framing headers, and join slices live in C buffers.
2.13-B — four CPU savings over the rc17 baseline:
1. Common (status, reason) pairs hit a static table of pre-baked
"HTTP/1.1 NNN <reason>\r\n" lines — one rb_str_cat replaces the
per-request snprintf + reason-cat + CRLF-cat triple.
2. Header iteration uses rb_hash_foreach instead of
`rb_funcall(:keys)` + per-key `rb_hash_aref` — eliminates the
keys-Array allocation and the N hash lookups per call.
3. Per-key downcase result + "<lc>: " prefix is cached on the
input frozen String's identity (capped at 64 entries; a
misbehaving app emitting unique keys per request just falls
back to the slow path on overflow). For the canonical Rack-3
app emitting `'content-type' / 'cache-control' / ...` from
frozen literals, every header lookup is a single st hit.
4. (key, value) full-line cache: both sides are frozen-literal
Strings (e.g. `'cache-control' => 'no-store'`) — entire
"<lc-key>: <value>\r\n" line is one rb_str_cat after the first
request populates the cache. Capped at 256 entries.
938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 |
# File 'ext/hyperion_http/parser.c', line 938
/*
 * Hyperion::CParser.build_response_head(status, reason, headers, body_size,
 *                                       keep_alive, date_str) -> String
 *
 * Assembles an HTTP/1.1 response head in this order: status line, the user
 * headers (emitted by build_head_each), one framing header (content-length,
 * or transfer-encoding: chunked when body_size is -1), the connection
 * header, a date header unless build_head_each reported one, and the
 * terminating blank line.
 *
 * Raises TypeError when rb_headers is not a Hash or rb_reason / rb_date are
 * not Strings, and ArgumentError when body_size is below -1.
 */
static VALUE cbuild_response_head(VALUE self, VALUE rb_status, VALUE rb_reason,
                                  VALUE rb_headers, VALUE rb_body_size,
                                  VALUE rb_keep_alive, VALUE rb_date) {
    (void)self;
    Check_Type(rb_headers, T_HASH);
    Check_Type(rb_reason, T_STRING);
    Check_Type(rb_date, T_STRING);

    const int code = NUM2INT(rb_status);
    const long content_len = NUM2LONG(rb_body_size);
    const int persistent = RTEST(rb_keep_alive);

    /* -1 is the only negative value with a meaning (chunked framing);
     * anything lower is a caller bug and is rejected up front rather than
     * being treated as chunked. */
    if (content_len < -1) {
        rb_raise(rb_eArgError,
                 "body_size must be >= 0 (or -1 for chunked sentinel), got %ld",
                 content_len);
    }
    const int chunked = (content_len == -1);

    /* 1 KiB initial capacity; rb_str_cat grows the String when needed. */
    VALUE head = rb_str_buf_new(1024);

    /* Status line: use the pre-baked bytes when lookup_status_line knows
     * this (status, reason) pair, otherwise format it by hand. */
    const struct status_line *baked =
        lookup_status_line(code, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
    if (baked == NULL) {
        char prefix[48];
        int prefix_len = snprintf(prefix, sizeof(prefix), "HTTP/1.1 %d ", code);
        rb_str_cat(head, prefix, prefix_len);
        rb_str_cat(head, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
        rb_str_cat(head, "\r\n", 2);
    } else {
        rb_str_cat(head, baked->bytes, baked->len);
    }

    /* User headers. The iterator receives the output buffer, a has_date
     * flag it may set, and the chunked flag, through this state struct. */
    build_head_state_t walk = { head, 0, chunked };
    rb_hash_foreach(rb_headers, build_head_each, (VALUE)&walk);

    /* Framing header: exactly one of content-length / transfer-encoding. */
    if (!chunked) {
        char digits[24];
        /* itoa_positive_decimal returns the offset of the first digit;
         * the digits run from there to the end of the scratch array. */
        int first = itoa_positive_decimal(content_len, digits, (int)sizeof(digits));
        rb_str_cat(head, "content-length: ", 16);
        rb_str_cat(head, digits + first, sizeof(digits) - first);
        rb_str_cat(head, "\r\n", 2);
    } else {
        rb_str_cat(head, "transfer-encoding: chunked\r\n", 28);
    }

    /* Connection header. */
    if (!persistent) {
        rb_str_cat(head, "connection: close\r\n", 19);
    } else {
        rb_str_cat(head, "connection: keep-alive\r\n", 24);
    }

    /* Date header, only when the user headers did not supply one. */
    if (!walk.has_date) {
        rb_str_cat(head, "date: ", 6);
        rb_str_cat(head, RSTRING_PTR(rb_date), RSTRING_LEN(rb_date));
        rb_str_cat(head, "\r\n", 2);
    }

    /* Blank line that ends the head. */
    rb_str_cat(head, "\r\n", 2);
    return head;
}
|
.chunked_body_complete?(rb_buffer, rb_body_start) ⇒ Boolean
Hyperion::CParser.chunked_body_complete?(buffer, body_start)
-> [complete?, end_offset]
Walks chunked-transfer framing in `buffer` starting at byte offset `body_start`. Returns a 2-element array:
[true, end_offset] — chunked body fully buffered; end_offset is the
byte just after the trailer CRLF (where pipelined
bytes from a follow-on request would begin).
[false, last_safe] — body is not yet complete; last_safe is the
furthest cursor we successfully advanced to,
useful as a hint for incremental parsing.
Mirrors Connection#chunked_body_complete? in pure Ruby — see lib/hyperion/connection.rb. Whitespace around the size token (e.g. “5 ; ext\r\n”) is accepted as a permissive parse, matching the upstream Ruby `.strip`.
1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 |
# File 'ext/hyperion_http/parser.c', line 1387
/*
 * Hyperion::CParser.chunked_body_complete?(buffer, body_start)
 *     -> [complete?, end_offset]
 *
 * Walks chunked-transfer framing in `rb_buffer` from byte offset
 * `rb_body_start`.
 *
 *   [true,  end]       - the zero-size chunk and the empty line ending the
 *                        trailer section are both buffered; `end` is the
 *                        offset just past that empty line.
 *   [false, last_safe] - framing is incomplete or unparsable so far;
 *                        `last_safe` is the offset just past the last chunk
 *                        that was fully buffered (initially body_start).
 *
 * Raises TypeError when rb_buffer is not a String and ArgumentError when
 * body_start lies outside 0..buffer length.
 *
 * A chunk size that does not fit in an unsigned long is reported as
 * [false, last_safe]. It must never wrap around, because a wrapped value of
 * 0 would be mistaken for the terminating chunk.
 */
static VALUE cchunked_body_complete(VALUE self, VALUE rb_buffer, VALUE rb_body_start) {
    (void)self;
    Check_Type(rb_buffer, T_STRING);
    /* Convert the offset before taking interior pointers: NUM2LONG can call
     * back into Ruby (to_int), which could otherwise invalidate `data`. */
    long cursor = NUM2LONG(rb_body_start);
    const char *data = RSTRING_PTR(rb_buffer);
    long len = RSTRING_LEN(rb_buffer);
    if (cursor < 0 || cursor > len) {
        rb_raise(rb_eArgError, "body_start out of range");
    }
    long last_safe = cursor;
    VALUE result = rb_ary_new_capa(2);

    while (1) {
        /* Find the next CRLF starting at cursor. */
        long line_end = -1;
        for (long i = cursor; i + 1 < len; i++) {
            if (data[i] == '\r' && data[i + 1] == '\n') {
                line_end = i;
                break;
            }
        }
        if (line_end < 0) {
            goto incomplete;
        }

        /* Size token: everything up to ';' (chunk extensions after it are
         * ignored wholesale) or the end of the line. */
        long tok_start = cursor;
        long tok_end = line_end;
        for (long i = cursor; i < line_end; i++) {
            if (data[i] == ';') { tok_end = i; break; }
        }
        /* Trim leading/trailing ASCII whitespace from the token. */
        while (tok_start < tok_end &&
               (data[tok_start] == ' ' || data[tok_start] == '\t')) {
            tok_start++;
        }
        while (tok_end > tok_start &&
               (data[tok_end - 1] == ' ' || data[tok_end - 1] == '\t')) {
            tok_end--;
        }
        if (tok_end <= tok_start) {
            /* Empty size token — incomplete frame. */
            goto incomplete;
        }

        /* Validate + decode hex. */
        unsigned long size = 0;
        for (long i = tok_start; i < tok_end; i++) {
            unsigned char c = (unsigned char)data[i];
            unsigned int digit;
            if (c >= '0' && c <= '9') {
                digit = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                digit = 10 + (c - 'a');
            } else if (c >= 'A' && c <= 'F') {
                digit = 10 + (c - 'A');
            } else {
                /* Non-hex byte: incomplete/malformed. Match the Ruby
                 * regex `/\A\h+\z/` semantics — return false, advance no
                 * further. */
                goto incomplete;
            }
            /* Refuse to shift out the top nibble. Without this check a
             * 17-digit size such as "10000000000000000" wraps to 0 and is
             * read as the terminating chunk. */
            if (size > (~0UL >> 4)) {
                goto incomplete;
            }
            size = (size << 4) | digit;
        }

        cursor = line_end + 2;

        if (size == 0) {
            /* Final chunk — walk trailer lines until an empty one. */
            while (1) {
                long nl = -1;
                for (long i = cursor; i + 1 < len; i++) {
                    if (data[i] == '\r' && data[i + 1] == '\n') {
                        nl = i;
                        break;
                    }
                }
                if (nl < 0) {
                    goto incomplete;
                }
                if (nl == cursor) {
                    /* Empty line — body complete. */
                    rb_ary_push(result, Qtrue);
                    rb_ary_push(result, LONG2NUM(nl + 2));
                    RB_GC_GUARD(rb_buffer);
                    return result;
                }
                cursor = nl + 2;
            }
        }

        /* Need `size` data bytes plus the trailing CRLF. Compare without
         * computing size + 2, which can wrap for sizes near the maximum. */
        unsigned long avail = (unsigned long)(len - cursor);
        if (size > avail || avail - size < 2) {
            goto incomplete;
        }
        /* size <= avail <= LONG_MAX here, so the cast cannot go negative. */
        cursor += (long)size + 2;
        last_safe = cursor;
    }

incomplete:
    rb_ary_push(result, Qfalse);
    rb_ary_push(result, LONG2NUM(last_safe));
    RB_GC_GUARD(rb_buffer);
    return result;
}
|
.parse_cookie_header(rb_cookie) ⇒ Object
Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash
Phase 3b (1.7.1) — split a single Cookie header value into its { “name” => “value” } pairs.
Standard format: “name1=val1; name2=val2; name3=val3”. Leading/trailing ASCII whitespace is trimmed around each pair and around each key. Empty values are valid. Pairs without `=` are skipped (RFC 6265 calls them ignorable), as are pairs with an empty name. Repeated names are last-wins — middlewares that need RFC-strict merge can override.
Cookies are NOT URL-decoded by spec; values are opaque octets. We leave them verbatim. The returned Hash is mutable so the caller can extend it (e.g. for session-cookie hot-swaps).
1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 |
# File 'ext/hyperion_http/parser.c', line 1638
/*
 * Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash
 *
 * Splits a Cookie header value on ';' into name/value Strings. Spaces and
 * tabs are trimmed around each pair, after the name and before the value.
 * Pairs with no '=' or with an empty name are dropped; an empty value is
 * kept. A later pair with the same name overwrites an earlier one. Values
 * are copied verbatim (no decoding).
 *
 * Raises TypeError when rb_cookie is not a String.
 */
static VALUE cparse_cookie_header(VALUE self, VALUE rb_cookie) {
    (void)self;
    Check_Type(rb_cookie, T_STRING);

    VALUE jar = rb_hash_new();
    const char *p = RSTRING_PTR(rb_cookie);
    const char *const end = p + RSTRING_LEN(rb_cookie);

    for (;;) {
        /* Step over blanks and stray separators before the next pair. */
        while (p < end && (*p == ' ' || *p == '\t' || *p == ';')) {
            p++;
        }
        if (p >= end) {
            break;
        }

        /* The pair spans [first, last): up to the next ';' or the end of
         * input, minus trailing blanks. */
        const char *first = p;
        while (p < end && *p != ';') {
            p++;
        }
        const char *last = p;
        while (last > first && (last[-1] == ' ' || last[-1] == '\t')) {
            last--;
        }
        if (last == first) {
            continue;
        }

        /* Locate the first '=' in the pair; none means "not a cookie". */
        const char *eq = first;
        while (eq < last && *eq != '=') {
            eq++;
        }
        if (eq == last) {
            continue;
        }

        /* Name: [first, name_end) with trailing blanks removed. */
        const char *name_end = eq;
        while (name_end > first &&
               (name_end[-1] == ' ' || name_end[-1] == '\t')) {
            name_end--;
        }
        if (name_end == first) {
            continue;
        }

        /* Value: [value, last) with leading blanks removed. */
        const char *value = eq + 1;
        while (value < last && (*value == ' ' || *value == '\t')) {
            value++;
        }

        VALUE name_str = rb_str_new(first, (long)(name_end - first));
        VALUE value_str = rb_str_new(value, (long)(last - value));
        rb_hash_aset(jar, name_str, value_str);
    }

    /* The pointers above point into rb_cookie; keep it live to the end. */
    RB_GC_GUARD(rb_cookie);
    return jar;
}
|
.upcase_underscore(rb_name) ⇒ Object
Hyperion::CParser.upcase_underscore(name) -> “HTTP_<UPCASED_UNDERSCORED>”
Single-allocation replacement for `"HTTP_#{name.upcase.tr('-', '_')}"`. Hot path on the Rack adapter: every uncached request header (any `X-*` custom header) hits this on every request, and the Ruby version spawns three String allocations (the upcase result, the tr result, and the “HTTP_…” interpolation) plus a per-byte loop in tr.
We allocate one Ruby String of length 5 + name.bytesize, fill it in a single byte loop, return it. ASCII letters get OR’d with 0x20 inverted (i.e. cleared bit 5 to upcase ‘a’..‘z’); ‘-’ becomes ‘_’; everything else passes through (header names are ASCII per RFC 9110, but multi-byte UTF-8 bytes pass through bytewise unmolested rather than crashing).
Encoding is set to US-ASCII because Ruby’s String#upcase on an ASCII-only input returns a US-ASCII string, and the env-key lookup downstream is encoding-agnostic anyway.
1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 |
# File 'ext/hyperion_http/parser.c', line 1335
/*
 * Hyperion::CParser.upcase_underscore(name) -> "HTTP_<UPCASED_UNDERSCORED>"
 *
 * Returns a new US-ASCII String made of the 5-byte prefix "HTTP_" followed
 * by `name` with ASCII 'a'..'z' upcased and '-' turned into '_'. Every other
 * byte is copied unchanged.
 *
 * Raises TypeError when rb_name is not a String.
 */
static VALUE cupcase_underscore(VALUE self, VALUE rb_name) {
    (void)self;
    Check_Type(rb_name, T_STRING);

    const char *in = RSTRING_PTR(rb_name);
    const long in_len = RSTRING_LEN(rb_name);

    /* One allocation holds the prefix and the transformed name. */
    VALUE env_key = rb_str_new(NULL, 5 + in_len);
    char *w = RSTRING_PTR(env_key);

    static const char prefix[5] = { 'H', 'T', 'T', 'P', '_' };
    for (int k = 0; k < 5; k++) {
        *w++ = prefix[k];
    }

    for (const char *r = in, *stop = in + in_len; r != stop; r++, w++) {
        const unsigned char b = (unsigned char)*r;
        if (b == '-') {
            *w = '_';
        } else if (b >= 'a' && b <= 'z') {
            /* 'a' - 'A' == 32: the distance between the two ASCII cases. */
            *w = (char)(b - ('a' - 'A'));
        } else {
            *w = (char)b;
        }
    }

    rb_enc_associate(env_key, rb_usascii_encoding());
    /* `in` points inside rb_name; keep the source String live until the
     * copy loop has finished. */
    RB_GC_GUARD(rb_name);
    return env_key;
}
|
Instance Method Details
#parse(buffer) ⇒ Object
parse(buffer) -> [Request, end_offset]
Parse one complete HTTP/1.1 request from `buffer`. If buffer doesn’t yet contain a complete request, raise ParseError(“incomplete request”). For pipelined input, end_offset is the byte boundary of the first request — Connection carries the rest forward.
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 |
# File 'ext/hyperion_http/parser.c', line 493
/*
 * parse(buffer) -> [Request, end_offset]
 *
 * Runs llhttp over `buffer` and builds a Request from the fields collected
 * in the per-call parser state.
 *
 * Raises, in this order of precedence:
 *   - UnsupportedError / ParseError carrying the message recorded in the
 *     parser state when parse_error is 2 / 1;
 *   - UnsupportedError when llhttp reports HPE_PAUSED_UPGRADE;
 *   - ParseError "llhttp: <reason>" for any other llhttp error;
 *   - ParseError "incomplete request" when no complete message was seen.
 * Raises TypeError when buffer is not a String.
 */
static VALUE cparser_parse(VALUE self, VALUE buffer) {
    Check_Type(buffer, T_STRING);
    (void)self;

    parser_state_t st;
    state_init(&st);

    llhttp_t http;
    llhttp_init(&http, HTTP_REQUEST, &settings);
    http.data = &st;

    const char *base = RSTRING_PTR(buffer);
    const size_t total = (size_t)RSTRING_LEN(buffer);
    const enum llhttp_errno rc = llhttp_execute(&http, base, total);

    /* Error flags recorded in the parser state win over llhttp's own
     * error code. */
    if (st.parse_error == 2) {
        rb_raise(rb_eUnsupportedError, "%s", st.error_message);
    }
    if (st.parse_error == 1) {
        rb_raise(rb_eParseError, "%s", st.error_message);
    }
    if (rc == HPE_PAUSED_UPGRADE) {
        rb_raise(rb_eUnsupportedError, "Upgrade not supported");
    }
    if (rc != HPE_OK && rc != HPE_PAUSED) {
        const char *why = llhttp_get_error_reason(&http);
        if (why == NULL || *why == '\0') {
            why = llhttp_errno_name(rc);
        }
        rb_raise(rb_eParseError, "llhttp: %s", why);
    }
    if (!st.message_complete) {
        rb_raise(rb_eParseError, "incomplete request");
    }

    /* end_offset: when llhttp stopped in the paused state, the reported
     * error position is used as the offset; in every other case (or if no
     * position is available) the whole buffer counts as consumed. */
    size_t used = total;
    if (rc == HPE_PAUSED) {
        const char *stop = llhttp_get_error_pos(&http);
        if (stop) {
            used = (size_t)(stop - base);
        }
    }

    /* Fields still nil after parsing fall back to shared constants: the
     * empty String, or the HTTP/1.1 constant for the version. */
    VALUE method       = NIL_P(st.method)       ? rb_kEMPTY_STR : st.method;
    VALUE path         = NIL_P(st.path)         ? rb_kEMPTY_STR : st.path;
    VALUE query_string = NIL_P(st.query_string) ? rb_kEMPTY_STR : st.query_string;
    VALUE http_version = NIL_P(st.http_version) ? rb_kHTTP_1_1  : st.http_version;
    VALUE body         = NIL_P(st.body)         ? rb_kEMPTY_STR : st.body;

    /* Request.new takes keyword arguments; pass them as one trailing Hash
     * flagged with RB_PASS_KEYWORDS. */
    VALUE kw = rb_hash_new();
    rb_hash_aset(kw, ID2SYM(id_method_kw), method);
    rb_hash_aset(kw, ID2SYM(id_path_kw), path);
    rb_hash_aset(kw, ID2SYM(id_query_string_kw), query_string);
    rb_hash_aset(kw, ID2SYM(id_http_version_kw), http_version);
    rb_hash_aset(kw, ID2SYM(id_headers_kw), st.headers);
    rb_hash_aset(kw, ID2SYM(id_body_kw), body);

    VALUE request = rb_funcallv_kw(rb_cRequest, id_new, 1, &kw, RB_PASS_KEYWORDS);
    return rb_ary_new_from_args(2, request, ULONG2NUM((unsigned long)used));
}
|