Class: Hyperion::CParser
- Inherits:
-
Object
- Object
- Hyperion::CParser
- Defined in:
- ext/hyperion_http/parser.c
Constant Summary collapse
- PREINTERNED_HEADERS =
rb_aHeaderTable
Class Method Summary collapse
-
.build_access_line(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line(format, ts, method, path, query, status, duration_ms, remote_addr, http_version) -> String.
-
.build_access_line_colored(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line_colored(format, ts, method, path, query, status, duration_ms, remote_addr, http_version) -> String.
-
.build_env(env, request) ⇒ Object
Hyperion::CParser.build_env(env, request) -> env.
-
.build_response_head(rb_status, rb_reason, rb_headers, rb_body_size, rb_keep_alive, rb_date) ⇒ Object
Hyperion::CParser.build_response_head(status, reason, headers, body_size, keep_alive, date_str) -> String.
-
.chunked_body_complete?(rb_buffer, rb_body_start) ⇒ Boolean
Hyperion::CParser.chunked_body_complete?(buffer, body_start) -> [complete?, end_offset].
-
.parse_cookie_header(rb_cookie) ⇒ Object
Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash.
-
.upcase_underscore(rb_name) ⇒ Object
Hyperion::CParser.upcase_underscore(name) -> “HTTP_<UPCASED_UNDERSCORED>”.
Instance Method Summary collapse
-
#parse(buffer) ⇒ Object
parse(buffer) -> [Request, end_offset].
Class Method Details
.build_access_line(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line(format, ts, method, path, query,
status, duration_ms, remote_addr,
http_version) -> String
Hand-rolled access-log line builder used by Hyperion::Logger#access on the hot path. The Ruby version allocates 1-2 throwaway Strings per line; this builds the line directly into one pre-sized Ruby String buffer (rb_str_buf_new(512), grown by rb_str_cat when a line is longer) and returns that single Ruby String. ~10× faster on the common case, which closes the perf gap between log_requests on/off.
`format` is :text or :json (Symbol). The format strings here mirror Logger#build_access_text / #build_access_json byte-for-byte (no colour — the C builder is only used when @colorize is false, i.e. non-TTY production deployments where access logs are the highest-volume log line).
String inputs are passed through verbatim. Access logs are best-effort structured output, not a security boundary; CRLF in path/remote_addr would be a log-injection nuisance but cannot escalate. Status (int) and duration_ms (double/Numeric) go through snprintf, which is type-safe.
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 |
# File 'ext/hyperion_http/parser.c', line 885
/*
 * Hyperion::CParser.build_access_line(format, ts, method, path, query,
 *                                     status, duration_ms, remote_addr,
 *                                     http_version) -> String
 *
 * Builds one access-log line (terminated by "\n") into a single Ruby String.
 *
 *   format        :json selects the JSON layout; any other value selects the
 *                 key=value text layout.
 *   ts, method, path, http_version
 *                 Strings (TypeError otherwise); appended verbatim.
 *   query         String or nil (TypeError otherwise). nil / "" omits the
 *                 field. In the text layout a value containing whitespace,
 *                 '"' or '=' is emitted as String#inspect.
 *   remote_addr   String or nil (TypeError otherwise). nil / "" is rendered
 *                 as null (JSON) or nil (text).
 *   status        converted with NUM2INT.
 *   duration_ms   converted with NUM2DBL and printed with %g.
 *
 * Apart from the text-layout query quoting, String inputs are not escaped.
 */
static VALUE cbuild_access_line(VALUE self,
                                VALUE format_sym, VALUE rb_ts, VALUE rb_method,
                                VALUE rb_path, VALUE rb_query, VALUE rb_status,
                                VALUE rb_duration, VALUE rb_remote,
                                VALUE rb_http_version) {
    (void)self;
    Check_Type(rb_ts, T_STRING);
    Check_Type(rb_method, T_STRING);
    Check_Type(rb_path, T_STRING);
    Check_Type(rb_http_version, T_STRING);
    /* query / remote_addr may be nil, but anything else has to be a String:
     * RSTRING_LEN / RSTRING_PTR on a non-String VALUE (Integer, Symbol, ...)
     * would reinterpret it as an RString and read through a bogus pointer. */
    if (!NIL_P(rb_query)) {
        Check_Type(rb_query, T_STRING);
    }
    if (!NIL_P(rb_remote)) {
        Check_Type(rb_remote, T_STRING);
    }
    int is_json = (TYPE(format_sym) == T_SYMBOL) &&
                  (SYM2ID(format_sym) == rb_intern("json"));
    int status = NUM2INT(rb_status);
    double dur_ms = NUM2DBL(rb_duration);
    int has_query = !NIL_P(rb_query) && RSTRING_LEN(rb_query) > 0;
    int has_remote = !NIL_P(rb_remote) && RSTRING_LEN(rb_remote) > 0;
    /* 512-byte initial capacity; rb_str_cat grows the String on overflow.
     *
     * We use a CAT_LIT macro for literal-string appends so the compiler
     * computes length via sizeof — manual byte counts on hand-rolled
     * literal lengths are an off-by-one waiting to happen. */
#define CAT_LIT(b, s) rb_str_cat((b), (s), (long)(sizeof(s) - 1))
    VALUE buf = rb_str_buf_new(512);
    if (is_json) {
        /* Prefix: {"ts":"...","level":"info","source":"hyperion","message":"request", */
        CAT_LIT(buf, "{\"ts\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, "\",\"level\":\"info\",\"source\":\"hyperion\",\"message\":\"request\",");
        CAT_LIT(buf, "\"method\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, "\",\"path\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        CAT_LIT(buf, "\"");
        if (has_query) {
            CAT_LIT(buf, ",\"query\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_query), RSTRING_LEN(rb_query));
            CAT_LIT(buf, "\"");
        }
        char num[64];
        int n = snprintf(num, sizeof(num), ",\"status\":%d,\"duration_ms\":%g,",
                         status, dur_ms);
        /* snprintf reports the length it *wanted* to write; never hand
         * rb_str_cat more than the scratch buffer actually holds. */
        if (n < 0) {
            n = 0;
        } else if (n >= (int)sizeof(num)) {
            n = (int)sizeof(num) - 1;
        }
        rb_str_cat(buf, num, n);
        if (has_remote) {
            CAT_LIT(buf, "\"remote_addr\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
            CAT_LIT(buf, "\",");
        } else {
            CAT_LIT(buf, "\"remote_addr\":null,");
        }
        CAT_LIT(buf, "\"http_version\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\"}\n");
    } else {
        /* text: "<ts> INFO [hyperion] message=request method=... path=... [query=...] status=... duration_ms=... remote_addr=... http_version=...\n" */
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, " INFO [hyperion] message=request method=");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, " path=");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        if (has_query) {
            /* Mirror Logger#quote_if_needed: quote if value contains
             * whitespace, '"', or '='. Hot path skips quoting. */
            const char *q_ptr = RSTRING_PTR(rb_query);
            long q_len = RSTRING_LEN(rb_query);
            int need_quote = 0;
            for (long j = 0; j < q_len; j++) {
                char c = q_ptr[j];
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                    c == '"' || c == '=') {
                    need_quote = 1;
                    break;
                }
            }
            if (need_quote) {
                /* Defer to Ruby's String#inspect for correct quoting. The
                 * call is dynamic dispatch, so confirm we really got a
                 * String back before touching its buffer. */
                VALUE quoted = rb_funcall(rb_query, rb_intern("inspect"), 0);
                Check_Type(quoted, T_STRING);
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, RSTRING_PTR(quoted), RSTRING_LEN(quoted));
            } else {
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, q_ptr, q_len);
            }
        }
        char num[80];
        /* %g prints the duration with at most 6 significant digits and no
         * trailing zeros. Status is an int. */
        int n = snprintf(num, sizeof(num), " status=%d duration_ms=%g remote_addr=",
                         status, dur_ms);
        if (n < 0) {
            n = 0;
        } else if (n >= (int)sizeof(num)) {
            n = (int)sizeof(num) - 1;
        }
        rb_str_cat(buf, num, n);
        if (has_remote) {
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
        } else {
            CAT_LIT(buf, "nil");
        }
        CAT_LIT(buf, " http_version=");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\n");
    }
    return buf;
}
|
.build_access_line_colored(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line_colored(format, ts, method, path, query,
status, duration_ms, remote_addr,
http_version) -> String
TTY-coloured variant of build_access_line. The text path wraps the level label with ANSI escape “\e[32mINFO \e[0m” so a developer running Hyperion in a terminal sees a green INFO tag. The :json branch is identical to the non-coloured builder — JSON access lines are machine-readable and never carry ANSI escapes.
Lifted from cbuild_access_line above; the only divergence is the level label injection in the text branch. We deliberately duplicate the text format rather than templating, because the text body is short and a single function with a colour flag would compile to the same code with an extra branch in the hot loop.
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 |
# File 'ext/hyperion_http/parser.c', line 1020
/*
 * Hyperion::CParser.build_access_line_colored(format, ts, method, path, query,
 *                                             status, duration_ms, remote_addr,
 *                                             http_version) -> String
 *
 * Same contract as the plain access-line builder; the only difference is that
 * the text layout wraps the level label as "\x1b[32mINFO \x1b[0m". The :json
 * layout carries no ANSI escapes.
 *
 *   format        :json selects the JSON layout; any other value selects the
 *                 coloured key=value text layout.
 *   ts, method, path, http_version
 *                 Strings (TypeError otherwise); appended verbatim.
 *   query         String or nil (TypeError otherwise). nil / "" omits the
 *                 field. In the text layout a value containing whitespace,
 *                 '"' or '=' is emitted as String#inspect.
 *   remote_addr   String or nil (TypeError otherwise). nil / "" is rendered
 *                 as null (JSON) or nil (text).
 *   status        converted with NUM2INT.
 *   duration_ms   converted with NUM2DBL and printed with %g.
 */
static VALUE cbuild_access_line_colored(VALUE self,
                                        VALUE format_sym, VALUE rb_ts,
                                        VALUE rb_method, VALUE rb_path,
                                        VALUE rb_query, VALUE rb_status,
                                        VALUE rb_duration, VALUE rb_remote,
                                        VALUE rb_http_version) {
    (void)self;
    Check_Type(rb_ts, T_STRING);
    Check_Type(rb_method, T_STRING);
    Check_Type(rb_path, T_STRING);
    Check_Type(rb_http_version, T_STRING);
    /* query / remote_addr may be nil, but anything else has to be a String:
     * RSTRING_LEN / RSTRING_PTR on a non-String VALUE (Integer, Symbol, ...)
     * would reinterpret it as an RString and read through a bogus pointer. */
    if (!NIL_P(rb_query)) {
        Check_Type(rb_query, T_STRING);
    }
    if (!NIL_P(rb_remote)) {
        Check_Type(rb_remote, T_STRING);
    }
    int is_json = (TYPE(format_sym) == T_SYMBOL) &&
                  (SYM2ID(format_sym) == rb_intern("json"));
    int status = NUM2INT(rb_status);
    double dur_ms = NUM2DBL(rb_duration);
    int has_query = !NIL_P(rb_query) && RSTRING_LEN(rb_query) > 0;
    int has_remote = !NIL_P(rb_remote) && RSTRING_LEN(rb_remote) > 0;
    /* Literal appends go through CAT_LIT so the length comes from sizeof. */
#define CAT_LIT(b, s) rb_str_cat((b), (s), (long)(sizeof(s) - 1))
    VALUE buf = rb_str_buf_new(512);
    if (is_json) {
        /* JSON output is identical to the non-coloured path — ANSI escapes
         * have no place in a structured log record. */
        CAT_LIT(buf, "{\"ts\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, "\",\"level\":\"info\",\"source\":\"hyperion\",\"message\":\"request\",");
        CAT_LIT(buf, "\"method\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, "\",\"path\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        CAT_LIT(buf, "\"");
        if (has_query) {
            CAT_LIT(buf, ",\"query\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_query), RSTRING_LEN(rb_query));
            CAT_LIT(buf, "\"");
        }
        char num[64];
        int n = snprintf(num, sizeof(num), ",\"status\":%d,\"duration_ms\":%g,",
                         status, dur_ms);
        /* snprintf reports the length it *wanted* to write; never hand
         * rb_str_cat more than the scratch buffer actually holds. */
        if (n < 0) {
            n = 0;
        } else if (n >= (int)sizeof(num)) {
            n = (int)sizeof(num) - 1;
        }
        rb_str_cat(buf, num, n);
        if (has_remote) {
            CAT_LIT(buf, "\"remote_addr\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
            CAT_LIT(buf, "\",");
        } else {
            CAT_LIT(buf, "\"remote_addr\":null,");
        }
        CAT_LIT(buf, "\"http_version\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\"}\n");
    } else {
        /* text: "<ts> \e[32mINFO \e[0m [hyperion] message=request method=..." */
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, " \x1b[32mINFO \x1b[0m [hyperion] message=request method=");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, " path=");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        if (has_query) {
            /* Quote the query only when it contains whitespace, '"' or '='. */
            const char *q_ptr = RSTRING_PTR(rb_query);
            long q_len = RSTRING_LEN(rb_query);
            int need_quote = 0;
            for (long j = 0; j < q_len; j++) {
                char c = q_ptr[j];
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                    c == '"' || c == '=') {
                    need_quote = 1;
                    break;
                }
            }
            if (need_quote) {
                /* String#inspect is dynamic dispatch, so confirm we really
                 * got a String back before touching its buffer. */
                VALUE quoted = rb_funcall(rb_query, rb_intern("inspect"), 0);
                Check_Type(quoted, T_STRING);
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, RSTRING_PTR(quoted), RSTRING_LEN(quoted));
            } else {
                CAT_LIT(buf, " query=");
                rb_str_cat(buf, q_ptr, q_len);
            }
        }
        char num[80];
        int n = snprintf(num, sizeof(num), " status=%d duration_ms=%g remote_addr=",
                         status, dur_ms);
        if (n < 0) {
            n = 0;
        } else if (n >= (int)sizeof(num)) {
            n = (int)sizeof(num) - 1;
        }
        rb_str_cat(buf, num, n);
        if (has_remote) {
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
        } else {
            CAT_LIT(buf, "nil");
        }
        CAT_LIT(buf, " http_version=");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\n");
    }
    return buf;
}
|
.build_env(env, request) ⇒ Object
Hyperion::CParser.build_env(env, request) -> env
Phase 3a (1.7.1) — populate the Rack env hash with REQUEST_METHOD, PATH_INFO, QUERY_STRING, HTTP_VERSION, SERVER_PROTOCOL, CONTENT_TYPE, CONTENT_LENGTH, and HTTP_<UPCASED_UNDERSCORED> for every parsed header.
The Ruby caller (Hyperion::Adapter::Rack#build_env) sets the rest of the Rack-required keys (rack.input, REMOTE_ADDR, SERVER_NAME/PORT, …) since those need a StringIO from a pool and a peer-address split. The header loop is the bytewise-bound piece and the only thing worth pulling into C — moving the full env build would mean threading the pool, host splitter, and version constant through the FFI boundary for ~no extra win.
Returns the same env Hash (callers can either chain or ignore).
1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 |
# File 'ext/hyperion_http/parser.c', line 1410
/*
 * Hyperion::CParser.build_env(env, request) -> env
 *
 * Copies the request line fields of `request` into the Hash `env` and then
 * hands every entry of the request's headers Hash to build_env_iter.
 *
 *   env      must be a Hash (TypeError otherwise); mutated in place.
 *   request  object whose instance variables are read directly.
 *
 * Returns `env` itself.
 */
static VALUE cbuild_env(VALUE self, VALUE env, VALUE request) {
    (void)self;
    Check_Type(env, T_HASH);

    /* Instance variables are fetched straight off the object, bypassing
     * the reader methods. */
    VALUE req_method   = rb_ivar_get(request, id_iv_method);
    VALUE req_path     = rb_ivar_get(request, id_iv_path);
    VALUE req_query    = rb_ivar_get(request, id_iv_query_string);
    VALUE req_version  = rb_ivar_get(request, id_iv_http_version);
    VALUE req_headers  = rb_ivar_get(request, id_iv_headers);

    /* Insertion order below is the order the keys appear in env. */
    rb_hash_aset(env, rb_kREQUEST_METHOD, req_method);
    rb_hash_aset(env, rb_kPATH_INFO, req_path);
    rb_hash_aset(env, rb_kQUERY_STRING, req_query);
    /* The same version value is stored under both protocol keys. */
    rb_hash_aset(env, rb_kSERVER_PROTOCOL, req_version);
    rb_hash_aset(env, rb_kHTTP_VERSION, req_version);

    /* Anything that is not a Hash (e.g. nil) means there is nothing to walk. */
    if (TYPE(req_headers) != T_HASH) {
        return env;
    }
    rb_hash_foreach(req_headers, build_env_iter, env);
    return env;
}
|
.build_response_head(rb_status, rb_reason, rb_headers, rb_body_size, rb_keep_alive, rb_date) ⇒ Object
Hyperion::CParser.build_response_head(status, reason, headers, body_size,
keep_alive, date_str) -> String
Builds the HTTP/1.1 response head:
"HTTP/1.1 <status> <reason>\r\n"
"<lowercased-key>: <value>\r\n" for each user header (except
content-length / connection — we always set these from the framing
args below, mirroring the rc16 Ruby behaviour where the normalized
hash is overridden in place).
"content-length: <body_size>\r\n"
"connection: <close|keep-alive>\r\n"
"date: <date_str>\r\n" (only if user headers didn't include 'date')
"\r\n"
Header values containing CR/LF raise ArgumentError (response-splitting guard). Bypasses Ruby Hash#each + per-line String#<< allocation; the status line, framing headers, and join slices live in C buffers.
2.13-B — four CPU savings over the rc17 baseline:
1. Common (status, reason) pairs hit a static table of pre-baked
"HTTP/1.1 NNN <reason>\r\n" lines — one rb_str_cat replaces the
per-request snprintf + reason-cat + CRLF-cat triple.
2. Header iteration uses rb_hash_foreach instead of
`rb_funcall(:keys)` + per-key `rb_hash_aref` — eliminates the
keys-Array allocation and the N hash lookups per call.
3. Per-key downcase result + "<lc>: " prefix is cached on the
input frozen String's identity (capped at 64 entries; a
misbehaving app emitting unique keys per request just falls
back to the slow path on overflow). For the canonical Rack-3
app emitting `'content-type' / 'cache-control' / ...` from
frozen literals, every header lookup is a single st hit.
4. (key, value) full-line cache: both sides are frozen-literal
Strings (e.g. `'cache-control' => 'no-store'`) — entire
"<lc-key>: <value>\r\n" line is one rb_str_cat after the first
request populates the cache. Capped at 256 entries.
802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 |
# File 'ext/hyperion_http/parser.c', line 802
/*
 * Hyperion::CParser.build_response_head(status, reason, headers, body_size,
 *                                       keep_alive, date_str) -> String
 *
 * Assembles an HTTP/1.1 response head in this order: status line, whatever
 * build_head_each emits for each entry of `headers`, content-length,
 * connection, an optional date line, and the terminating blank line.
 *
 *   status      converted with NUM2INT.
 *   reason      String (TypeError otherwise).
 *   headers     Hash (TypeError otherwise).
 *   body_size   converted with NUM2LONG.
 *   keep_alive  truthiness selects "keep-alive" vs "close".
 *   date_str    String (TypeError otherwise); used only when the header walk
 *               leaves has_date unset.
 */
static VALUE cbuild_response_head(VALUE self, VALUE rb_status, VALUE rb_reason,
                                  VALUE rb_headers, VALUE rb_body_size,
                                  VALUE rb_keep_alive, VALUE rb_date) {
    (void)self;
    Check_Type(rb_headers, T_HASH);
    Check_Type(rb_reason, T_STRING);
    Check_Type(rb_date, T_STRING);

    const int code = NUM2INT(rb_status);
    const long content_length = NUM2LONG(rb_body_size);
    const int persistent = RTEST(rb_keep_alive);

    /* Start with 1 KiB of capacity; rb_str_cat enlarges it when needed. */
    VALUE head = rb_str_buf_new(1024);

    /* Status line: use the pre-built line when the lookup finds one for
     * this (status, reason) pair, otherwise format it by hand. */
    const struct status_line *prebaked =
        lookup_status_line(code, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
    if (prebaked == NULL) {
        char prefix[48];
        int prefix_len = snprintf(prefix, sizeof(prefix), "HTTP/1.1 %d ", code);
        rb_str_cat(head, prefix, prefix_len);
        rb_str_cat(head, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
        rb_str_cat(head, "\r\n", 2);
    } else {
        rb_str_cat(head, prebaked->bytes, prebaked->len);
    }

    /* User headers: the callback receives a pointer to this state struct,
     * which carries the output String and the has_date flag read below. */
    build_head_state_t walk = { head, 0 };
    rb_hash_foreach(rb_headers, build_head_each, (VALUE)&walk);

    /* content-length: the itoa helper returns the offset of the first digit;
     * the digits run from there to the end of the scratch array.
     * NOTE(review): behaviour for a negative body_size depends on
     * itoa_positive_decimal, which is not visible here — confirm. */
    char digits[24];
    int first_digit = itoa_positive_decimal(content_length, digits, (int)sizeof(digits));
    rb_str_cat(head, "content-length: ", 16);
    rb_str_cat(head, digits + first_digit, sizeof(digits) - first_digit);
    rb_str_cat(head, "\r\n", 2);

    /* connection: always emitted, chosen from the keep_alive flag. */
    const char *conn_line = persistent ? "connection: keep-alive\r\n"
                                       : "connection: close\r\n";
    const long conn_len = persistent ? 24 : 19;
    rb_str_cat(head, conn_line, conn_len);

    /* date: only when the header walk did not flag one. */
    if (!walk.has_date) {
        rb_str_cat(head, "date: ", 6);
        rb_str_cat(head, RSTRING_PTR(rb_date), RSTRING_LEN(rb_date));
        rb_str_cat(head, "\r\n", 2);
    }

    /* Blank line closes the head. */
    rb_str_cat(head, "\r\n", 2);
    return head;
}
|
.chunked_body_complete?(rb_buffer, rb_body_start) ⇒ Boolean
Hyperion::CParser.chunked_body_complete?(buffer, body_start)
-> [complete?, end_offset]
Walks chunked-transfer framing in `buffer` starting at byte offset `body_start`. Returns a 2-element array:
[true, end_offset] — chunked body fully buffered; end_offset is the
byte just after the trailer CRLF (where pipelined
bytes from a follow-on request would begin).
[false, last_safe] — body is not yet complete; last_safe is the
furthest cursor we successfully advanced to,
useful as a hint for incremental parsing.
Mirrors Connection#chunked_body_complete? in pure Ruby — see lib/hyperion/connection.rb. Trailing whitespace after the size token (e.g. “5 ; ext\r\n”) is permitted as a permissive parse to match the upstream Ruby `.strip`.
1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 |
# File 'ext/hyperion_http/parser.c', line 1199
/* Returns the offset of the first "\r\n" at or after `from` within
 * data[0, len), or -1 when no complete CRLF is buffered yet. */
static long cchunked_next_crlf(const char *data, long from, long len) {
    for (long i = from; i + 1 < len; i++) {
        if (data[i] == '\r' && data[i + 1] == '\n') {
            return i;
        }
    }
    return -1;
}

/*
 * Hyperion::CParser.chunked_body_complete?(buffer, body_start)
 *     -> [complete?, end_offset]
 *
 * Walks chunked-transfer framing in `buffer` from byte offset `body_start`.
 *
 *   buffer      String (TypeError otherwise).
 *   body_start  Integer in 0..buffer.bytesize (ArgumentError otherwise).
 *
 * Returns [true, end_offset] once the zero-size chunk and the empty line that
 * ends the trailer section are buffered; end_offset is the byte just after
 * that empty line. Returns [false, last_safe] otherwise, where last_safe is
 * the offset just past the last chunk that was fully buffered (or body_start
 * if none was). A missing, empty, or non-hex size token also yields
 * [false, last_safe].
 *
 * The two bytes following each chunk's data are counted but not checked to
 * be CRLF.
 */
static VALUE cchunked_body_complete(VALUE self, VALUE rb_buffer, VALUE rb_body_start) {
    (void)self;
    Check_Type(rb_buffer, T_STRING);
    const char *data = RSTRING_PTR(rb_buffer);
    long len = RSTRING_LEN(rb_buffer);
    long cursor = NUM2LONG(rb_body_start);
    if (cursor < 0 || cursor > len) {
        rb_raise(rb_eArgError, "body_start out of range");
    }
    long last_safe = cursor;
    VALUE result = rb_ary_new_capa(2);
    for (;;) {
        /* Size line runs from cursor up to the next CRLF. */
        long line_end = cchunked_next_crlf(data, cursor, len);
        if (line_end < 0) {
            goto incomplete;
        }
        /* Size token: everything before the first ';' (a chunk extension,
         * ignored wholesale), or the whole line when there is none. */
        long tok_start = cursor;
        long tok_end = line_end;
        for (long i = cursor; i < line_end; i++) {
            if (data[i] == ';') { tok_end = i; break; }
        }
        /* Trim leading/trailing spaces and tabs from the token. */
        while (tok_start < tok_end &&
               (data[tok_start] == ' ' || data[tok_start] == '\t')) {
            tok_start++;
        }
        while (tok_end > tok_start &&
               (data[tok_end - 1] == ' ' || data[tok_end - 1] == '\t')) {
            tok_end--;
        }
        if (tok_end <= tok_start) {
            /* Empty size token — incomplete frame. */
            goto incomplete;
        }
        /* Validate + decode hex. */
        unsigned long size = 0;
        for (long i = tok_start; i < tok_end; i++) {
            unsigned char c = (unsigned char)data[i];
            unsigned int digit;
            if (c >= '0' && c <= '9') {
                digit = c - '0';
            } else if (c >= 'a' && c <= 'f') {
                digit = 10 + (c - 'a');
            } else if (c >= 'A' && c <= 'F') {
                digit = 10 + (c - 'A');
            } else {
                /* Non-hex byte: incomplete/malformed — report false and
                 * advance no further. */
                goto incomplete;
            }
            /* Shifting in another nibble would drop the top bits. Left
             * unchecked, a 17-digit token such as "10000000000000000"
             * wraps to 0 and is mistaken for the terminating chunk. A
             * size this large can never be satisfied by the buffer, so
             * "not complete" is the correct answer. */
            if (size > ((unsigned long)-1 >> 4)) {
                goto incomplete;
            }
            size = (size << 4) | digit;
        }
        cursor = line_end + 2;
        if (size == 0) {
            /* Final chunk — skip trailer lines until an empty line, i.e. a
             * CRLF that starts exactly at the cursor. */
            for (;;) {
                long nl = cchunked_next_crlf(data, cursor, len);
                if (nl < 0) {
                    goto incomplete;
                }
                if (nl == cursor) {
                    /* Empty line — body complete. */
                    rb_ary_push(result, Qtrue);
                    rb_ary_push(result, LONG2NUM(nl + 2));
                    RB_GC_GUARD(rb_buffer);
                    return result;
                }
                cursor = nl + 2;
            }
        }
        /* Need `size` data bytes plus the 2 bytes that follow them. The
         * comparison is arranged so nothing is added to `size`: `size + 2`
         * wraps around for sizes near ULONG_MAX. */
        unsigned long remaining = (unsigned long)(len - cursor);
        if (remaining < 2 || size > remaining - 2) {
            goto incomplete;
        }
        /* size <= remaining - 2 <= LONG_MAX here, so the cast is safe. */
        cursor += (long)size + 2;
        last_safe = cursor;
    }

incomplete:
    rb_ary_push(result, Qfalse);
    rb_ary_push(result, LONG2NUM(last_safe));
    /* `data` points into rb_buffer; keep the String alive to this point. */
    RB_GC_GUARD(rb_buffer);
    return result;
}
|
.parse_cookie_header(rb_cookie) ⇒ Object
Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash
Phase 3b (1.7.1) — split a single Cookie header value into its { “name” => “value” } pairs.
Standard format: “name1=val1; name2=val2; name3=val3”. Leading/trailing ASCII whitespace is trimmed around each pair and around each key. Empty values are valid. Pairs without `=` are skipped (RFC 6265 calls them ignorable). Repeated names are last-wins — middlewares that need RFC-strict merge can override.
Cookies are NOT URL-decoded by spec; values are opaque octets. We leave them verbatim. The returned Hash is mutable so the caller can extend it (e.g. for session-cookie hot-swaps).
1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 |
# File 'ext/hyperion_http/parser.c', line 1450
/*
 * Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash
 *
 * Splits a Cookie header value on ';' into name => value Strings.
 *
 *   cookie_str  String (TypeError otherwise).
 *
 * Spaces and tabs are trimmed around each pair, after the name and before
 * the value. Pairs with no '=' or with an empty name are dropped. The split
 * is on the first '=' of a pair. Later duplicates overwrite earlier ones.
 * Values are copied byte-for-byte (no decoding).
 */
static VALUE cparse_cookie_header(VALUE self, VALUE rb_cookie) {
    (void)self;
    Check_Type(rb_cookie, T_STRING);
    VALUE jar = rb_hash_new();
    const char *cur = RSTRING_PTR(rb_cookie);
    const char *const stop = cur + RSTRING_LEN(rb_cookie);

    for (;;) {
        /* Step over blanks and separators before the next pair. */
        while (cur < stop && (*cur == ' ' || *cur == '\t' || *cur == ';')) {
            cur++;
        }
        if (cur >= stop) {
            break;
        }

        /* The pair extends up to (not including) the next ';' or the end. */
        const char *pair = cur;
        while (cur < stop && *cur != ';') {
            cur++;
        }
        const char *pair_stop = cur;

        /* Drop blanks at the tail of the pair. */
        while (pair_stop > pair &&
               (pair_stop[-1] == ' ' || pair_stop[-1] == '\t')) {
            pair_stop--;
        }
        if (pair_stop == pair) {
            continue;
        }

        /* Locate the first '=' inside the pair; none means skip it. */
        const char *eq = pair;
        while (eq < pair_stop && *eq != '=') {
            eq++;
        }
        if (eq == pair_stop) {
            continue;
        }

        /* Name: [pair, name_stop) with trailing blanks removed. */
        const char *name_stop = eq;
        while (name_stop > pair &&
               (name_stop[-1] == ' ' || name_stop[-1] == '\t')) {
            name_stop--;
        }
        if (name_stop == pair) {
            continue; /* nothing left of the name */
        }

        /* Value: [value, pair_stop) with leading blanks removed. */
        const char *value = eq + 1;
        while (value < pair_stop && (*value == ' ' || *value == '\t')) {
            value++;
        }

        VALUE name_str = rb_str_new(pair, (long)(name_stop - pair));
        VALUE value_str = rb_str_new(value, (long)(pair_stop - value));
        rb_hash_aset(jar, name_str, value_str);
    }

    /* The cursors point into rb_cookie's buffer; keep it alive until here. */
    RB_GC_GUARD(rb_cookie);
    return jar;
}
|
.upcase_underscore(rb_name) ⇒ Object
Hyperion::CParser.upcase_underscore(name) -> “HTTP_<UPCASED_UNDERSCORED>”
Single-allocation replacement for `"HTTP_#{name.upcase.tr('-', '_')}"`. Hot path on the Rack adapter: every uncached request header (any `X-*` custom header) hits this on every request, and the Ruby version spawns three String allocations (the upcase result, the tr result, and the “HTTP_…” interpolation) plus a per-byte loop in tr.
We allocate one Ruby String of length 5 + name.bytesize, fill it in a single byte loop, return it. ASCII letters get OR’d with 0x20 inverted (i.e. cleared bit 5 to upcase ‘a’..‘z’); ‘-’ becomes ‘_’; everything else passes through (header names are ASCII per RFC 9110, but multi-byte UTF-8 bytes pass through bytewise unmolested rather than crashing).
Encoding is set to US-ASCII because Ruby’s String#upcase on an ASCII-only input returns a US-ASCII string, and the env-key lookup downstream is encoding-agnostic anyway.
1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 |
# File 'ext/hyperion_http/parser.c', line 1147
/*
 * Hyperion::CParser.upcase_underscore(name) -> "HTTP_<UPCASED_UNDERSCORED>"
 *
 * Returns a new String made of "HTTP_" followed by the bytes of `name`, with
 * ASCII 'a'..'z' upcased and '-' turned into '_'. Every other byte is copied
 * unchanged. The result is tagged US-ASCII.
 *
 *   name  String (TypeError otherwise).
 */
static VALUE cupcase_underscore(VALUE self, VALUE rb_name) {
    (void)self;
    Check_Type(rb_name, T_STRING);

    static const char prefix[5] = { 'H', 'T', 'T', 'P', '_' };
    const char *in = RSTRING_PTR(rb_name);
    const long in_len = RSTRING_LEN(rb_name);

    /* One allocation sized for the prefix plus every source byte. */
    VALUE env_key = rb_str_new(NULL, 5 + in_len);
    char *w = RSTRING_PTR(env_key);

    for (int k = 0; k < 5; k++) {
        *w++ = prefix[k];
    }
    for (const char *r = in; r < in + in_len; r++) {
        unsigned char b = (unsigned char)*r;
        if (b == '-') {
            b = '_';
        } else if (b >= 'a' && b <= 'z') {
            /* 'a' - 'A' is 32: the distance between the two ASCII cases. */
            b = (unsigned char)(b - ('a' - 'A'));
        }
        *w++ = (char)b;
    }

    rb_enc_associate(env_key, rb_usascii_encoding());
    /* `in` points into rb_name's buffer; keep the source String alive
     * until the copy loop above is done. */
    RB_GC_GUARD(rb_name);
    return env_key;
}
|
Instance Method Details
#parse(buffer) ⇒ Object
parse(buffer) -> [Request, end_offset]
Parse one complete HTTP/1.1 request from `buffer`. If buffer doesn’t yet contain a complete request, raise ParseError(“incomplete request”). For pipelined input, end_offset is the byte boundary of the first request — Connection carries the rest forward.
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 |
# File 'ext/hyperion_http/parser.c', line 372
/*
 * parse(buffer) -> [Request, end_offset]
 *
 * Runs llhttp over the whole of `buffer` once and builds a Request from the
 * fields collected in the parser state.
 *
 *   buffer  String (TypeError otherwise).
 *
 * Returns a 2-element Array: the new Request and the number of bytes of
 * `buffer` that belong to it.
 *
 * Raises rb_eUnsupportedError when a callback flagged parse_error == 2 or
 * llhttp paused for an upgrade; rb_eParseError when a callback flagged
 * parse_error == 1, llhttp reported any other error, or the message did not
 * complete ("incomplete request"). The checks run in exactly that order.
 */
static VALUE cparser_parse(VALUE self, VALUE buffer) {
Check_Type(buffer, T_STRING);
(void)self;
/* Per-call parse state, filled in by the llhttp callbacks via parser.data. */
parser_state_t s;
state_init(&s);
llhttp_t parser;
llhttp_init(&parser, HTTP_REQUEST, &settings);
parser.data = &s;
const char *data = RSTRING_PTR(buffer);
size_t len = (size_t)RSTRING_LEN(buffer);
enum llhttp_errno err = llhttp_execute(&parser, data, len);
/* Custom error flags (set inside callbacks) take precedence. */
if (s.parse_error == 2) {
rb_raise(rb_eUnsupportedError, "%s", s.error_message);
}
if (s.parse_error == 1) {
rb_raise(rb_eParseError, "%s", s.error_message);
}
if (err == HPE_PAUSED_UPGRADE) {
rb_raise(rb_eUnsupportedError, "Upgrade not supported");
}
/* HPE_PAUSED is not a failure here; it is handled when computing
 * `consumed` below. Anything else is reported with llhttp's own reason,
 * falling back to the errno name when the reason is empty. */
if (err != HPE_OK && err != HPE_PAUSED) {
const char *reason = llhttp_get_error_reason(&parser);
rb_raise(rb_eParseError, "llhttp: %s",
(reason && *reason) ? reason : llhttp_errno_name(err));
}
if (!s.message_complete) {
rb_raise(rb_eParseError, "incomplete request");
}
/* Compute end_offset. We pause inside on_message_complete, so
* llhttp_get_error_pos returns the byte just after the message
* boundary — exactly the carry-over offset we want. */
size_t consumed;
if (err == HPE_PAUSED) {
const char *epos = llhttp_get_error_pos(&parser);
consumed = epos ? (size_t)(epos - data) : len;
} else {
consumed = len;
}
/* 2.4-B (S1): Qnil-to-empty-String coercion for fields that the
* llhttp callbacks never touched (e.g. zero-length URL, GET with
* no body, HTTP/1.0 with no version detail). The frozen empty
* String is shared across every nil-coerced field — no allocation. */
VALUE method = NIL_P(s.method) ? rb_kEMPTY_STR : s.method;
VALUE path = NIL_P(s.path) ? rb_kEMPTY_STR : s.path;
VALUE query_string = NIL_P(s.query_string) ? rb_kEMPTY_STR : s.query_string;
/* Unlike the other fields, a nil version falls back to rb_kHTTP_1_1,
 * not to the empty String. */
VALUE http_version = NIL_P(s.http_version) ? rb_kHTTP_1_1 : s.http_version;
VALUE body = NIL_P(s.body) ? rb_kEMPTY_STR : s.body;
/* Build the Request. */
/* Fields are passed as keyword arguments to rb_cRequest's `new`.
 * s.headers is forwarded without a nil check — presumably state_init
 * always sets it; confirm against state_init. */
VALUE kwargs = rb_hash_new();
rb_hash_aset(kwargs, ID2SYM(id_method_kw), method);
rb_hash_aset(kwargs, ID2SYM(id_path_kw), path);
rb_hash_aset(kwargs, ID2SYM(id_query_string_kw), query_string);
rb_hash_aset(kwargs, ID2SYM(id_http_version_kw), http_version);
rb_hash_aset(kwargs, ID2SYM(id_headers_kw), s.headers);
rb_hash_aset(kwargs, ID2SYM(id_body_kw), body);
VALUE args[1] = { kwargs };
VALUE request = rb_funcallv_kw(rb_cRequest, id_new, 1, args, RB_PASS_KEYWORDS);
/* NOTE(review): consumed is a size_t narrowed to unsigned long; on
 * platforms where unsigned long is 32-bit this truncates offsets above
 * 4 GiB — confirm whether that matters for the supported targets. */
return rb_ary_new_from_args(2, request, ULONG2NUM((unsigned long)consumed));
}
|