Class: Hyperion::CParser
- Inherits:
-
Object
- Object
- Hyperion::CParser
- Defined in:
- ext/hyperion_http/parser.c
Class Method Summary collapse
-
.build_access_line(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line(format, ts, method, path, query, status, duration_ms, remote_addr, http_version) -> String.
-
.build_response_head(rb_status, rb_reason, rb_headers, rb_body_size, rb_keep_alive, rb_date) ⇒ Object
Hyperion::CParser.build_response_head(status, reason, headers, body_size, keep_alive, date_str) -> String.
Instance Method Summary collapse
-
#parse(buffer) ⇒ Object
parse(buffer) -> [Request, end_offset].
Class Method Details
.build_access_line(format_sym, rb_ts, rb_method, rb_path, rb_query, rb_status, rb_duration, rb_remote, rb_http_version) ⇒ Object
Hyperion::CParser.build_access_line(format, ts, method, path, query,
status, duration_ms, remote_addr,
http_version) -> String
Hand-rolled access-log line builder used by Hyperion::Logger#access on the hot path. The Ruby version allocates 1-2 throwaway Strings per line; this appends every piece directly into one preallocated Ruby String buffer (512 bytes initially, grown by rb_str_cat for longer lines) and returns that single String. ~10× faster on the common case, which closes the perf gap between log_requests on/off.
`format` is :text or :json (Symbol). The format strings here mirror Logger#build_access_text / #build_access_json byte-for-byte (no colour — the C builder is only used when @colorize is false, i.e. non-TTY production deployments where access logs are the highest-volume log line).
String inputs are passed through verbatim. Access logs are best-effort structured output, not a security boundary; CRLF in path/remote_addr would be a log-injection nuisance but cannot escalate. Status (int) and duration_ms (double/Numeric) go through snprintf, which is type-safe.
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 |
# File 'ext/hyperion_http/parser.c', line 427
/*
 * Hyperion::CParser.build_access_line(format, ts, method, path, query,
 *                                     status, duration_ms, remote_addr,
 *                                     http_version) -> String
 *
 * Builds one access-log line and returns it as a new Ruby String.
 *
 *   format_sym       :json selects the JSON layout; any other value selects
 *                    the text layout.
 *   rb_ts, rb_method, rb_path, rb_http_version
 *                    Strings, appended verbatim (no escaping).
 *   rb_query         String or nil; left out of the line when nil or empty.
 *   rb_status        Integer, converted with NUM2INT.
 *   rb_duration      Numeric, converted with NUM2DBL and formatted with %g.
 *   rb_remote        String or nil; rendered as null (JSON) / nil (text)
 *                    when nil or empty.
 *
 * Raises TypeError (via Check_Type) when any String argument has another
 * type; rb_query and rb_remote may additionally be nil.
 */
static VALUE cbuild_access_line(VALUE self,
                                VALUE format_sym, VALUE rb_ts, VALUE rb_method,
                                VALUE rb_path, VALUE rb_query, VALUE rb_status,
                                VALUE rb_duration, VALUE rb_remote,
                                VALUE rb_http_version) {
    (void)self;
    Check_Type(rb_ts, T_STRING);
    Check_Type(rb_method, T_STRING);
    Check_Type(rb_path, T_STRING);
    Check_Type(rb_http_version, T_STRING);

    int is_json = (TYPE(format_sym) == T_SYMBOL) &&
                  (SYM2ID(format_sym) == rb_intern("json"));
    int status = NUM2INT(rb_status);
    double dur_ms = NUM2DBL(rb_duration);

    /* query / remote_addr are optional, but RSTRING_LEN / RSTRING_PTR are
     * only valid on a String: reject anything that is neither nil nor a
     * String before touching it, instead of reading through a bogus
     * pointer. */
    if (!NIL_P(rb_query)) {
        Check_Type(rb_query, T_STRING);
    }
    if (!NIL_P(rb_remote)) {
        Check_Type(rb_remote, T_STRING);
    }
    int has_query = !NIL_P(rb_query) && RSTRING_LEN(rb_query) > 0;
    int has_remote = !NIL_P(rb_remote) && RSTRING_LEN(rb_remote) > 0;

    /* 512-byte initial capacity covers the vast majority of access-log
     * lines (timestamp + level + path + status + addr ~= 200 bytes).
     * rb_str_cat grows the buffer on overflow.
     *
     * We use a CAT_LIT macro for literal-string appends so the compiler
     * computes length via sizeof — manual byte counts on hand-rolled
     * literal lengths are an off-by-one waiting to happen. */
#define CAT_LIT(b, s) rb_str_cat((b), (s), (long)(sizeof(s) - 1))
    VALUE buf = rb_str_buf_new(512);

    /* Scratch space for the numeric fragment. The longest rendering of
     * either format string below stays under 64 bytes; the clamp after
     * each snprintf keeps the append in bounds regardless. */
    char num[80];
    int n;

    if (is_json) {
        /* Prefix: {"ts":"...","level":"info","source":"hyperion","message":"request", */
        CAT_LIT(buf, "{\"ts\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, "\",\"level\":\"info\",\"source\":\"hyperion\",\"message\":\"request\",");
        CAT_LIT(buf, "\"method\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, "\",\"path\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));
        CAT_LIT(buf, "\"");
        if (has_query) {
            CAT_LIT(buf, ",\"query\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_query), RSTRING_LEN(rb_query));
            CAT_LIT(buf, "\"");
        }

        /* NOTE(review): %g prints non-finite doubles as inf/nan, which is
         * not valid JSON — confirm callers only pass finite durations. */
        n = snprintf(num, sizeof(num), ",\"status\":%d,\"duration_ms\":%g,",
                     status, dur_ms);
        if (n < 0) {
            n = 0;
        } else if ((size_t)n >= sizeof(num)) {
            n = (int)sizeof(num) - 1;
        }
        rb_str_cat(buf, num, n);

        if (has_remote) {
            CAT_LIT(buf, "\"remote_addr\":\"");
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
            CAT_LIT(buf, "\",");
        } else {
            CAT_LIT(buf, "\"remote_addr\":null,");
        }
        CAT_LIT(buf, "\"http_version\":\"");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\"}\n");
    } else {
        /* text: "<ts> INFO [hyperion] message=request method=... path=... [query=...] status=... duration_ms=... remote_addr=... http_version=...\n" */
        rb_str_cat(buf, RSTRING_PTR(rb_ts), RSTRING_LEN(rb_ts));
        CAT_LIT(buf, " INFO [hyperion] message=request method=");
        rb_str_cat(buf, RSTRING_PTR(rb_method), RSTRING_LEN(rb_method));
        CAT_LIT(buf, " path=");
        rb_str_cat(buf, RSTRING_PTR(rb_path), RSTRING_LEN(rb_path));

        if (has_query) {
            /* Mirror Logger#quote_if_needed: quote if value contains
             * whitespace, '"', or '='. Hot path skips quoting. */
            const char *q_ptr = RSTRING_PTR(rb_query);
            long q_len = RSTRING_LEN(rb_query);
            int need_quote = 0;
            for (long j = 0; j < q_len; j++) {
                char c = q_ptr[j];
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r' ||
                    c == '"' || c == '=') {
                    need_quote = 1;
                    break;
                }
            }

            CAT_LIT(buf, " query=");
            if (need_quote) {
                /* Defer to Ruby's String#inspect for correct quoting. */
                VALUE quoted = rb_funcall(rb_query, rb_intern("inspect"), 0);
                rb_str_cat(buf, RSTRING_PTR(quoted), RSTRING_LEN(quoted));
                /* rb_str_cat may allocate while it still reads from
                 * quoted's bytes; keep the temporary visible to the GC
                 * until the copy is done. */
                RB_GC_GUARD(quoted);
            } else {
                rb_str_cat(buf, q_ptr, q_len);
            }
        }

        /* %g: no fixed precision; status is an int.
         * NOTE(review): %g is not identical to Ruby's Float#to_s (1.0
         * prints as "1") — confirm this matches the Ruby logger closely
         * enough. */
        n = snprintf(num, sizeof(num), " status=%d duration_ms=%g remote_addr=",
                     status, dur_ms);
        if (n < 0) {
            n = 0;
        } else if ((size_t)n >= sizeof(num)) {
            n = (int)sizeof(num) - 1;
        }
        rb_str_cat(buf, num, n);

        if (has_remote) {
            rb_str_cat(buf, RSTRING_PTR(rb_remote), RSTRING_LEN(rb_remote));
        } else {
            CAT_LIT(buf, "nil");
        }
        CAT_LIT(buf, " http_version=");
        rb_str_cat(buf, RSTRING_PTR(rb_http_version), RSTRING_LEN(rb_http_version));
        CAT_LIT(buf, "\n");
    }
    return buf;
}
|
.build_response_head(rb_status, rb_reason, rb_headers, rb_body_size, rb_keep_alive, rb_date) ⇒ Object
Hyperion::CParser.build_response_head(status, reason, headers, body_size,
keep_alive, date_str) -> String
Builds the HTTP/1.1 response head:
"HTTP/1.1 <status> <reason>\r\n"
"<lowercased-key>: <value>\r\n" for each user header (except
content-length / connection — we always set these from the framing
args below, mirroring the rc16 Ruby behaviour where the normalized
hash is overridden in place).
"content-length: <body_size>\r\n"
"connection: <close|keep-alive>\r\n"
"date: <date_str>\r\n" (only if user headers didn't include 'date')
"\r\n"
Header values containing CR/LF raise ArgumentError (response-splitting guard). Bypasses Ruby Hash#each + per-line String#<< allocation; the status line, framing headers, and join slices live in C buffers.
320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 |
# File 'ext/hyperion_http/parser.c', line 320
/* Returns non-zero when the len bytes starting at p contain a CR or an LF. */
static int response_head_has_crlf(const char *p, long len) {
    if (len <= 0) {
        return 0;
    }
    return memchr(p, '\r', (size_t)len) != NULL ||
           memchr(p, '\n', (size_t)len) != NULL;
}

/*
 * Hyperion::CParser.build_response_head(status, reason, headers, body_size,
 *                                       keep_alive, date_str) -> String
 *
 * Builds the HTTP/1.1 response head into a new Ruby String:
 *
 *   "HTTP/1.1 <status> <reason>\r\n"
 *   "<lowercased-key>: <value>\r\n"   for each entry of `headers`, except
 *                                     content-length / connection
 *   "content-length: <body_size>\r\n"
 *   "connection: <close|keep-alive>\r\n"
 *   "date: <date_str>\r\n"            only if `headers` had no 'date' key
 *   "\r\n"
 *
 *   rb_status      Integer (NUM2INT).
 *   rb_reason      String, appended verbatim after the status code.
 *   rb_headers     Hash; keys and values are converted with to_s, keys are
 *                  lowercased with #downcase.
 *   rb_body_size   Integer (NUM2LONG) written as the content-length.
 *   rb_keep_alive  Truthy selects "keep-alive", otherwise "close".
 *   rb_date        String used for the date header when the caller did not
 *                  supply one.
 *
 * Raises TypeError when headers / reason / date have the wrong type, and
 * ArgumentError when a header name or value contains CR or LF
 * (response-splitting guard).
 */
static VALUE cbuild_response_head(VALUE self, VALUE rb_status, VALUE rb_reason,
                                  VALUE rb_headers, VALUE rb_body_size,
                                  VALUE rb_keep_alive, VALUE rb_date) {
    /* Literal fragments; lengths come from sizeof rather than hand counts. */
    static const char crlf[] = "\r\n";
    static const char kv_sep[] = ": ";
    static const char conn_keep_alive[] = "connection: keep-alive\r\n";
    static const char conn_close[] = "connection: close\r\n";
    static const char date_prefix[] = "date: ";

    (void)self;
    Check_Type(rb_headers, T_HASH);
    Check_Type(rb_reason, T_STRING);
    Check_Type(rb_date, T_STRING);
    int status = NUM2INT(rb_status);
    long body_size = NUM2LONG(rb_body_size);
    int keep_alive = RTEST(rb_keep_alive);

    /* Most heads fit in 1 KiB; rb_str_cat grows on demand. */
    VALUE buf = rb_str_buf_new(1024);

    /* Status line: "HTTP/1.1 <status> <reason>\r\n" */
    char status_line[48];
    int n = snprintf(status_line, sizeof(status_line), "HTTP/1.1 %d ", status);
    rb_str_cat(buf, status_line, n);
    rb_str_cat(buf, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
    rb_str_cat(buf, crlf, (long)(sizeof(crlf) - 1));

    /* Iterate user headers — lowercase key, validate, skip framing. */
    int has_date = 0;
    VALUE keys = rb_funcall(rb_headers, rb_intern("keys"), 0);
    long n_keys = RARRAY_LEN(keys);
    for (long i = 0; i < n_keys; i++) {
        VALUE k = rb_ary_entry(keys, i);
        VALUE v = rb_hash_aref(rb_headers, k);
        VALUE k_s = rb_obj_as_string(k);
        VALUE v_s = rb_obj_as_string(v);
        VALUE k_lower = rb_funcall(k_s, id_downcase, 0);
        const char *k_ptr = RSTRING_PTR(k_lower);
        long k_len = RSTRING_LEN(k_lower);
        const char *v_ptr = RSTRING_PTR(v_s);
        long v_len = RSTRING_LEN(v_s);

        /* CRLF injection guard. The name is written to the wire verbatim
         * just like the value, so both are checked. The inspected name is
         * handed to rb_raise as a VALUE (PRIsVALUE) rather than as a raw
         * char pointer into an unrooted temporary. */
        if (response_head_has_crlf(k_ptr, k_len)) {
            rb_raise(rb_eArgError, "header name %" PRIsVALUE " contains CR/LF",
                     rb_inspect(k_lower));
        }
        if (response_head_has_crlf(v_ptr, v_len)) {
            rb_raise(rb_eArgError, "header %" PRIsVALUE " contains CR/LF",
                     rb_inspect(k_lower));
        }

        /* Drop user-supplied content-length / connection — we always set
         * these unconditionally below (matches rc16 Ruby behaviour where
         * the normalized hash overwrites in place). */
        int is_framing =
            (k_len == 14 && memcmp(k_ptr, "content-length", 14) == 0) ||
            (k_len == 10 && memcmp(k_ptr, "connection", 10) == 0);
        if (!is_framing) {
            if (k_len == 4 && memcmp(k_ptr, "date", 4) == 0) {
                has_date = 1;
            }
            rb_str_cat(buf, k_ptr, k_len);
            rb_str_cat(buf, kv_sep, (long)(sizeof(kv_sep) - 1));
            rb_str_cat(buf, v_ptr, v_len);
            rb_str_cat(buf, crlf, (long)(sizeof(crlf) - 1));
        }

        /* k_ptr / v_ptr point into these (possibly freshly allocated)
         * strings and rb_str_cat can allocate; keep the owners visible to
         * the GC until the last raw-pointer use above. */
        RB_GC_GUARD(k_lower);
        RB_GC_GUARD(v_s);
    }

    /* Framing headers — always emitted. */
    char cl_buf[48];
    n = snprintf(cl_buf, sizeof(cl_buf), "content-length: %ld\r\n", body_size);
    rb_str_cat(buf, cl_buf, n);
    if (keep_alive) {
        rb_str_cat(buf, conn_keep_alive, (long)(sizeof(conn_keep_alive) - 1));
    } else {
        rb_str_cat(buf, conn_close, (long)(sizeof(conn_close) - 1));
    }
    if (!has_date) {
        rb_str_cat(buf, date_prefix, (long)(sizeof(date_prefix) - 1));
        rb_str_cat(buf, RSTRING_PTR(rb_date), RSTRING_LEN(rb_date));
        rb_str_cat(buf, crlf, (long)(sizeof(crlf) - 1));
    }

    /* End of head */
    rb_str_cat(buf, crlf, (long)(sizeof(crlf) - 1));
    return buf;
}
|
Instance Method Details
#parse(buffer) ⇒ Object
parse(buffer) -> [Request, end_offset]
Parse one complete HTTP/1.1 request from `buffer`. If buffer doesn't yet contain a complete request, raise ParseError ("incomplete request"). For pipelined input, end_offset is the byte boundary of the first request — Connection carries the rest forward.
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 |
# File 'ext/hyperion_http/parser.c', line 239
/*
 * parse(buffer) -> [Request, end_offset]
 *
 * Runs llhttp over `buffer` and returns a two-element Array: a new Request
 * built from the fields the callbacks collected, and the number of bytes
 * that belong to the parsed message.
 *
 * Raises TypeError when `buffer` is not a String, UnsupportedError or
 * ParseError when a callback flagged an error or llhttp rejected the input,
 * and ParseError("incomplete request") when the message did not complete.
 */
static VALUE cparser_parse(VALUE self, VALUE buffer) {
    (void)self;
    Check_Type(buffer, T_STRING);

    parser_state_t state;
    state_init(&state);

    llhttp_t http;
    llhttp_init(&http, HTTP_REQUEST, &settings);
    http.data = &state;

    const char *input = RSTRING_PTR(buffer);
    size_t input_len = (size_t)RSTRING_LEN(buffer);
    enum llhttp_errno rc = llhttp_execute(&http, input, input_len);

    /* Errors recorded by our own callbacks win over llhttp's verdict:
     * 2 maps to UnsupportedError, 1 to ParseError. */
    if (state.parse_error == 2) {
        rb_raise(rb_eUnsupportedError, "%s", state.error_message);
    } else if (state.parse_error == 1) {
        rb_raise(rb_eParseError, "%s", state.error_message);
    }

    if (rc == HPE_PAUSED_UPGRADE) {
        rb_raise(rb_eUnsupportedError, "Upgrade not supported");
    }

    /* Anything other than "ok" or "paused" is a genuine llhttp failure;
     * prefer its descriptive reason and fall back to the errno name. */
    if (rc != HPE_OK && rc != HPE_PAUSED) {
        const char *why = llhttp_get_error_reason(&http);
        if (!why || !*why) {
            why = llhttp_errno_name(rc);
        }
        rb_raise(rb_eParseError, "llhttp: %s", why);
    }

    if (!state.message_complete) {
        rb_raise(rb_eParseError, "incomplete request");
    }

    /* end_offset defaults to the whole buffer. When the parser paused (we
     * pause inside on_message_complete), llhttp_get_error_pos points just
     * past the message boundary, which is the carry-over offset. */
    size_t end_offset = input_len;
    if (rc == HPE_PAUSED) {
        const char *stop = llhttp_get_error_pos(&http);
        if (stop) {
            end_offset = (size_t)(stop - input);
        }
    }

    /* Request.new(method:, path:, query_string:, http_version:, headers:, body:) */
    VALUE request_kw = rb_hash_new();
    rb_hash_aset(request_kw, ID2SYM(id_method_kw), state.method);
    rb_hash_aset(request_kw, ID2SYM(id_path_kw), state.path);
    rb_hash_aset(request_kw, ID2SYM(id_query_string_kw), state.query_string);
    rb_hash_aset(request_kw, ID2SYM(id_http_version_kw), state.http_version);
    rb_hash_aset(request_kw, ID2SYM(id_headers_kw), state.headers);
    rb_hash_aset(request_kw, ID2SYM(id_body_kw), state.body);

    VALUE ctor_args[1] = { request_kw };
    VALUE request = rb_funcallv_kw(rb_cRequest, id_new, 1, ctor_args,
                                   RB_PASS_KEYWORDS);

    VALUE offset = ULONG2NUM((unsigned long)end_offset);
    return rb_ary_new_from_args(2, request, offset);
}
|