Module: CurlImpersonate
- Defined in:
- lib/curl_impersonate.rb,
lib/curl_impersonate.rb,
lib/curl_impersonate/cookies.rb,
lib/curl_impersonate/version.rb,
lib/curl_impersonate/response.rb,
ext/curl_impersonate/curl_impersonate.c
Defined Under Namespace
Constant Summary collapse
- DEFAULT_IMPERSONATE =
"chrome131".freeze
- DEFAULT_TIMEOUT_SEC =
15
- VERSION =
"0.1.1"
Class Method Summary collapse
-
._do_request_native(*args) ⇒ Object
Signature (stage 7): _do_request_native(url, impersonate, headers, post_data, follow_redirects, timeout_sec, proxy_url, proxy_userpwd) -> Response.
- ._native_curl_version ⇒ Object
- .do_request(url:, impersonate: DEFAULT_IMPERSONATE, headers: {}, post_data: "", follow_redirects: true, timeout_sec: DEFAULT_TIMEOUT_SEC, proxy: "") ⇒ Object
-
.extract_cookies(headers_str) ⇒ Object
Extracts cookies from a raw HTTP header string (the value of Response#headers).
-
.parse_proxy(proxy) ⇒ Object
Split “scheme://user:pass@host:port” into (“scheme://host:port”, “user:pass”).
Class Method Details
._do_request_native(*args) ⇒ Object
Signature (stage 7):
_do_request_native(url, impersonate, headers, post_data,
follow_redirects, timeout_sec,
proxy_url, proxy_userpwd) -> Response
url : String
impersonate : String (e.g. "chrome131")
headers : Hash<String, String>
post_data : String — empty string means GET, non-empty means POST
follow_redirects : true / false
timeout_sec : Integer
proxy_url : String — empty string disables the proxy; otherwise the proxy URL without credentials (e.g. "scheme://host:port")
proxy_userpwd : String — "user:pass" or empty
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'ext/curl_impersonate/curl_impersonate.c', line 115
/*
 * Native implementation behind CurlImpersonate._do_request_native.
 *
 * Takes exactly eight positional arguments:
 *   argv[0] url              String
 *   argv[1] impersonate      String, passed to curl_easy_impersonate
 *   argv[2] headers          Hash, turned into a curl_slist by build_header_slist
 *   argv[3] post_data        String; a non-empty value is sent as the POST body
 *   argv[4] follow_redirects truthy/falsy, mapped to CURLOPT_FOLLOWLOCATION
 *   argv[5] timeout_sec      Integer, mapped to CURLOPT_TIMEOUT
 *   argv[6] proxy_url        String; empty means "no proxy option set"
 *   argv[7] proxy_userpwd    String; empty means "no proxy credentials set"
 *
 * Returns a new cResponse struct built from (status code, body, raw header
 * text).
 *
 * Raises ArgumentError on a wrong argument count, TypeError (via Check_Type)
 * on a wrong argument type, and eError when libcurl setup or the transfer
 * fails.
 *
 * Every Ruby-level conversion that can raise is performed BEFORE the curl
 * handle is created: rb_raise unwinds with a longjmp, so anything acquired
 * before a raise that is not released by hand is leaked.
 */
static VALUE rb_cci_do_request(int argc, VALUE *argv, VALUE self) {
    (void)self;
    if (argc != 8) {
        rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 8)", argc);
    }

    VALUE rb_url = argv[0];
    VALUE rb_impersonate = argv[1];
    VALUE rb_headers = argv[2];
    VALUE rb_post_data = argv[3];
    VALUE rb_follow = argv[4];
    VALUE rb_timeout = argv[5];
    VALUE rb_proxy_url = argv[6];
    VALUE rb_proxy_userpwd = argv[7];

    Check_Type(rb_url, T_STRING);
    Check_Type(rb_impersonate, T_STRING);
    Check_Type(rb_headers, T_HASH);
    Check_Type(rb_post_data, T_STRING);
    Check_Type(rb_proxy_url, T_STRING);
    Check_Type(rb_proxy_userpwd, T_STRING);

    const char *url = StringValueCStr(rb_url);
    const char *impersonate = StringValueCStr(rb_impersonate);

    /* StringValueCStr raises when the string contains a NUL byte. Resolve
     * the proxy strings here, while there is nothing to leak yet, instead of
     * after curl_easy_init(). NULL means "leave the option unset". */
    const char *proxy_url = NULL;
    if (RSTRING_LEN(rb_proxy_url) > 0) {
        proxy_url = StringValueCStr(rb_proxy_url);
    }
    const char *proxy_userpwd = NULL;
    if (RSTRING_LEN(rb_proxy_userpwd) > 0) {
        proxy_userpwd = StringValueCStr(rb_proxy_userpwd);
    }

    long timeout_sec = NUM2LONG(rb_timeout);
    long follow = RTEST(rb_follow) ? 1L : 0L;

    CURL *handle = curl_easy_init();
    if (!handle) {
        rb_raise(eError, "curl_easy_init failed");
    }

    CURLcode rc = curl_easy_impersonate(handle, impersonate, 1);
    if (rc != CURLE_OK) {
        curl_easy_cleanup(handle);
        rb_raise(eError, "curl_easy_impersonate(%s) failed: %s",
                 impersonate, curl_easy_strerror(rc));
    }

    struct buffer body, headers_buf;
    buffer_init(&body);
    buffer_init(&headers_buf);

    curl_easy_setopt(handle, CURLOPT_URL, url);
    curl_easy_setopt(handle, CURLOPT_TIMEOUT, timeout_sec);
    curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, follow);
    /* NOTE(review): TLS peer and host verification are switched off for
     * every request — confirm this is intentional. */
    curl_easy_setopt(handle, CURLOPT_SSL_VERIFYPEER, 0L);
    curl_easy_setopt(handle, CURLOPT_SSL_VERIFYHOST, 0L);
    /* Empty string enables all built-in encodings (gzip, br, zstd if available) —
     * essential for matching browser Accept-Encoding fingerprint. */
    curl_easy_setopt(handle, CURLOPT_ACCEPT_ENCODING, "");
    curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_body_cb);
    curl_easy_setopt(handle, CURLOPT_WRITEDATA, &body);
    curl_easy_setopt(handle, CURLOPT_HEADERFUNCTION, write_header_cb);
    curl_easy_setopt(handle, CURLOPT_HEADERDATA, &headers_buf);

    /* POST body. We use COPYPOSTFIELDS so libcurl owns the copy and we can
     * release the Ruby String after setopt returns. POSTFIELDSIZE is set
     * explicitly (and first) so binary bodies with embedded NULs work. */
    long post_len = RSTRING_LEN(rb_post_data);
    if (post_len > 0) {
        curl_easy_setopt(handle, CURLOPT_POSTFIELDSIZE, post_len);
        /* COPYPOSTFIELDS allocates; do not silently send a request without
         * its body if that copy fails. */
        CURLcode post_rc = curl_easy_setopt(handle, CURLOPT_COPYPOSTFIELDS,
                                            RSTRING_PTR(rb_post_data));
        if (post_rc != CURLE_OK) {
            buffer_free(&body);
            buffer_free(&headers_buf);
            curl_easy_cleanup(handle);
            rb_raise(eError, "failed to set POST body: %s", curl_easy_strerror(post_rc));
        }
    }

    if (proxy_url) {
        curl_easy_setopt(handle, CURLOPT_PROXY, proxy_url);
    }
    if (proxy_userpwd) {
        curl_easy_setopt(handle, CURLOPT_PROXYUSERPWD, proxy_userpwd);
    }

    /* Custom headers. Build a curl_slist from the Ruby Hash. */
    struct slist_build sb = { .list = NULL, .error = 0 };
    if (RHASH_SIZE(rb_headers) > 0) {
        rb_hash_foreach(rb_headers, build_header_slist, (VALUE)&sb);
        if (sb.error) {
            curl_slist_free_all(sb.list);
            buffer_free(&body);
            buffer_free(&headers_buf);
            curl_easy_cleanup(handle);
            rb_raise(eError, "failed to build header list (non-string key/value or OOM)");
        }
        curl_easy_setopt(handle, CURLOPT_HTTPHEADER, sb.list);
    }

    /* Run the transfer through rb_thread_call_without_gvl so other Ruby
     * threads can run while this one is blocked on the network. */
    struct perform_args args = { .handle = handle, .result = CURLE_OK };
    rb_thread_call_without_gvl(perform_without_gvl, &args, RUBY_UBF_IO, NULL);

    if (args.result != CURLE_OK) {
        /* Copy the message before tearing the handle down, then raise only
         * once every native resource has been released. */
        char errbuf[256];
        strncpy(errbuf, curl_easy_strerror(args.result), sizeof(errbuf) - 1);
        errbuf[sizeof(errbuf) - 1] = '\0';
        curl_slist_free_all(sb.list);
        buffer_free(&body);
        buffer_free(&headers_buf);
        curl_easy_cleanup(handle);
        rb_raise(eError, "curl_easy_perform failed: %s", errbuf);
    }

    long status_code = 0;
    curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &status_code);

    /* A NULL data pointer is replaced by "" so rb_str_new always receives a
     * valid pointer. */
    VALUE rb_body = rb_str_new(body.data ? body.data : "", body.len);
    VALUE rb_hdr_str = rb_str_new(headers_buf.data ? headers_buf.data : "", headers_buf.len);

    curl_slist_free_all(sb.list);
    buffer_free(&body);
    buffer_free(&headers_buf);
    curl_easy_cleanup(handle);

    return rb_struct_new(cResponse, LONG2NUM(status_code), rb_body, rb_hdr_str);
}
|
._native_curl_version ⇒ Object
96 97 98 99 |
# File 'ext/curl_impersonate/curl_impersonate.c', line 96
/*
 * Returns the string reported by curl_version() as a new Ruby String.
 * The receiver is not used.
 */
static VALUE rb_cci_native_version(VALUE self) {
    const char *version_text = curl_version();

    (void)self;
    return rb_str_new_cstr(version_text);
}
|
.do_request(url:, impersonate: DEFAULT_IMPERSONATE, headers: {}, post_data: "", follow_redirects: true, timeout_sec: DEFAULT_TIMEOUT_SEC, proxy: "") ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/curl_impersonate.rb', line 18
#
# Normalises the keyword arguments and forwards them to _do_request_native.
#
# url:              converted with #to_s
# impersonate:      converted with #to_s (defaults to DEFAULT_IMPERSONATE)
# headers:          every key and value is converted with #to_s
# post_data:        converted with #to_s
# follow_redirects: coerced to a strict true/false
# timeout_sec:      converted with Integer(), which raises on non-numeric input
# proxy:            converted with #to_s, then split by parse_proxy into a
#                   credential-free URL and a "user:pass" string
#
# Returns whatever _do_request_native returns.
def self.do_request(url:, impersonate: DEFAULT_IMPERSONATE, headers: {}, post_data: "", follow_redirects: true, timeout_sec: DEFAULT_TIMEOUT_SEC, proxy: "")
  string_headers = headers.each_with_object({}) { |(k, v), h| h[k.to_s] = v.to_s }
  proxy_url, proxy_userpwd = parse_proxy(proxy.to_s)

  _do_request_native(
    url.to_s,
    impersonate.to_s,
    string_headers,
    post_data.to_s,
    follow_redirects ? true : false,
    Integer(timeout_sec),
    proxy_url,
    proxy_userpwd,
  )
end
.extract_cookies(headers_str) ⇒ Object
Extracts cookies from a raw HTTP header string (the value of Response#headers). Returns a Hash<String, String>; later Set-Cookie lines with the same name overwrite earlier ones (matches the Go reference implementation).
Only the cookie name/value pair is kept — attributes such as Path, Domain, Expires, Secure are discarded.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/curl_impersonate/cookies.rb', line 8
#
# Collects the name/value pair of every Set-Cookie line in a raw header
# string into a Hash. The header name is matched case-insensitively; a later
# line with the same cookie name overwrites an earlier one. Everything after
# the first ";" on a line (the cookie attributes) is ignored.
#
# Returns an empty Hash when headers_str is nil or empty.
def self.extract_cookies(headers_str)
  cookies = {}
  return cookies if headers_str.nil? || headers_str.empty?

  headers_str.split(/\r?\n/).each do |line|
    next unless line.downcase.start_with?("set-cookie:")

    # Keep only the leading "name=value" part, before any attributes.
    pair = line[("set-cookie:".length)..].split(";", 2).first.to_s.strip
    next if pair.empty?

    # Split on the first "=" only, so "=" inside the value is preserved.
    name, value = pair.split("=", 2)
    cookies[name] = value.to_s if name && !name.empty?
  end

  cookies
end
.parse_proxy(proxy) ⇒ Object
Split “scheme://user:pass@host:port” into (“scheme://host:port”, “user:pass”). Returns (“”, “”) for empty input. If there is no “@” the whole string is treated as the proxy URL with empty auth. Port-only or scheme-less inputs are passed through to libcurl, which has its own defaulting logic.
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/curl_impersonate.rb', line 43
#
# Splits a proxy string into [url_without_credentials, "user:pass"].
#
# Returns ["", ""] for nil or empty input. When the part after the optional
# "scheme://" prefix contains no "@", the input is returned unchanged with
# empty credentials.
def self.parse_proxy(proxy)
  return ["", ""] if proxy.nil? || proxy.empty?

  scheme = ""
  rest = proxy
  if (idx = proxy.index("://"))
    scheme = proxy[0..(idx + 2)]
    rest = proxy[(idx + 3)..]
  end

  # Split on the LAST "@" so an "@" inside the credentials stays with them.
  if (idx = rest.rindex("@"))
    auth = rest[0...idx]
    host = rest[(idx + 1)..]
    [scheme + host, auth]
  else
    [proxy, ""]
  end
end