Class: Request

Inherits:
Object
  • Object
show all
Defined in:
lib/Request.rb

Defined Under Namespace

Modules: InteractiveCloudflareRecovery Classes: CloudflareBlockedError

Constant Summary collapse

# Values of the cf-mitigated response header (compared after lower-casing)
# that cloudflareBlocked? treats as a Cloudflare block on a 403/503 response.
CLOUDFLARE_MITIGATION_VALUES =
%w[challenge block managed_challenge].freeze
# Flag flipped to true by Request.URL just before it runs the interactive
# Cloudflare recovery, so that recovery is attempted only while the flag is
# still false; resetCloudflareInteractiveResolution! sets it back to false.
@@cloudflareInteractiveResolutionAttempted =
false

Class Method Summary collapse

Class Method Details

.body(response) ⇒ Object



266
267
268
# File 'lib/Request.rb', line 266

# Returns the body of +response+ by delegating to readBodyAsUTF8.
#
# @param response [Object, nil] response object passed through unchanged
# @return [Object] whatever readBodyAsUTF8 returns for +response+
def self.body(response)
  readBodyAsUTF8(response)
end

.cloudflareBlocked?(response) ⇒ Boolean

Cloudflare tags blocked responses via either the cf-mitigated header or the standard “Just a moment…” challenge HTML. We check both so we catch challenges even on Cloudflare deployments that don’t set the explicit header.

Returns:

  • (Boolean)


246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/Request.rb', line 246

# Decides whether +response+ is a Cloudflare block/challenge page.
#
# Only 403 and 503 responses are considered. Such a response counts as blocked
# when its cf-mitigated header (case-insensitive) is one of
# CLOUDFLARE_MITIGATION_VALUES, or when its body contains one of the known
# Cloudflare interstitial markers.
#
# @param response [#code, #[], #body, nil] HTTP response to inspect
# @return [Boolean] true when the response looks like a Cloudflare block
def self.cloudflareBlocked?(response)
    return false if response.nil?

    status = response.code.to_i
    return false unless [403, 503].include?(status)

    # Explicit signal: Cloudflare labels the mitigation it applied.
    mitigation = response['cf-mitigated'].to_s.downcase
    return true if CLOUDFLARE_MITIGATION_VALUES.include?(mitigation)

    # Fallback signal: well-known strings from the challenge / error HTML.
    # An empty (or nil) body contains none of them, so it yields false.
    html = response.body.to_s
    ['Just a moment...', 'cf-error-details', 'Attention Required'].any? do |marker|
        html.include?(marker)
    end
end

.html(response) ⇒ Object



261
262
263
264
# File 'lib/Request.rb', line 261

# Parses the body of +response+ into a Nokogiri HTML document.
#
# The body is obtained through readBodyAsUTF8; when that yields nil no
# document is built and nil is returned instead.
#
# @param response [Object, nil] response object passed to readBodyAsUTF8
# @return [Object, nil] result of Nokogiri::HTML on the body, or nil
def self.html(response)
  utf8Body = readBodyAsUTF8(response)
  return nil if utf8Body.nil?

  Nokogiri::HTML(utf8Body)
end

.readBodyAsUTF8(response) ⇒ Object

Net::HTTP#read_body returns ASCII-8BIT (binary). Without an explicit UTF-8 tag, downstream parsers misinterpret multi-byte sequences: Nokogiri’s encoding sniffer falls back to ISO-8859-1 for inline <script> contents, which then mojibakes the embedded JSON (e.g. CJK comes back as garbage like “ä½¿” instead of “使”).



275
276
277
278
279
280
281
# File 'lib/Request.rb', line 275

# Reads the body of a 200 response and tags it as UTF-8.
#
# The string returned by read_body is re-tagged in place with
# force_encoding (no transcoding, same object). A nil or empty body is
# returned untouched.
#
# @param response [#code, #read_body, nil] HTTP response to read from
# @return [String, nil] the body, or nil when +response+ is nil or its
#   status code is not 200
def self.readBodyAsUTF8(response)
  return nil if response.nil?
  return nil unless response.code.to_i == 200

  raw = response.read_body
  raw.force_encoding(Encoding::UTF_8) unless raw.nil? || raw.empty?
  raw
end

.resetCloudflareInteractiveResolution! ⇒ Object

Test helper: reset the once-per-process recovery flag.



134
135
136
# File 'lib/Request.rb', line 134

# Test helper: sets the @@cloudflareInteractiveResolutionAttempted class
# variable back to false.
#
# @return [false] the value just assigned
def self.resetCloudflareInteractiveResolution!
    @@cloudflareInteractiveResolutionAttempted = false
end

.URL(url, method = 'GET', data = nil, retryCount = 0) ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/Request.rb', line 138

# Performs an HTTPS request and returns the final Net::HTTP response.
#
# Cookies held in the global $cookies hash (entries with nil values are
# skipped) are sent with the request, and every Set-Cookie header on the
# response is merged back into $cookies. HTTP 429 responses are retried and
# 3xx responses with a Location header are followed, both by calling this
# method recursively; the two share one attempt counter that is capped at 10.
#
# @param url [String] absolute URL to request
# @param method [String] 'GET' (case-insensitive) sends a GET; any other
#   value sends a POST with a JSON content type
# @param data [Object, nil] POST payload, serialized with JSON.dump; unused
#   for GET requests
# @param retryCount [Integer] number of attempts already made; callers
#   normally leave this at its default
# @return [Object] the response of the last request performed
# @raise [CloudflareBlockedError] when cloudflareBlocked? flags the response
#   and interactive recovery is unavailable, already used, or unsuccessful
# @raise [RuntimeError] when the attempt limit is hit on a 429 or a redirect
def self.URL(url, method = 'GET', data = nil, retryCount = 0)
    retryCount += 1

    uri = URI(url)
    https = Net::HTTP.new(uri.host, uri.port)
    https.use_ssl = true

    # --- TLS / Certificate verification setup ---
    # Some OpenSSL builds/configs enable CRL checking, which can fail with:
    # "certificate verify failed (unable to get certificate CRL)".
    # Net::HTTP/OpenSSL does not automatically fetch CRLs, so we use a default
    # cert store and clear CRL-related flags to avoid hard failures while still
    # verifying the peer certificate.
    https.verify_mode = OpenSSL::SSL::VERIFY_PEER

    store = OpenSSL::X509::Store.new
    store.set_default_paths
    # Ensure no CRL-check flags are enabled by default
    store.flags = 0
    https.cert_store = store

    # Allow overriding CA bundle paths via environment variables if needed.
    if ENV['SSL_CERT_FILE'] && !ENV['SSL_CERT_FILE'].empty?
        https.ca_file = ENV['SSL_CERT_FILE']
    end
    if ENV['SSL_CERT_DIR'] && !ENV['SSL_CERT_DIR'].empty?
        https.ca_path = ENV['SSL_CERT_DIR']
    end

    # (Optional) timeouts to avoid hanging on network issues
    https.open_timeout = 10
    https.read_timeout = 30
    # --- end TLS setup ---

    if method.upcase == "GET"
        request = Net::HTTP::Get.new(uri)
    else
        request = Net::HTTP::Post.new(uri)
        request['Content-Type'] = 'application/json'
        request.body = JSON.dump(data) unless data.nil?
    end

    request['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0'

    # Send every stored cookie that currently has a value.
    cookiesString = $cookies.reject { |_, value| value.nil? }
                            .map { |key, value| "#{key}=#{value}" }
                            .join("; ")
    request['Cookie'] = cookiesString unless cookiesString.empty?

    response = https.request(request)

    # Remember cookies set by the server. Only the leading "name=value" pair
    # of each Set-Cookie header is kept; attributes (Path, Expires, ...) are
    # separated by ';' with optional whitespace, so split on ';' alone.
    Array(response.get_fields('set-cookie')).each do |cookie|
        pair = cookie.to_s.split(';', 2).first.to_s.strip
        next if pair.empty?

        key, value = pair.split('=', 2)
        next if key.nil? || key.empty?

        $cookies[key] = value
    end

    if cloudflareBlocked?(response)
        # Once-per-process: if we're on an interactive TTY, ask the user
        # to clear the challenge in a browser and retry. CI / non-TTY
        # environments fall straight through to the raise below.
        if !@@cloudflareInteractiveResolutionAttempted && InteractiveCloudflareRecovery.available?
            @@cloudflareInteractiveResolutionAttempted = true
            if InteractiveCloudflareRecovery.run(url)
                return self.URL(url, method, data, retryCount)
            end
        end
        raise CloudflareBlockedError.new(response.code.to_i, url)
    end

    status = response.code.to_i
    location = response['location']

    if status == 429
        # 429 Too Many Requests: repeat the same request until the limit.
        if retryCount >= 10
            raise "Error: Too Many Requests, blocked by Medium. URL: #{url}"
        end

        response = self.URL(url, method, data, retryCount)
    elsif status >= 300 && status <= 399 && !location.nil? && location != ''
        # 3XX Redirect
        if retryCount >= 10
            raise "Error: Retry limit reached. URL: #{url}"
        end

        # Location may be absolute, host-relative, path-relative or
        # protocol-relative; resolve it against the request URL so the
        # port and base path are preserved.
        target = URI.join(uri.to_s, location).to_s
        response = self.URL(target, method, data, retryCount)
    end

    response
end