Class: Request

Inherits:
Object
  • Object
show all
Defined in:
lib/Request.rb

Defined Under Namespace

Modules: InteractiveCloudflareRecovery Classes: CloudflareBlockedError

Constant Summary collapse

CLOUDFLARE_MITIGATION_VALUES =
%w[challenge block managed_challenge].freeze
CLOUDFLARE_RECOVERY_LIMIT =

Cap how many times a single self.URL call chain can fall through the Cloudflare-recovery branch, so a user who keeps saying yes to the prompt while Medium keeps blocking can’t loop forever.

5

Class Method Summary collapse

Class Method Details

.body(response) ⇒ Object



304
305
306
# File 'lib/Request.rb', line 304

# Returns the body of +response+ by delegating to readBodyAsUTF8; whatever
# that helper yields (including nil) is handed back unchanged.
def self.body(response)
  utf8_text = readBodyAsUTF8(response)
  utf8_text
end

.cloudflareBlocked?(response) ⇒ Boolean

Cloudflare tags blocked responses via either the cf-mitigated header or the standard “Just a moment…” challenge HTML. We check both so we catch challenges even on Cloudflare deployments that don’t set the explicit header.

Returns:

  • (Boolean)


284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/Request.rb', line 284

# Decides whether +response+ is a Cloudflare block/challenge page.
#
# Only 403 and 503 responses are considered. Such a response counts as
# blocked when its cf-mitigated header (case-insensitive) is one of
# CLOUDFLARE_MITIGATION_VALUES, or when its body contains one of the known
# challenge-page markers.
#
# @param response [#code, #[], #body, nil] HTTP response to inspect
# @return [Boolean] true when the response looks like a Cloudflare block
def self.cloudflareBlocked?(response)
    return false if response.nil?
    return false unless [403, 503].include?(response.code.to_i)

    header_value = response['cf-mitigated'].to_s.downcase
    return true if CLOUDFLARE_MITIGATION_VALUES.include?(header_value)

    # Fall back to sniffing the challenge HTML when the header is absent.
    page = response.body.to_s
    return false if page.empty?

    ['Just a moment...', 'cf-error-details', 'Attention Required'].any? do |marker|
        page.include?(marker)
    end
end

.html(response) ⇒ Object



299
300
301
302
# File 'lib/Request.rb', line 299

# Parses the body of +response+ into a Nokogiri HTML document.
#
# The body is obtained through readBodyAsUTF8; when that helper returns nil
# no parsing is attempted and nil is returned.
def self.html(response)
  markup = readBodyAsUTF8(response)
  return nil if markup.nil?

  Nokogiri::HTML(markup)
end

.readBodyAsUTF8(response) ⇒ Object

Net::HTTP#read_body returns ASCII-8BIT (binary). Without an explicit UTF-8 tag, downstream parsers misinterpret multi-byte sequences: Nokogiri’s encoding sniffer falls back to ISO-8859-1 for inline <script> contents, which then mojibakes the embedded JSON (e.g. CJK comes back as garbage like “ä½¿” instead of “使”).



313
314
315
316
317
318
319
# File 'lib/Request.rb', line 313

# Reads the body of a 200 response and tags it as UTF-8.
#
# The string returned by +read_body+ is re-tagged in place (no transcoding,
# no copy), so the caller receives the very same String object.
#
# @param response [#code, #read_body, nil] HTTP response to read from
# @return [String, nil] nil when +response+ is nil or its status is not 200;
#   a nil or empty body is returned untouched; otherwise the body tagged as
#   UTF-8
def self.readBodyAsUTF8(response)
  return nil if response.nil?
  return nil unless response.code.to_i == 200

  raw = response.read_body
  return raw if raw.nil? || raw.empty?

  # force_encoding mutates the receiver and returns it.
  raw.force_encoding(Encoding::UTF_8)
end

.URL(url, method = 'GET', data = nil, retryCount = 0) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/Request.rb', line 174

# Performs a request against +url+ over TLS and returns the final response,
# following redirects and retrying on HTTP 429 along the way.
#
# Side effects: every non-nil entry of the global $cookies hash is sent as
# the Cookie header, and name/value pairs from Set-Cookie response headers
# are merged back into $cookies.
#
# @param url [String] absolute URL to request
# @param method [String] 'GET' (case-insensitive) issues a GET; any other
#   value issues a POST with a JSON content type
# @param data [Object, nil] serialised with JSON.dump as the POST body when
#   not nil; unused for GET
# @param retryCount [Integer] attempts already made in this call chain
#   (shared by redirects, 429 retries and Cloudflare recovery)
# @return [Net::HTTPResponse] response of the last request in the chain
# @raise [CloudflareBlockedError] when cloudflareBlocked? flags the response
#   and interactive recovery is unavailable, does not succeed, or
#   CLOUDFLARE_RECOVERY_LIMIT attempts have been used
# @raise [RuntimeError] when a 429 or a redirect is received on the 10th
#   attempt or later
def self.URL(url, method = 'GET', data = nil, retryCount = 0)
  retryCount += 1

  uri = URI(url)
  https = Net::HTTP.new(uri.host, uri.port)
  # TLS is always enabled, whatever the scheme of +url+.
  https.use_ssl = true

  # --- TLS / Certificate verification setup ---
  # Some OpenSSL builds/configs enable CRL checking, which can fail with:
  # "certificate verify failed (unable to get certificate CRL)".
  # Net::HTTP/OpenSSL does not automatically fetch CRLs, so we use a default
  # cert store and clear CRL-related flags to avoid hard failures while still
  # verifying the peer certificate.
  https.verify_mode = OpenSSL::SSL::VERIFY_PEER

  store = OpenSSL::X509::Store.new
  store.set_default_paths
  # Ensure no CRL-check flags are enabled by default
  store.flags = 0
  https.cert_store = store

  # Allow overriding CA bundle paths via environment variables if needed.
  ca_file = ENV['SSL_CERT_FILE']
  https.ca_file = ca_file if ca_file && !ca_file.empty?
  ca_dir = ENV['SSL_CERT_DIR']
  https.ca_path = ca_dir if ca_dir && !ca_dir.empty?

  # (Optional) timeouts to avoid hanging on network issues
  https.open_timeout = 10
  https.read_timeout = 30
  # --- end TLS setup ---

  if method.upcase == 'GET'
    request = Net::HTTP::Get.new(uri)
  else
    request = Net::HTTP::Post.new(uri)
    request['Content-Type'] = 'application/json'
    request.body = JSON.dump(data) unless data.nil?
  end

  request['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36 Edg/142.0.0.0'

  cookiesString = $cookies.reject { |_, value| value.nil? }
                          .map { |key, value| "#{key}=#{value}" }
                          .join('; ')
  request['Cookie'] = cookiesString unless cookiesString.empty?

  response = https.request(request)

  # Merge Set-Cookie headers into $cookies. Only the leading "name=value"
  # pair of each header is kept; attributes (Path, Expires, ...) are dropped.
  # Empty headers and pairs without a name are skipped instead of raising.
  (response.get_fields('set-cookie') || []).each do |cookie|
    pair = cookie.split(';', 2).first.to_s.strip
    next if pair.empty?

    key, value = pair.split('=', 2)
    next if key.nil? || key.empty?

    $cookies[key] = value
  end

  if cloudflareBlocked?(response)
    # On every Cloudflare block — even when cookies are already
    # set — re-run the recovery flow on a TTY. ChromeAuth refreshes
    # sid/uid/cf_clearance/_cfuvid into $cookies + the cache, so
    # the next attempt usually succeeds. Bounded by retryCount so
    # a degenerate loop (user keeps clearing, Medium keeps blocking)
    # eventually surfaces the error. CI / non-TTY just raises.
    if retryCount <= CLOUDFLARE_RECOVERY_LIMIT && InteractiveCloudflareRecovery.available?
      return self.URL(url, method, data, retryCount) if InteractiveCloudflareRecovery.run(url)
    end
    raise CloudflareBlockedError.new(response.code.to_i, url)
  end

  status = response.code.to_i
  location = response['location'].to_s

  if status == 429
    # Too Many Requests: retry the same URL immediately (no backoff).
    raise "Error: Too Many Requests, blocked by Medium. URL: #{url}" if retryCount >= 10

    response = self.URL(url, method, data, retryCount)
  elsif status.between?(300, 399) && !location.empty?
    # 3XX Redirect
    raise "Error: Retry limit reached. URL: #{url}" if retryCount >= 10

    # Resolve the Location header against the current URL so that absolute,
    # host-relative ("/x"), path-relative ("x") and scheme-relative
    # ("//host/x") targets all work and a non-default port is preserved.
    target = URI.join(uri.to_s, location).to_s
    response = self.URL(target, method, data, retryCount)
  end

  response
end