Class: CacheServer

Inherits:
Object
  • Object
show all
Defined in:
lib/cache_server.rb

Constant Summary collapse

# HTTP header names. NOTE(review): not referenced by the methods shown on
# this page — presumably used when building the auth headers; confirm.
AUTH0_BEARER_HEADER =
'Authorization'.freeze
CONTENT_TYPE_HEADER =
'Content-Type'.freeze
# Endpoint paths appended to the server base URL: get-cache and get-dom are
# used by #get_cache, put-cache by #put_cache.
GET_CACHE_ENDPOINT =
'/v1/get-cache'.freeze
GET_DOM_ENDPOINT =
'/v1/get-dom'.freeze
PUT_CACHE_ENDPOINT =
'/v1/put-cache'.freeze
# Status code the server responses are compared against.
HTTP_OK =
200
GET_HTTP =

Persistent HTTP clients shared across all instances — they reuse TCP connections instead of opening a new connection on every request.

Net::HTTP::Persistent.new(name: 'scraper_central_get_cache').tap do |h|
  # NOTE(review): VERIFY_NONE disables TLS peer-certificate verification for
  # every request made through this client — confirm this is intentional.
  h.verify_mode = OpenSSL::SSL::VERIFY_NONE
end
# Persistent client used by #put_cache for the put-cache endpoint.
PUT_HTTP =
Net::HTTP::Persistent.new(name: 'scraper_central_put_cache').tap do |h|
  # NOTE(review): VERIFY_NONE disables TLS peer-certificate verification for
  # every request made through this client — confirm this is intentional.
  h.verify_mode = OpenSSL::SSL::VERIFY_NONE
end

Instance Method Summary collapse

Constructor Details

#initialize(args) ⇒ CacheServer

Returns a new instance of CacheServer.



26
27
28
29
30
31
32
33
34
35
# File 'lib/cache_server.rb', line 26

# Builds a cache-server client from a collection of options.
#
# Each option is read with +args[key]+ and stored in the instance variable
# of the same name. A logger writing to $stdout is created as well.
#
# @param args [#[]] options keyed by :proxy_name, :enable_js,
#   :cache_duration, :s3_key, :enable_image_cache, :auth_config and
#   :use_get_dom
def initialize(args)
  option_names = %i[
    proxy_name enable_js cache_duration s3_key
    enable_image_cache auth_config use_get_dom
  ]
  option_names.each do |option|
    instance_variable_set(:"@#{option}", args[option])
  end
  @logger = Logger.new($stdout)
end

Instance Method Details

#get_cache(url) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/cache_server.rb', line 37

# Asks the cache server for +url+ and returns what it has stored.
#
# With @use_get_dom set, the request goes to the get-dom endpoint (base URL
# from SERVER_URL_PUT_CACHE, falling back to SERVER_URL_GET_CACHE) and every
# failure is raised to the caller. Otherwise the request goes to the
# get-cache endpoint under SERVER_URL_GET_CACHE, and failures that happen
# while sending the request or reading the response are logged and reported
# as an empty result.
#
# @param url [Object] handed to prepare_get_cache_payload to build the JSON
#   request body
# @return [Array] four elements, +[page, headers, proxy, status_code]+:
#   - +['', nil, proxy, 200]+ when the JSON response has a 'proxyUrl' key
#   - +[page, headers, nil, status]+ for a stored page; +page+ is a StringIO
#     over the Base64-decoded body when @enable_image_cache is set, and
#     +status+ is the response's 'statusCode' or 200 when absent
#   - +['', nil, nil, 200]+ after a logged failure outside get-dom mode
# @raise [StandardError] in get-dom mode: no base URL configured, non-200
#   status, non-JSON content type, unparsable JSON, or any transport error
def get_cache(url)
  base_url = ENV['SERVER_URL_GET_CACHE']
  endpoint = GET_CACHE_ENDPOINT
  if @use_get_dom
    # Named *_url so the local does not shadow the #put_cache method.
    put_cache_url = ENV['SERVER_URL_PUT_CACHE'].to_s
    base_url = put_cache_url.empty? ? ENV['SERVER_URL_GET_CACHE'] : put_cache_url
    endpoint = GET_DOM_ENDPOINT
    if base_url.nil? || base_url.empty?
      raise StandardError, 'SERVER_URL_PUT_CACHE or SERVER_URL_GET_CACHE must be set for get-dom'
    end
  end

  payload = prepare_get_cache_payload(url)

  uri = URI.parse("#{base_url}#{endpoint}")
  request = Net::HTTP::Get.new(uri.request_uri, auth_headers)
  request.body = payload.to_json

  begin
    response = GET_HTTP.request(uri, request)

    if @use_get_dom && response.code&.to_i != HTTP_OK
      raise StandardError, "get-dom returned status #{response.code}: #{response.body}"
    end

    # content_type is nil when the server sends no Content-Type header;
    # treat that as "not JSON" instead of crashing on nil.include?.
    if response.content_type.to_s.include?('application/json')
      response_body = JSON.parse(response.body)
      return '', nil, proxy_from_server(response_body), HTTP_OK if response_body.key?('proxyUrl')

      page_from_server = response_body['body']
      # Image payloads arrive Base64-encoded; hand them back as an IO.
      page_from_server = StringIO.new(Base64.decode64(page_from_server)) if @enable_image_cache
      status_code = response_body['statusCode'] || HTTP_OK
      return page_from_server, headers_from_server(response_body), nil, status_code
    elsif @use_get_dom
      raise StandardError, "get-dom returned unexpected content type #{response.content_type.inspect}"
    else
      @logger.error "Unexpected response type: #{response.content_type}, body: #{response.body}, code: #{response.code}"
    end
  rescue JSON::ParserError => e
    raise StandardError, "error unmarshaling get-dom response: #{e.message}" if @use_get_dom

    @logger.error "Error sending request to server: #{e.message}"
  rescue StandardError => e
    @logger.error "Error sending request to server: #{e.message}"
    # get-dom callers need the failure; get-cache is best-effort.
    raise if @use_get_dom
  end
  ['', nil, nil, HTTP_OK]
end

#put_cache(cache_key, page, headers, cookies, enable_image_cache) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/cache_server.rb', line 93

# Stores a page on the cache server by POSTing a JSON payload to the
# put-cache endpoint under SERVER_URL_PUT_CACHE.
#
# @param cache_key [Object] sent as 'cacheKey'
# @param page [Object] sent as 'page'
# @param headers [Object] sent as 'headers'
# @param cookies [Object] sent as 'cookies'
# @param enable_image_cache [Object] sent as 'enableImageCache'
# @return [nil] when the server answers with HTTP 200
# @raise [StandardError] when SERVER_URL_PUT_CACHE is unset or empty, when
#   the request fails (the original error is re-raised), or when the server
#   answers with any status other than 200
def put_cache(cache_key, page, headers, cookies, enable_image_cache)
  base_url = ENV['SERVER_URL_PUT_CACHE'].to_s
  # Without a base URL the parsed URI is relative and has no request_uri;
  # fail with a message that names the missing setting instead.
  raise StandardError, 'SERVER_URL_PUT_CACHE must be set for put-cache' if base_url.empty?

  payload = {
    cacheKey: cache_key,
    page: page,
    headers: headers,
    cookies: cookies,
    enableImageCache: enable_image_cache
  }

  uri = URI.parse("#{base_url}#{PUT_CACHE_ENDPOINT}")
  request = Net::HTTP::Post.new(uri.request_uri, auth_headers)
  request.body = payload.to_json

  response =
    begin
      PUT_HTTP.request(uri, request)
    rescue StandardError => e
      @logger.error "Error sending cache to server: #{e.message}"
      raise
    end

  return if response.code&.to_i == HTTP_OK

  # Checked outside the rescue above so a bad status is logged exactly once.
  error_message = "Server returned bad status: #{response.code}"
  @logger.error error_message
  raise StandardError, error_message
end