Class: Glib::JsonCrawler::Router

Inherits:
Object
  • Object
show all
Defined in:
lib/glib/json_crawler/router.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Router

Returns a new instance of Router.



54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/glib/json_crawler/router.rb', line 54

# Builds a router in its idle state: empty page stacks, empty action sets,
# an empty log buffer and a fresh traversal visitor in crawler-test mode.
def initialize
  # Page stack bookkeeping (see begin_page / end_page).
  @page_urls = []
  @page_specs = []
  @skip_similar_page = false

  # Actions recorded while crawling.
  @http_actions = Set.new
  @read_only_actions = Set.new

  # Log buffer and nesting level; the level is raised before an action is
  # processed, so -1 means "nothing in progress yet".
  @logger = ''
  @depth = -1

  # default rails's development host
  @host ||= 'localhost:3000'

  @visitor = Glib::Json::Traversal::Visitor.new(crawler_test: true)
end

Instance Attribute Details

#deferred_actions ⇒ Object (readonly)

Returns the value of attribute deferred_actions.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Reader for @deferred_actions.
# NOTE(review): the constructor shown for this class never assigns
# @deferred_actions, so this presumably returns nil unless something else sets
# it -- confirm whether the attribute is still in use.
def deferred_actions
  @deferred_actions
end

#host ⇒ Object

Returns the value of attribute host.



9
10
11
# File 'lib/glib/json_crawler/router.rb', line 9

# Host (including port) of the application under crawl. The constructor falls
# back to 'localhost:3000'; `allowed?` builds its URL pattern from this value.
def host
  @host
end

#http_actions ⇒ Object (readonly)

Returns the value of attribute http_actions.



8
9
10
# File 'lib/glib/json_crawler/router.rb', line 8

# Set of recorded [action, url] pairs. It starts empty and is filled by
# `process_action` and `step` as they encounter actions worth recording.
def http_actions
  @http_actions
end

#last_log ⇒ Object (readonly)

Returns the value of attribute last_log.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# The line most recently built by `log`, without indentation or trailing
# newline. The constructor does not assign it, so it is nil before the first
# `log` call.
def last_log
  @last_log
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



7
8
9
# File 'lib/glib/json_crawler/router.rb', line 7

# Accumulated log text. Despite the name this is a plain String: it starts
# empty and `log` appends one indented, newline-terminated line per call.
def logger
  @logger
end

#read_only_actions ⇒ Object (readonly)

deprecated



6
7
8
# File 'lib/glib/json_crawler/router.rb', line 6

# Deprecated. Returns the Set created empty by the constructor.
# NOTE(review): every write to this set in `process_action` and `step` is
# commented out in favour of `http_actions`, so it presumably stays empty --
# confirm before relying on it.
def read_only_actions
  @read_only_actions
end

#skip_similar_page ⇒ Object

Returns the value of attribute skip_similar_page.



9
10
11
# File 'lib/glib/json_crawler/router.rb', line 9

# Flag that the constructor initialises to false.
# NOTE(review): presumably consulted by `similar_page?` to decide whether
# near-duplicate pages are skipped -- confirm against that method.
def skip_similar_page
  @skip_similar_page
end

Instance Method Details

#_puts(text) ⇒ Object



29
30
31
# File 'lib/glib/json_crawler/router.rb', line 29

# Prints +text+ to standard output, indented two spaces per nesting level.
#
# The constructor starts @depth at -1 and String#* raises ArgumentError for a
# negative count, so the level is clamped at zero: output written before any
# action is in progress is simply left unindented.
#
# @param text [String] the line to print
def _puts(text)
  indent = '  ' * [@depth, 0].max
  puts indent + text
end

#allowed?(url) ⇒ Boolean

Returns:

  • (Boolean)


252
253
254
255
# File 'lib/glib/json_crawler/router.rb', line 252

# Tells whether +url+ may be opened by the crawler: it must contain the
# configured host followed by at least one more character, and must not end
# in ".pdf".
#
# @param url [String, nil] the URL taken from an action spec
# @return [MatchData, nil] truthy when the URL is allowed, nil otherwise
def allowed?(url)
  # Two details matter when building the pattern from a double-quoted string:
  # * the backslash must be doubled, because "\." collapses to "." and the
  #   lookbehind would then reject any URL ending in "<any char>pdf";
  # * the host is escaped so that its dots only match literal dots.
  pattern = Regexp.new("#{Regexp.escape(host)}.+(?<!\\.pdf)$")
  pattern.match(url)
end

#assert_target_ids_exist(args) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/glib/json_crawler/router.rb', line 33

# Registers every target id mentioned in +args+ with the visitor, so that it
# can later be verified that the id really exists within the page.
#
# 'targetIds' (a collection) takes precedence; 'targetId' (a single id) is
# only looked at when 'targetIds' is absent. No crawler is attached to the
# deferred entry (nil is passed), since only existence is being checked.
def assert_target_ids_exist(args)
  many = args['targetIds']
  return many.each { |id| @visitor.defer_action(nil, id) } if many

  single = args['targetId']
  @visitor.defer_action(nil, single) if single
end

#begin_page(spec, url) ⇒ Object



232
233
234
235
236
# File 'lib/glib/json_crawler/router.rb', line 232

# Marks the start of a page: pushes its spec and URL onto the page stacks
# (read back through `page_spec` / `page_url`) and notifies the visitor.
# Returns whatever the visitor's `begin_page` returns.
def begin_page(spec, url)
  @page_specs.push(spec)
  @page_urls.push(url)

  @visitor.begin_page(spec)
end

#crawl_multiple(views, block) ⇒ Object



228
229
230
# File 'lib/glib/json_crawler/router.rb', line 228

# Hands +views+ and +block+ straight to the visitor's `traverse_multiple`
# and returns its result. Note that +block+ is an ordinary positional
# argument here, not a Ruby block.
def crawl_multiple(views, block)
  @visitor.traverse_multiple(views, block)
end

#end_page(spec) ⇒ Object



238
239
240
241
242
# File 'lib/glib/json_crawler/router.rb', line 238

# Marks the end of a page: drops the top entry from both page stacks and
# notifies the visitor. Returns whatever the visitor's `end_page` returns.
def end_page(spec)
  [@page_specs, @page_urls].each(&:pop)

  @visitor.end_page(spec)
end

#follow_v2(http, crawler_actions) ⇒ Object

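Note: the text below is a code fragment that was picked up as this method's description, not prose documentation. It does not match the current method body shown further down (it iterates `target_actions` and issues `http.get` directly).

@depth += 1

target_actions.each do |crawler_action|
  action, url = crawler_action
  http.get(url, action, {})
end

end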



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# File 'lib/glib/json_crawler/router.rb', line 198

# Replays each recorded crawler action through `execute_crawler_action`.
#
# +crawler_actions+ yields [action, url, params] triples. How a triple is
# executed depends on the GLIB_DISABLE_PERMISSION_TEST_SKIP environment
# variable, which is consulted for every triple:
#
# * anything other than 'true' (skip mode): the URL, when present, is tagged
#   with the `__glib_permission_test` parameter before the action runs;
# * 'true' (full mode): the action runs inside a database transaction that is
#   always rolled back.
#
# The nesting depth is raised by one and is not lowered again here.
def follow_v2(http, crawler_actions)
  @depth += 1

  crawler_actions.each do |action, url, params|
    if ENV['GLIB_DISABLE_PERMISSION_TEST_SKIP'] != 'true'
      # Skip mode: flag the request as a permission test.
      url = add_params(url, __glib_permission_test: true) if url.present?
      execute_crawler_action(http, action, url, params)
      next
    end

    # Full mode: undo whatever the request wrote, so that database state is
    # reset between URL checks and one request cannot contaminate the next.
    #
    # This matters for permission tests far more than for crawler tests. A
    # permission test hits every available URL for the sole purpose of
    # checking that URL's permission, so a single wrong result (e.g. 403
    # instead of 200 caused by a side effect of an earlier request) cannot be
    # tolerated.
    #
    # Crawler tests, on the other hand, are expected to cover one scenario
    # only, so a scenario drifting because of side effects is acceptable
    # there. For performance reasons the rollback is therefore not applied to
    # crawler tests.
    ActiveRecord::Base.transaction do
      execute_crawler_action(http, action, url, params)
      raise ActiveRecord::Rollback
    end
  end
end

#last_form ⇒ Object



176
177
178
# File 'lib/glib/json_crawler/router.rb', line 176

# Returns the last entry of the visitor's `forms` collection.
# NOTE(review): presumably the form most recently encountered during
# traversal -- confirm against the visitor implementation.
def last_form
  @visitor.forms.last
end

#log(action, key_data, response = nil) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/glib/json_crawler/router.rb', line 11

# Records one log line for +action+ and remembers it as `last_log`.
#
# The line has the form "action :: code :: key_data"; the response code and
# +key_data+ are left out when they are missing. It is appended to the
# `logger` buffer, indented two spaces per nesting level and terminated by a
# newline.
#
# @param action [String] name of the action being logged
# @param key_data [String, nil] usually the URL; may be other text
# @param response [Object, nil] when present, its `code` is included
# @return [String] the updated log buffer
def log(action, key_data, response = nil)
  # Sometimes `key_data` may not be an actual URL, e.g. in the context of dialogs_alert,
  # it is the alert message. Only real URLs get the internal permission-test
  # parameter stripped.
  if key_data&.start_with?('http://', 'https://')
    key_data = remove_params(key_data, [:__glib_permission_test])
  end

  status = response.present? ? response.code : nil
  @last_log = [action, status, key_data].compact.join(' :: ')

  # The constructor starts @depth at -1 and String#* raises ArgumentError for
  # a negative count, so logging outside of any action is left unindented.
  indent = '  ' * [@depth, 0].max

  # A new String is assigned on purpose (no in-place append): references
  # handed out earlier through `logger` stay untouched.
  @logger += "#{indent}#{@last_log}\n"
end

#page_spec ⇒ Object



244
245
246
# File 'lib/glib/json_crawler/router.rb', line 244

# Spec on top of the page stack, i.e. the one pushed by the most recent
# `begin_page` that has not yet been popped by `end_page`.
# Returns nil when the stack is empty.
def page_spec
  @page_specs.last
end

#page_url ⇒ Object



248
249
250
# File 'lib/glib/json_crawler/router.rb', line 248

# URL on top of the page stack, i.e. the one pushed by the most recent
# `begin_page` that has not yet been popped by `end_page`.
# Returns nil when the stack is empty.
def page_url
  @page_urls.last
end

#process_action(http, spec) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/glib/json_crawler/router.rb', line 100

# Dispatches one action spec (e.g. the value of a view's `onClick`).
#
# Target ids mentioned in the spec are always registered first. The action
# name then selects what happens: a matching JsonCrawler object is created,
# and/or the [action, url] pair is recorded in `http_actions`, and/or the
# action is merely logged. The nesting depth is raised while the action is
# handled and lowered again afterwards.
#
# @param http [Object] HTTP helper handed through to the crawler objects
# @param spec [Hash, nil] the action spec; must carry an 'action' key
# @return [Integer, nil] the restored depth, or nil when nothing was processed
# @raise [KeyError] when a non-nil +spec+ has no 'action' key
def process_action(http, spec)
  # A nil spec has neither an action nor target ids. Return before anything
  # indexes into it (`nil['targetIds']` would raise NoMethodError).
  return if spec.nil?

  action = spec.fetch('action')
  params = spec

  assert_target_ids_exist(params)

  return unless action.present?
  return if similar_page?(params)

  @depth += 1
  case action
  when 'initiate_navigation'
    # @read_only_actions.add([action, params['url']])
    http_actions.add([action, params['url']])
    JsonCrawler::NavInitiate.new(http, params, action)
  when 'runMultiple-v1', 'runMultiple'
    JsonCrawler::RunMultiple.new(http, params, action)
  when 'windows/open-v1', 'dialogs/open-v1', 'windows/reload-v1', 'windows/open',
    'dialogs/open', 'windows/reload', 'windows/openWeb', 'windows/openWeb-v1'
    if allowed?(params['url'])
      # @read_only_actions.add([action, params['url']])
      http_actions.add([action, params['url']])
      JsonCrawler::WindowsOpen.new(http, params, action)
    else
      # IMPORTANT — do not drop the `http_actions.add` below.
      #
      # This `else` is reached by BOTH genuinely external links AND by
      # same-host file/download endpoints that `allowed?` rejected for the
      # file-extension rule (e.g. *.pdf). Recording the same-host ones is the
      # ONLY way their file authorization ever gets exercised: the permission
      # test replays each recorded action per user and snapshots the response.
      # Remove this line and every file/download endpoint silently falls out
      # of permission coverage -- a wrong authorization on one would then ship
      # unnoticed (exactly the gap this was added to close).
      #
      # We do NOT traverse/download them (no WindowsOpen crawler is created
      # here), and the permission replay runs with inspect_http:false so it
      # never follows the storage redirect. External links are filtered out by
      # `internal_url?`. Behaviour is guarded by
      # test/dummy-app/test/json_crawler/router_test.rb -- if you delete the
      # line below, that test goes red.
      http_actions.add([action, params['url']]) if internal_url?(params['url'])
      self.log action, params['url']
    end
  when 'dialogs/show-v1', 'dialogs/show', 'popovers/show-v1', 'popovers/show'
    JsonCrawler::DialogsShow.new(http, params, action)
  when 'sheets/select-v1', 'sheets/select'
    JsonCrawler::Menu.new(http, params, action)
  when 'http/post-v1', 'http/post'
    JsonCrawler::ActionHttp.new(:post, http, params, action)
  when 'forms/submit-v1', 'forms/submit'
    # forms = @visitor.forms
    # JsonCrawler::FormsSubmit.new(http, params, forms.last)
    JsonCrawler::FormsSubmit.new(http, params)
  when 'dialogs/alert-v1', 'dialogs/alert'
    JsonCrawler::DialogsAlert.new(http, params, action)
  when 'dialogs/close-v1', 'dialogs/close', 'popovers/close', 'popovers/close-v1'
    JsonCrawler::DialogsClose.new(http, params, action)
  else
    # Any other action is logged; all of them are also recorded, except the
    # delete and oauth actions listed here.
    unrecorded = ['http/delete-v1', 'dialogs/oauth-v1', 'http/delete', 'dialogs/oauth']
    unless unrecorded.include?(action)
      # @read_only_actions.add([action, params['url']])
      http_actions.add([action, params['url']])
    end
    self.log action, params['url']
  end
  @depth -= 1
end

#should_defer_crawl?(action_crawler, args) ⇒ Boolean

Returns:

  • (Boolean)


45
46
47
48
49
50
51
52
# File 'lib/glib/json_crawler/router.rb', line 45

# Decides whether +action_crawler+ has to wait for a target element.
#
# When +args+ names a 'targetId', the crawler is handed to the visitor to be
# deferred for that id and true is returned; otherwise nothing is registered
# and the answer is false.
def should_defer_crawl?(action_crawler, args)
  target_id = args['targetId']
  return false unless target_id

  @visitor.defer_action(action_crawler, target_id)
  true
end

#step(http, args) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/glib/json_crawler/router.rb', line 67

# Handles a single view spec encountered during traversal.
#
# * 'fields/submit' views trigger a form submission crawl and nothing else.
# * 'panels/web' views get their same-host URL recorded in `http_actions`.
# * Any view with an enabled `onClick` that is not marked rel="nofollow" has
#   that action dispatched through `process_action`.
#
# @param http [Object] HTTP helper handed through to crawlers
# @param args [Hash] the view spec; anything else is ignored
# @return [Object, nil] the result of `process_action` when an onClick was
#   dispatched, nil otherwise
def step(http, args)
  # Only a Hash can carry a view type or an onClick. Check this before
  # touching `args`, so nil or Array input is skipped rather than raising.
  return unless args.is_a?(Hash)

  # TODO: Refactor
  case args['view']
  when 'fields/submit-v1', 'fields/submit'
    @depth += 1
    # forms = @visitor.forms
    # JsonCrawler::FormsSubmit.new(http, args, forms.last)
    JsonCrawler::FormsSubmit.new(http, args)
    @depth -= 1
    return
  when 'panels/web-v1', 'panels/web'
    # A panels/web embeds content by URL in an inline viewer -- a file
    # preview (PDF/image), an inline HTML preview, etc. There's no onClick
    # action to catch here, so when the URL is one of our own (same-host)
    # endpoints we record it directly: the client fetches it to render the
    # panel, so its authorization should be exercised by the permission test
    # (it replays each per user and snapshots the response). We only record
    # (no fetch); external embeds are filtered out by internal_url?.
    # Both aliases are recorded under the 'panels/web-v1' name.
    url = args['url']
    http_actions.add(['panels/web-v1', url]) if url.present? && internal_url?(url)
  end

  return if args['rel'] == 'nofollow'

  on_click = args.fetch('onClick', nil)
  process_action(http, on_click) if on_click && !args['disabled']

  # @read_only_actions.replace(@read_only_actions.sort_by { |e| e[1].to_s })
end