Class: Crawlscope::Crawl

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlscope/crawl.rb

Instance Method Summary collapse

Constructor Details

#initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_factory: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, renderer: :http, scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS, allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES) ⇒ Crawl

Returns a new instance of Crawl.



5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/crawlscope/crawl.rb', line 5

# Captures every crawl setting on the instance; performs no work beyond
# normalising two of the arguments.
#
# Required keywords: base_url, sitemap_path, rules, schema_registry.
# Optional keywords fall back to nil (browser_factory), :http (renderer) or
# the matching Configuration::DEFAULT_* constant.
#
# Normalisation applied here:
# * rules    - passed through Array(), so nil becomes [] and a lone
#              non-array rule is wrapped in a one-element array.
# * renderer - converted with #to_sym, so "http" and :http are equivalent.
#
# All other arguments are stored unchanged.
def initialize(
  base_url:,
  sitemap_path:,
  rules:,
  schema_registry:,
  browser_factory: nil,
  concurrency: Configuration::DEFAULT_CONCURRENCY,
  network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS,
  renderer: :http,
  scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE,
  timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS,
  allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES
)
  # Arguments that are normalised before being stored.
  @rules = Array(rules)
  @renderer = renderer.to_sym

  # Site being crawled.
  @base_url = base_url
  @sitemap_path = sitemap_path

  # Collaborators handed in by the caller.
  @schema_registry = schema_registry
  @browser_factory = browser_factory

  # Tuning values, kept verbatim.
  @allowed_statuses = allowed_statuses
  @concurrency = concurrency
  @timeout_seconds = timeout_seconds
  @network_idle_timeout_seconds = network_idle_timeout_seconds
  @scroll_page = scroll_page
end

Instance Method Details

#call ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/crawlscope/crawl.rb', line 19

# Runs one crawl and returns its outcome.
#
# Steps, in order:
# 1. Resolve the URL list via #sitemap_urls.
# 2. Build a page fetcher via #page and keep it in @page_fetcher for the
#    duration of the run.
# 3. Fetch pages with Crawler, using the configured concurrency.
# 4. Pass the pages and a fresh IssueCollection through #collect, #cache and
#    #scan (helpers defined elsewhere in this class).
#
# The fetcher built in step 2 is always closed, whether the run finishes or
# raises, and @page_fetcher is reset to nil afterwards.
#
# @return [Result] base URL, sitemap path, URLs, pages and collected issues
def call
  urls = sitemap_urls

  @page_fetcher = page
  pages = Crawler.new(page_fetcher: @page_fetcher, concurrency: @concurrency).call(urls)
  issues = IssueCollection.new

  collect(pages, issues)
  cache(pages)
  scan(urls, pages, issues)

  Result.new(
    base_url: @base_url,
    sitemap_path: @sitemap_path,
    urls: urls,
    pages: pages,
    issues: issues
  )
ensure
  # Detach the fetcher before closing it. Otherwise a later #call that fails
  # before building its own fetcher (e.g. #sitemap_urls raises) would find
  # this stale, already-closed fetcher in @page_fetcher and close it again.
  fetcher = @page_fetcher
  @page_fetcher = nil
  fetcher&.close
end