Class: Crawlscope::Configuration

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlscope/configuration.rb

Constant Summary collapse

# Status codes returned by #allowed_statuses when none are configured.
DEFAULT_ALLOWED_STATUSES = [200, 301, 302].freeze
# Upper bound that #browser_concurrency applies to #concurrency.
DEFAULT_BROWSER_CONCURRENCY = 4
# Fallback returned by #network_idle_timeout_seconds.
DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS = 5
# Fallback returned by #scroll_page?.
DEFAULT_BROWSER_SCROLL_PAGE = true
# Fallback returned by #concurrency.
DEFAULT_CONCURRENCY = 10
# Fallback returned by #timeout_seconds.
DEFAULT_TIMEOUT_SECONDS = 20

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#allowed_statuses ⇒ Object



14
15
16
17
# File 'lib/crawlscope/configuration.rb', line 14

# Status codes accepted by the audit.
#
# The stored +@allowed_statuses+ is passed through +resolve+ (defined outside
# this excerpt). A nil result falls back to DEFAULT_ALLOWED_STATUSES; anything
# else is wrapped with Kernel#Array so a single value is accepted as well.
#
# @return [Array] each entry converted with +to_i+
def allowed_statuses
  configured = resolve(@allowed_statuses)
  configured = DEFAULT_ALLOWED_STATUSES if configured.nil?
  Array(configured).map(&:to_i)
end

#base_url ⇒ Object



19
20
21
# File 'lib/crawlscope/configuration.rb', line 19

# Returns the stored +@base_url+ after passing it through +resolve+
# (defined outside this excerpt). No default is applied here; #audit raises
# when the result is blank.
def base_url
  resolve(@base_url)
end

#browser_factory ⇒ Object



23
24
25
# File 'lib/crawlscope/configuration.rb', line 23

# Returns the stored +@browser_factory+ after passing it through +resolve+
# (defined outside this excerpt). The result is handed to Audit.new by #audit
# as-is; no default is applied here.
def browser_factory
  resolve(@browser_factory)
end

#concurrency ⇒ Object



27
28
29
30
# File 'lib/crawlscope/configuration.rb', line 27

# Configured concurrency level.
#
# @return [Integer] DEFAULT_CONCURRENCY when the resolved setting is nil,
#   otherwise the resolved setting converted with +to_i+
def concurrency
  configured = resolve(@concurrency)
  return DEFAULT_CONCURRENCY if configured.nil?

  configured.to_i
end

#network_idle_timeout_seconds ⇒ Object



43
44
45
46
# File 'lib/crawlscope/configuration.rb', line 43

# Configured network-idle timeout, in seconds (per the method name).
#
# @return [Integer] DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS when the
#   resolved setting is nil, otherwise the resolved setting converted with +to_i+
def network_idle_timeout_seconds
  configured = resolve(@network_idle_timeout_seconds)
  return DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS if configured.nil?

  configured.to_i
end

#output ⇒ Object



48
49
50
51
# File 'lib/crawlscope/configuration.rb', line 48

# Configured output object.
#
# Only nil triggers the fallback, so an explicitly configured +false+ is
# returned unchanged.
#
# @return [Object] +$stdout+ when the resolved setting is nil, otherwise the
#   resolved setting itself
def output
  configured = resolve(@output)
  return $stdout if configured.nil?

  configured
end

#renderer ⇒ Object



53
54
55
56
57
58
59
# File 'lib/crawlscope/configuration.rb', line 53

# Configured renderer name as a Symbol.
#
# The resolved setting is stringified and stripped of surrounding whitespace;
# a blank result (including nil) falls back to "http".
#
# @return [Symbol]
def renderer
  name = resolve(@renderer).to_s.strip
  name = "http" if name.empty?
  name.to_sym
end

#rule_registry ⇒ Object



61
62
63
64
65
66
# File 'lib/crawlscope/configuration.rb', line 61

# Rule registry to use for audits.
#
# @return [Object] the resolved +@rule_registry+ when it is not nil; otherwise
#   the result of +RuleRegistry.default+ called with the current #site_name
def rule_registry
  configured = resolve(@rule_registry)
  configured.nil? ? RuleRegistry.default(site_name: site_name) : configured
end

#schema_registry ⇒ Object



92
93
94
95
96
97
# File 'lib/crawlscope/configuration.rb', line 92

# Schema registry to use for audits.
#
# @return [Object] the resolved +@schema_registry+ when it is not nil;
#   otherwise the result of +SchemaRegistry.default+
def schema_registry
  configured = resolve(@schema_registry)
  configured.nil? ? SchemaRegistry.default : configured
end

#scroll_page=(value) ⇒ Object (writeonly)

Sets the attribute scroll_page

Parameters:

  • value

    the value to set the attribute scroll_page to.



12
13
14
# File 'lib/crawlscope/configuration.rb', line 12

# Stores the raw scroll_page setting in +@scroll_page+. The value is not
# validated or converted here; #scroll_page? reads it back through +resolve+.
#
# @param value the value to store
def scroll_page=(value)
  @scroll_page = value
end

#site_name ⇒ Object



99
100
101
# File 'lib/crawlscope/configuration.rb', line 99

# Returns the stored +@site_name+ after passing it through +resolve+
# (defined outside this excerpt). #rule_registry forwards this value to
# RuleRegistry.default when no registry is configured.
def site_name
  resolve(@site_name)
end

#sitemap_path ⇒ Object



108
109
110
# File 'lib/crawlscope/configuration.rb', line 108

# Returns the stored +@sitemap_path+ after passing it through +resolve+
# (defined outside this excerpt). No default is applied here; #audit raises
# when the result is blank.
def sitemap_path
  resolve(@sitemap_path)
end

#timeout_seconds ⇒ Object



112
113
114
115
# File 'lib/crawlscope/configuration.rb', line 112

# Configured timeout, in seconds (per the method name).
#
# @return [Integer] DEFAULT_TIMEOUT_SECONDS when the resolved setting is nil,
#   otherwise the resolved setting converted with +to_i+
def timeout_seconds
  configured = resolve(@timeout_seconds)
  return DEFAULT_TIMEOUT_SECONDS if configured.nil?

  configured.to_i
end

Instance Method Details

#audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/crawlscope/configuration.rb', line 68

# Builds an Audit from the current configuration.
#
# @param base_url defaults to #base_url; must not be blank
# @param sitemap_path defaults to #sitemap_path; must not be blank
# @param rule_names forwarded to +rule_registry.rules_for+ to pick the rules
# @return [Audit]
# @raise [ConfigurationError] when +base_url+ or +sitemap_path+ is nil, empty
#   or whitespace-only once stringified
def audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil)
  raise ConfigurationError, "Crawlscope base_url is not configured" if base_url.to_s.strip.empty?
  raise ConfigurationError, "Crawlscope sitemap_path is not configured" if sitemap_path.to_s.strip.empty?

  # Keyword order mirrors the accessor evaluation order of the original call.
  Audit.new(
    base_url: base_url,
    sitemap_path: sitemap_path,
    browser_factory: browser_factory,
    concurrency: concurrency,
    network_idle_timeout_seconds: network_idle_timeout_seconds,
    renderer: renderer,
    timeout_seconds: timeout_seconds,
    allowed_statuses: allowed_statuses,
    rules: rule_registry.rules_for(rule_names),
    schema_registry: schema_registry,
    scroll_page: scroll_page?
  )
end

#browser_concurrency ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/crawlscope/configuration.rb', line 32

# Concurrency to use for browser work: #concurrency capped at
# DEFAULT_BROWSER_CONCURRENCY.
#
# @return [Integer] the smaller of #concurrency and DEFAULT_BROWSER_CONCURRENCY
def browser_concurrency
  [concurrency, DEFAULT_BROWSER_CONCURRENCY].min
end

#scroll_page? ⇒ Boolean

Returns:

  • (Boolean)


103
104
105
106
# File 'lib/crawlscope/configuration.rb', line 103

# Whether page scrolling is enabled.
#
# Only nil triggers the fallback, so an explicit +false+ is honoured. The
# resolved value is returned as-is, without boolean coercion.
#
# @return [Boolean, Object] DEFAULT_BROWSER_SCROLL_PAGE when the resolved
#   setting is nil, otherwise the resolved setting
def scroll_page?
  configured = resolve(@scroll_page)
  return DEFAULT_BROWSER_SCROLL_PAGE if configured.nil?

  configured
end