Class: Crawlscope::Configuration

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlscope/configuration.rb

Constant Summary collapse

DEFAULT_ALLOWED_STATUSES =
[200, 301, 302].freeze
DEFAULT_BROWSER_CONCURRENCY =
4
DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS =
5
DEFAULT_BROWSER_SCROLL_PAGE =
true
DEFAULT_CONCURRENCY =
10
RENDERERS =
%i[http browser].freeze
DEFAULT_TIMEOUT_SECONDS =
20

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#allowed_statuses ⇒ Object



15
16
17
18
# File 'lib/crawlscope/configuration.rb', line 15

# HTTP status codes treated as acceptable.
#
# The stored setting is passed through +resolve+ (defined elsewhere in this
# class); when that yields nil, DEFAULT_ALLOWED_STATUSES is used instead.
# The result is wrapped with Kernel#Array, so a single scalar becomes a
# one-element list, and every entry is converted with +to_i+.
#
# @return [Array<Integer>]
def allowed_statuses
  configured = resolve(@allowed_statuses)
  statuses = configured.nil? ? DEFAULT_ALLOWED_STATUSES : configured

  Array(statuses).map { |status| status.to_i }
end

#base_url ⇒ Object



20
21
22
# File 'lib/crawlscope/configuration.rb', line 20

# The configured base URL, obtained by passing the stored value through
# +resolve+ (defined elsewhere in this class — presumably it unwraps lazily
# supplied values; confirm against its definition).
#
# No default is applied here. #audit uses this as the default for its
# +base_url+ keyword and raises ConfigurationError when the value is blank.
#
# @return [Object] whatever +resolve+ returns for the stored setting
def base_url
  resolve(@base_url)
end

#browser_factory ⇒ Object



24
25
26
# File 'lib/crawlscope/configuration.rb', line 24

# The configured browser factory, obtained by passing the stored value
# through +resolve+ (defined elsewhere in this class).
#
# No default is applied here; the result is handed to Crawl.new by #audit
# as the +browser_factory+ keyword.
#
# @return [Object] whatever +resolve+ returns for the stored setting
def browser_factory
  resolve(@browser_factory)
end

#concurrency ⇒ Object



28
29
30
31
# File 'lib/crawlscope/configuration.rb', line 28

# The configured crawl concurrency.
#
# The stored setting is resolved and then handed to +positive_integer+
# (defined elsewhere in this class) together with DEFAULT_CONCURRENCY as the
# default and "concurrency" as the setting name.
# NOTE(review): presumably +positive_integer+ validates the value and falls
# back to the default when it is nil — confirm against its definition.
#
# @return [Object] the result of +positive_integer+
def concurrency
  positive_integer(
    resolve(@concurrency),
    default: DEFAULT_CONCURRENCY,
    name: "concurrency"
  )
end

#network_idle_timeout_seconds ⇒ Object



44
45
46
47
# File 'lib/crawlscope/configuration.rb', line 44

# The configured network-idle timeout, in seconds.
#
# The stored setting is resolved and then handed to +positive_integer+
# (defined elsewhere in this class) together with
# DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS as the default and
# "network_idle_timeout_seconds" as the setting name.
# NOTE(review): presumably +positive_integer+ validates the value and falls
# back to the default when it is nil — confirm against its definition.
#
# @return [Object] the result of +positive_integer+
def network_idle_timeout_seconds
  positive_integer(
    resolve(@network_idle_timeout_seconds),
    default: DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS,
    name: "network_idle_timeout_seconds"
  )
end

#output ⇒ Object



49
50
51
52
# File 'lib/crawlscope/configuration.rb', line 49

# The configured output sink.
#
# Falls back to +$stdout+ only when the resolved setting is nil; any other
# value (including +false+) is returned unchanged.
#
# @return [Object] the resolved setting, or +$stdout+ when it is nil
def output
  sink = resolve(@output)
  return $stdout if sink.nil?

  sink
end

#renderer ⇒ Object

Raises:

  • (ConfigurationError)



54
55
56
57
58
59
60
61
62
63
# File 'lib/crawlscope/configuration.rb', line 54

# The renderer to use, as a symbol drawn from RENDERERS.
#
# The resolved setting is stringified and stripped of surrounding
# whitespace; a blank result (including nil) selects "http". The name is
# then symbolized and must be a member of RENDERERS.
#
# @return [Symbol] a member of RENDERERS
# @raise [ConfigurationError] when the name is not in RENDERERS
def renderer
  name = resolve(@renderer).to_s.strip
  name = "http" if name.empty?

  candidate = name.to_sym
  unless RENDERERS.include?(candidate)
    raise ConfigurationError, "Crawlscope renderer must be http or browser"
  end

  candidate
end

#rule_registry ⇒ Object



65
66
67
68
69
70
# File 'lib/crawlscope/configuration.rb', line 65

# The rule registry used to look up audit rules.
#
# Returns the resolved setting when it is anything other than nil;
# otherwise builds the default via RuleRegistry.default, passing the
# current #site_name.
#
# @return [Object] the configured registry or the RuleRegistry default
def rule_registry
  registry = resolve(@rule_registry)
  registry.nil? ? RuleRegistry.default(site_name: site_name) : registry
end

#schema_registry ⇒ Object



96
97
98
99
100
101
# File 'lib/crawlscope/configuration.rb', line 96

# The schema registry handed to the crawl.
#
# Returns the resolved setting when it is anything other than nil;
# otherwise returns SchemaRegistry.default.
#
# @return [Object] the configured registry or the SchemaRegistry default
def schema_registry
  registry = resolve(@schema_registry)

  if registry.nil?
    SchemaRegistry.default
  else
    registry
  end
end

#scroll_page=(value) ⇒ Object (writeonly)

Sets the attribute scroll_page

Parameters:

  • value

    the value to set the attribute scroll_page to.



13
14
15
# File 'lib/crawlscope/configuration.rb', line 13

# Stores the raw scroll_page setting without validation or coercion.
#
# The value is read back through #scroll_page?, which resolves it and falls
# back to DEFAULT_BROWSER_SCROLL_PAGE when the result is nil.
#
# @param value [Object] the setting to store
# @return [Object] the assigned value
def scroll_page=(value)
  @scroll_page = value
end

#site_name ⇒ Object



103
104
105
# File 'lib/crawlscope/configuration.rb', line 103

# The configured site name, obtained by passing the stored value through
# +resolve+ (defined elsewhere in this class).
#
# No default is applied here. #rule_registry forwards this value to
# RuleRegistry.default when no registry has been configured.
#
# @return [Object] whatever +resolve+ returns for the stored setting
def site_name
  resolve(@site_name)
end

#sitemap_path ⇒ Object



112
113
114
# File 'lib/crawlscope/configuration.rb', line 112

# The configured sitemap path, obtained by passing the stored value through
# +resolve+ (defined elsewhere in this class).
#
# No default is applied here. #audit uses this as the default for its
# +sitemap_path+ keyword and raises ConfigurationError when the value is
# blank.
#
# @return [Object] whatever +resolve+ returns for the stored setting
def sitemap_path
  resolve(@sitemap_path)
end

#timeout_seconds ⇒ Object



116
117
118
119
# File 'lib/crawlscope/configuration.rb', line 116

# The configured request timeout, in seconds.
#
# The stored setting is resolved and then handed to +positive_integer+
# (defined elsewhere in this class) together with DEFAULT_TIMEOUT_SECONDS as
# the default and "timeout_seconds" as the setting name.
# NOTE(review): presumably +positive_integer+ validates the value and falls
# back to the default when it is nil — confirm against its definition.
#
# @return [Object] the result of +positive_integer+
def timeout_seconds
  positive_integer(
    resolve(@timeout_seconds),
    default: DEFAULT_TIMEOUT_SECONDS,
    name: "timeout_seconds"
  )
end

Instance Method Details

#audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/crawlscope/configuration.rb', line 72

# Builds a Crawl from the current configuration.
#
# +base_url+ and +sitemap_path+ default to the configured values and may be
# overridden per call. Both are checked first: a value whose string form is
# empty after stripping whitespace is rejected. All remaining options are
# read from this configuration's accessors.
#
# @param base_url [Object] site root to crawl; must not be blank
# @param sitemap_path [Object] sitemap location; must not be blank
# @param rule_names [Object, nil] passed to +rule_registry.rules_for+ to
#   select rules
# @return [Crawl] the object returned by Crawl.new
# @raise [ConfigurationError] when base_url or sitemap_path is blank
def audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil)
  raise ConfigurationError, "Crawlscope base_url is not configured" if base_url.to_s.strip.empty?
  raise ConfigurationError, "Crawlscope sitemap_path is not configured" if sitemap_path.to_s.strip.empty?

  # Entries are evaluated top to bottom, matching the keyword order handed
  # to Crawl.new.
  crawl_options = {
    base_url: base_url,
    sitemap_path: sitemap_path,
    browser_factory: browser_factory,
    concurrency: concurrency,
    network_idle_timeout_seconds: network_idle_timeout_seconds,
    renderer: renderer,
    timeout_seconds: timeout_seconds,
    allowed_statuses: allowed_statuses,
    rules: rule_registry.rules_for(rule_names),
    schema_registry: schema_registry,
    scroll_page: scroll_page?
  }

  Crawl.new(**crawl_options)
end

#browser_concurrency ⇒ Object



33
34
35
36
37
38
39
40
41
42
# File 'lib/crawlscope/configuration.rb', line 33

# Concurrency to use for browser rendering: the configured #concurrency,
# capped at DEFAULT_BROWSER_CONCURRENCY.
#
# @return [Object] the smaller of #concurrency and
#   DEFAULT_BROWSER_CONCURRENCY
def browser_concurrency
  [concurrency, DEFAULT_BROWSER_CONCURRENCY].min
end

#scroll_page? ⇒ Boolean

Returns:

  • (Boolean)


107
108
109
110
# File 'lib/crawlscope/configuration.rb', line 107

# Whether the page should be scrolled during browser rendering.
#
# Falls back to DEFAULT_BROWSER_SCROLL_PAGE only when the resolved setting
# is nil; an explicit +false+ is respected. The resolved value is returned
# as-is, with no coercion to true/false.
#
# @return [Boolean, Object] the resolved setting, or the default when nil
def scroll_page?
  setting = resolve(@scroll_page)
  return DEFAULT_BROWSER_SCROLL_PAGE if setting.nil?

  setting
end