Class: Crawlscope::Configuration
- Inherits: Object
- Inheritance chain:
  - Object
  - Crawlscope::Configuration
- Defined in:
- lib/crawlscope/configuration.rb
Constant Summary collapse
- DEFAULT_ALLOWED_STATUSES =
[200, 301, 302].freeze
- DEFAULT_BROWSER_CONCURRENCY =
4
- DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS =
5
- DEFAULT_BROWSER_SCROLL_PAGE =
true
- DEFAULT_CONCURRENCY =
10
- DEFAULT_FETCH_EXECUTOR =
:async
- RENDERERS =
%i[http browser].freeze
- DEFAULT_TIMEOUT_SECONDS =
20
Instance Attribute Summary collapse
- #allowed_statuses ⇒ Object
- #base_url ⇒ Object
- #browser_factory ⇒ Object
- #concurrency ⇒ Object
- #fetch_executor ⇒ Object
- #network_idle_timeout_seconds ⇒ Object
- #output ⇒ Object
- #renderer ⇒ Object
- #rule_registry ⇒ Object
- #schema_registry ⇒ Object
- #scroll_page=(value) ⇒ Object (writeonly): Sets the attribute scroll_page.
- #site_name ⇒ Object
- #sitemap_path ⇒ Object
- #timeout_seconds ⇒ Object
Instance Method Summary collapse
- #audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil) ⇒ Object
- #browser_concurrency ⇒ Object
- #scroll_page? ⇒ Boolean
Instance Attribute Details
#allowed_statuses ⇒ Object
16 17 18 19 |
# File 'lib/crawlscope/configuration.rb', line 16
#
# Returns the HTTP statuses the crawl treats as acceptable, each converted
# with +to_i+. Falls back to DEFAULT_ALLOWED_STATUSES when the resolved
# setting is nil.
#
# NOTE(review): +resolve+ is defined outside this listing; it presumably
# unwraps lazily supplied settings -- confirm against the source file.
#
# @return [Array<Integer>]
def allowed_statuses
  configured = resolve(@allowed_statuses)
  statuses = configured.nil? ? DEFAULT_ALLOWED_STATUSES : configured
  # Array() lets a single scalar status be configured as well as a list.
  Array(statuses).map { |status| status.to_i }
end
#base_url ⇒ Object
21 22 23 |
# File 'lib/crawlscope/configuration.rb', line 21
#
# Returns the configured base URL after passing it through +resolve+.
# No default is applied here; #audit rejects a blank value.
#
# @return [Object] whatever +resolve+ yields for the stored setting
def base_url
  resolve(@base_url)
end
#browser_factory ⇒ Object
25 26 27 |
# File 'lib/crawlscope/configuration.rb', line 25
#
# Returns the configured browser factory after passing it through
# +resolve+. No default is applied; the value is handed to Crawl.new by
# #audit as-is.
#
# @return [Object, nil] whatever +resolve+ yields for the stored setting
def browser_factory
  resolve(@browser_factory)
end
#concurrency ⇒ Object
29 30 31 32 |
# File 'lib/crawlscope/configuration.rb', line 29
#
# Returns the crawl concurrency, delegating validation and defaulting
# (DEFAULT_CONCURRENCY) to +positive_integer+.
#
# NOTE(review): +positive_integer+ is defined outside this listing; by its
# name and keywords it presumably rejects non-positive values -- confirm.
#
# @return [Object] the result of +positive_integer+
def concurrency
  positive_integer(
    resolve(@concurrency),
    default: DEFAULT_CONCURRENCY,
    name: "concurrency"
  )
end
#fetch_executor ⇒ Object
34 35 36 37 38 39 |
# File 'lib/crawlscope/configuration.rb', line 34
#
# Returns the fetch executor to use, normalised by FetchExecutor.normalize.
# When nothing is configured the fallback depends on the renderer:
# +:threaded+ for the browser renderer, DEFAULT_FETCH_EXECUTOR otherwise.
#
# @return [Object] the result of FetchExecutor.normalize
def fetch_executor
  configured = resolve(@fetch_executor)

  # The fallback is computed unconditionally, so an invalid renderer
  # setting raises from #renderer even when an executor is configured.
  fallback =
    if renderer == :browser
      :threaded
    else
      DEFAULT_FETCH_EXECUTOR
    end

  selected = configured.nil? ? fallback : configured
  FetchExecutor.normalize(selected)
end
#network_idle_timeout_seconds ⇒ Object
52 53 54 55 |
# File 'lib/crawlscope/configuration.rb', line 52
#
# Returns the network-idle timeout in seconds, delegating validation and
# defaulting (DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS) to
# +positive_integer+.
#
# @return [Object] the result of +positive_integer+
def network_idle_timeout_seconds
  positive_integer(
    resolve(@network_idle_timeout_seconds),
    default: DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS,
    name: "network_idle_timeout_seconds"
  )
end
#output ⇒ Object
57 58 59 60 |
# File 'lib/crawlscope/configuration.rb', line 57
#
# Returns the configured output object, or +$stdout+ when the resolved
# setting is nil.
#
# @return [Object] the resolved setting, or +$stdout+
def output
  sink = resolve(@output)
  return $stdout if sink.nil?

  sink
end
#renderer ⇒ Object
62 63 64 65 66 67 68 69 70 71 |
# File 'lib/crawlscope/configuration.rb', line 62
#
# Returns the renderer as a symbol. The resolved setting is stringified
# and stripped; a blank result means "http". Anything not listed in
# RENDERERS is rejected.
#
# @return [Symbol] a member of RENDERERS
# @raise [ConfigurationError] when the setting is not a known renderer
def renderer
  name = resolve(@renderer).to_s.strip
  name = "http" if name.empty?
  candidate = name.to_sym

  unless RENDERERS.include?(candidate)
    raise ConfigurationError, "Crawlscope renderer must be http or browser"
  end

  candidate
end
#rule_registry ⇒ Object
73 74 75 76 77 78 |
# File 'lib/crawlscope/configuration.rb', line 73
#
# Returns the configured rule registry, or RuleRegistry.default built for
# the current #site_name when the resolved setting is nil.
#
# @return [Object] the resolved registry or the default one
def rule_registry
  registry = resolve(@rule_registry)

  if registry.nil?
    RuleRegistry.default(site_name: site_name)
  else
    registry
  end
end
#schema_registry ⇒ Object
105 106 107 108 109 110 |
# File 'lib/crawlscope/configuration.rb', line 105
#
# Returns the configured schema registry, or SchemaRegistry.default when
# the resolved setting is nil.
#
# @return [Object] the resolved registry or the default one
def schema_registry
  registry = resolve(@schema_registry)
  registry.nil? ? SchemaRegistry.default : registry
end
#scroll_page=(value) ⇒ Object (writeonly)
Sets the attribute scroll_page
14 15 16 |
# File 'lib/crawlscope/configuration.rb', line 14
#
# Stores the scroll_page setting. The stored value is read back through
# #scroll_page?.
#
# @param value [Object] the new setting
def scroll_page=(value)
  @scroll_page = value
end
#site_name ⇒ Object
112 113 114 |
# File 'lib/crawlscope/configuration.rb', line 112
#
# Returns the configured site name after passing it through +resolve+.
# No default is applied. #rule_registry forwards this value to
# RuleRegistry.default.
#
# @return [Object, nil] whatever +resolve+ yields for the stored setting
def site_name
  resolve(@site_name)
end
#sitemap_path ⇒ Object
121 122 123 |
# File 'lib/crawlscope/configuration.rb', line 121
#
# Returns the configured sitemap path after passing it through +resolve+.
# No default is applied here; #audit rejects a blank value.
#
# @return [Object, nil] whatever +resolve+ yields for the stored setting
def sitemap_path
  resolve(@sitemap_path)
end
#timeout_seconds ⇒ Object
125 126 127 128 |
# File 'lib/crawlscope/configuration.rb', line 125
#
# Returns the request timeout in seconds, delegating validation and
# defaulting (DEFAULT_TIMEOUT_SECONDS) to +positive_integer+.
#
# @return [Object] the result of +positive_integer+
def timeout_seconds
  positive_integer(
    resolve(@timeout_seconds),
    default: DEFAULT_TIMEOUT_SECONDS,
    name: "timeout_seconds"
  )
end
Instance Method Details
#audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/crawlscope/configuration.rb', line 80
#
# Builds a Crawl from this configuration. +base_url+ and +sitemap_path+
# default to the configured values and may be overridden per call.
#
# @param base_url [Object] site root; must not be blank once stringified
# @param sitemap_path [Object] sitemap location; must not be blank once
#   stringified
# @param rule_names [Object, nil] forwarded to +rule_registry.rules_for+
# @return [Object] the object returned by Crawl.new
# @raise [ConfigurationError] when +base_url+ or +sitemap_path+ is blank
def audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names: nil)
  raise ConfigurationError, "Crawlscope base_url is not configured" if base_url.to_s.strip.empty?
  raise ConfigurationError, "Crawlscope sitemap_path is not configured" if sitemap_path.to_s.strip.empty?

  # Keyword order is kept as in the original listing so the readers are
  # evaluated (and can raise) in the same sequence.
  Crawl.new(
    base_url: base_url,
    sitemap_path: sitemap_path,
    browser_factory: browser_factory,
    concurrency: concurrency,
    fetch_executor: fetch_executor,
    network_idle_timeout_seconds: network_idle_timeout_seconds,
    renderer: renderer,
    timeout_seconds: timeout_seconds,
    allowed_statuses: allowed_statuses,
    rules: rule_registry.rules_for(rule_names),
    schema_registry: schema_registry,
    scroll_page: scroll_page?
  )
end
#browser_concurrency ⇒ Object
41 42 43 44 45 46 47 48 49 50 |
# File 'lib/crawlscope/configuration.rb', line 41
#
# Returns #concurrency capped at DEFAULT_BROWSER_CONCURRENCY: the
# configured value is used unless it exceeds the cap.
#
# @return [Object] the smaller of #concurrency and the browser cap
def browser_concurrency
  requested = concurrency
  ceiling = DEFAULT_BROWSER_CONCURRENCY

  requested > ceiling ? ceiling : requested
end
#scroll_page? ⇒ Boolean
116 117 118 119 |
# File 'lib/crawlscope/configuration.rb', line 116
#
# Returns the scroll_page setting, or DEFAULT_BROWSER_SCROLL_PAGE when the
# resolved setting is nil. The resolved value is returned as-is, without
# coercion to true/false.
#
# @return [Object] the resolved setting or the default
def scroll_page?
  flag = resolve(@scroll_page)
  return DEFAULT_BROWSER_SCROLL_PAGE if flag.nil?

  flag
end