Class: Crawlscope::Rules::Indexability

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlscope/rules/indexability.rb

Constant Summary collapse

# CSS selector matching the meta tags that may carry robots directives:
# the generic "robots" tag and the "googlebot" tag.
ROBOTS_META_SELECTOR = 'meta[name="robots"], meta[name="googlebot"]'.freeze
# Name of the HTTP response header that carries robots directives. It is
# written in lowercase here and matched case-insensitively when looked up.
X_ROBOTS_TAG_HEADER = "x-robots-tag".freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Indexability

Returns a new instance of Indexability.



36
37
38
# File 'lib/crawlscope/rules/indexability.rb', line 36

# Builds the rule and records its identifying code, :indexability,
# in the @code instance variable.
def initialize
  @code = :indexability
end

Instance Attribute Details

#code ⇒ Object (readonly)

Returns the value of attribute code.



34
35
36
# File 'lib/crawlscope/rules/indexability.rb', line 34

# Read-only accessor for the rule's code.
#
# @return [Symbol] the value stored in @code (:indexability after construction)
def code
  @code
end

Class Method Details

.directives(value) ⇒ Object



23
24
25
26
27
28
# File 'lib/crawlscope/rules/indexability.rb', line 23

# Splits a raw robots directive string into its individual directive tokens.
#
# The value is split on commas. For each piece, everything up to and
# including the first colon is dropped (for example the user-agent prefix in
# "googlebot: noindex"), surrounding whitespace is stripped, and pieces that
# end up empty are discarded. Case is left untouched.
#
# @param value [String, nil] raw content of a robots meta tag or
#   X-Robots-Tag header; nil is treated like an empty string
# @return [Array<String>] directive tokens in their original order
def self.directives(value)
  # to_s lets callers pass nil (e.g. a missing header or content attribute)
  # without raising NoMethodError.
  value
    .to_s
    .split(",")
    # `.last.to_s` covers empty pieces, for which split returns [].
    .map { |directive| directive.split(":", 2).last.to_s.strip }
    .reject(&:empty?)
end

.header_value(headers, name) ⇒ Object



19
20
21
# File 'lib/crawlscope/rules/indexability.rb', line 19

# Looks up a header by name, ignoring case, and returns its value as a String.
#
# Every entry whose key matches is used. Array values (as produced by HTTP
# clients that keep repeated header lines separate) and repeated entries are
# combined into one comma-separated string, so that directives from all
# header lines are visible to the caller.
#
# @param headers [Enumerable, nil] key/value pairs (e.g. a Hash); nil is
#   treated as "no headers"
# @param name [String] header name to look for
# @return [String] the combined header value, or "" when the header is absent
def self.header_value(headers, name)
  return "" unless headers

  headers
    .select { |key, _value| key.to_s.casecmp?(name) }
    # Array() turns nil into [], leaves arrays as-is and wraps scalars.
    .flat_map { |_key, value| Array(value) }
    .join(", ")
end

.noindex?(value) ⇒ Boolean

Returns:

  • (Boolean)


30
31
32
# File 'lib/crawlscope/rules/indexability.rb', line 30

# Tells whether a robots directive string forbids indexing.
#
# A value forbids indexing when any of its directives equals "noindex" or
# "none", compared without regard to case.
#
# @param value [String] raw robots directive string
# @return [Boolean] true if a blocking directive is present
def self.noindex?(value)
  directives(value).any? do |directive|
    %w[noindex none].any? { |blocking| directive.casecmp?(blocking) }
  end
end

.noindex_header?(headers) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
# File 'lib/crawlscope/rules/indexability.rb', line 9

# Tells whether the X-Robots-Tag response header forbids indexing.
#
# @param headers [Enumerable] response headers as key/value pairs
# @return [Boolean] true if the header carries a "noindex" or "none" directive
def self.noindex_header?(headers)
  robots_header = header_value(headers, X_ROBOTS_TAG_HEADER)
  noindex?(robots_header)
end

.noindex_meta?(doc) ⇒ Boolean

Returns:

  • (Boolean)


13
14
15
16
17
# File 'lib/crawlscope/rules/indexability.rb', line 13

# Tells whether a parsed document contains a robots meta tag that forbids
# indexing.
#
# @param doc [#css, nil] parsed document that answers to `css(selector)`;
#   a nil/false doc is treated as "no meta tags"
# @return [Boolean] true if any matched meta tag's content attribute carries
#   a "noindex" or "none" directive
def self.noindex_meta?(doc)
  return false unless doc

  robots_tags = doc.css(ROBOTS_META_SELECTOR)
  robots_tags.any? do |meta_tag|
    # A tag without a content attribute yields nil; to_s makes that "".
    content = meta_tag["content"].to_s
    noindex?(content)
  end
end

Instance Method Details

#call(urls:, pages:, issues:, context: nil) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/crawlscope/rules/indexability.rb', line 40

# Runs the indexability checks over every crawled page.
#
# The sitemap URLs are normalized once via normalized_sitemap_urls. Then, for
# each page in order, validate_meta_robots is invoked when the page reports
# itself as HTML, followed by validate_x_robots_tag for every page.
#
# @param urls [Object] sitemap URLs, passed to normalized_sitemap_urls
# @param pages [#each] crawled pages; each must answer to `html?`
# @param issues [Object] issue collector handed to both validators
# @param context [Object, nil] accepted but not used by this method
# @return [Object] whatever `pages.each` returns
def call(urls:, pages:, issues:, context: nil)
  sitemap_urls = normalized_sitemap_urls(urls)

  pages.each do |page|
    if page.html?
      validate_meta_robots(page, issues, sitemap_urls)
    end

    validate_x_robots_tag(page, issues, sitemap_urls)
  end
end