Class: Crawlscope::Rules::ContentQuality

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlscope/rules/content_quality.rb

Constant Summary collapse

MIN_VISIBLE_TEXT_RATIO = 0.08
MIN_VISIBLE_WORDS = 250
MIN_UNIQUE_TOKEN_RATIO = 0.25

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(min_visible_text_ratio: MIN_VISIBLE_TEXT_RATIO, min_visible_words: MIN_VISIBLE_WORDS, min_unique_token_ratio: MIN_UNIQUE_TOKEN_RATIO) ⇒ ContentQuality

Returns a new instance of ContentQuality.



12
13
14
15
16
17
18
19
20
21
# File 'lib/crawlscope/rules/content_quality.rb', line 12

# Builds the rule, recording its identifying code and the three thresholds.
#
# Every keyword is optional; when omitted it takes the value of the
# like-named constant (MIN_VISIBLE_TEXT_RATIO, MIN_VISIBLE_WORDS,
# MIN_UNIQUE_TOKEN_RATIO). The values are stored as given, without validation.
#
# @param min_visible_text_ratio [Numeric] saved to @min_visible_text_ratio;
#   presumably the floor for the visible-text-ratio check (helper not shown here)
# @param min_visible_words [Numeric] saved to @min_visible_words;
#   presumably the floor for the visible-word-count check (helper not shown here)
# @param min_unique_token_ratio [Numeric] saved to @min_unique_token_ratio;
#   presumably the floor for the unique-token-ratio check (helper not shown here)
def initialize(min_visible_text_ratio: MIN_VISIBLE_TEXT_RATIO,
               min_visible_words: MIN_VISIBLE_WORDS,
               min_unique_token_ratio: MIN_UNIQUE_TOKEN_RATIO)
  # Fixed identifier exposed through the #code reader.
  @code = :content_quality

  # Thresholds are kept verbatim for later use by the rule.
  @min_visible_text_ratio = min_visible_text_ratio
  @min_visible_words = min_visible_words
  @min_unique_token_ratio = min_unique_token_ratio
end

Instance Attribute Details

#code ⇒ Object (readonly)

Returns the value of attribute code.



10
11
12
# File 'lib/crawlscope/rules/content_quality.rb', line 10

# Read-only accessor for the rule's identifying code.
#
# @return [Object] the value held in @code (the constructor assigns
#   +:content_quality+); nil if @code was never set
def code = @code

Instance Method Details

#call(urls:, pages:, issues:, context: nil) ⇒ Object



23
24
25
26
27
28
29
30
31
# File 'lib/crawlscope/rules/content_quality.rb', line 23

# Applies the three content-quality checks to every HTML page.
#
# Pages whose +html?+ is falsy are skipped. For each remaining page the
# checks run in a fixed order: visible words, visible text ratio, unique
# token ratio. Each helper receives the page and the +issues+ collector.
#
# @param urls [Object] accepted as part of the signature; not used in this method
# @param pages [#each] collection of page objects responding to +html?+
# @param issues [Object] collector handed unchanged to each validate_* helper
# @param context [Object, nil] accepted as part of the signature; not used in this method
# @return [Object] whatever +pages.each+ returns
def call(urls:, pages:, issues:, context: nil)
  pages.each do |candidate|
    if candidate.html?
      validate_visible_words(candidate, issues)
      validate_visible_text_ratio(candidate, issues)
      validate_unique_token_ratio(candidate, issues)
    end
  end
end