Module: Readability::Readerable

Defined in:
lib/readability/readerable.rb

Class Method Summary collapse

Class Method Details

.node_visible?(node) ⇒ Boolean

NOTE: This matches the JS isNodeVisible exactly; in particular, it does NOT check visibility:hidden.

Returns:

  • (Boolean)


49
50
51
52
53
54
55
56
57
58
59
# File 'lib/readability/readerable.rb', line 49

# Decides whether +node+ counts as visible for readerable scoring.
#
# A node is reported as hidden when any of the following holds:
# * its inline +style+ attribute contains +display: none+ (case-insensitive),
# * it carries a +hidden+ attribute (any value, including empty),
# * it has +aria-hidden="true"+ and its +class+ attribute does not contain
#   "fallback-image".
#
# +visibility: hidden+ is deliberately NOT inspected.
#
# @param node [#[]] element whose attributes are looked up by string name
# @return [Boolean] true when none of the hiding conditions apply
def node_visible?(node)
  inline_style = node['style']
  return false if inline_style && inline_style.match?(/display:\s*none/i)
  return false unless node['hidden'].nil?

  # Anything other than aria-hidden="true" leaves the node visible.
  return true unless node['aria-hidden'] == "true"

  # aria-hidden nodes stay visible only when marked as a fallback image.
  (node['class'] || "").include?("fallback-image")
end

.probably_readerable?(doc, options_or_checker = {}, **kwargs) ⇒ Boolean

For backward compatibility, a proc may be passed as the second positional argument in place of the options hash (this matches the JS API).

Returns:

  • (Boolean)


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/readability/readerable.rb', line 10

# Quick heuristic: does +doc+ contain enough article-like text to be worth
# running the full extraction on?
#
# Candidate nodes are every +p+, +pre+ and +article+ element, plus every +div+
# that is the direct parent of a +br+. Each visible candidate whose trimmed text
# is at least +min_content_length+ characters long adds
# <tt>sqrt(text_length - min_content_length)</tt> to a running score. The method
# returns true as soon as that score exceeds +min_score+.
#
# Options may be given as a positional Hash, as keyword arguments, or both
# (keyword arguments win on conflict):
# * +:min_score+ (default 20)
# * +:min_content_length+ (default 140)
# * +:visibility_checker+ - callable taking a node and returning truthy when
#   the node is visible (default: +node_visible?+)
#
# For backward compatibility with the JS API the second positional argument may
# instead be the visibility checker itself. Any object responding to +#call+ is
# accepted (Proc, lambda, Method, custom callable), not only Proc instances.
#
# @param doc [#css] parsed document that answers CSS queries
# @param options_or_checker [Hash, #call] options hash or visibility checker
# @param kwargs [Hash] options given as keyword arguments
# @return [Boolean] true when the accumulated score exceeds +min_score+
def probably_readerable?(doc, options_or_checker = {}, **kwargs)
  options =
    if options_or_checker.respond_to?(:call)
      # Duck-type the checker: a Method or callable object is neither a Proc nor
      # a Hash and would otherwise be dropped silently.
      kwargs.merge(visibility_checker: options_or_checker)
    elsif options_or_checker.is_a?(Hash)
      options_or_checker.merge(kwargs)
    else
      kwargs
    end

  min_score = options.fetch(:min_score, 20)
  min_content_length = options.fetch(:min_content_length, 140)
  # An explicit nil checker falls back to the built-in one as well.
  visibility_checker = options.fetch(:visibility_checker, nil) || method(:node_visible?)

  nodes = doc.css("p, pre, article")

  # Also include div parents of br nodes (some articles use div > br structure).
  # The Set keeps a div with several br children from being scored repeatedly.
  br_nodes = doc.css("div > br")
  if br_nodes.any?
    node_set = Set.new(nodes.to_a)
    br_nodes.each { |br| node_set.add(br.parent) }
    nodes = node_set.to_a
  end

  score = 0.0
  # any? short-circuits on the first node that pushes the score over the bar.
  nodes.any? do |node|
    next false unless visibility_checker.call(node)

    match_string = "#{node['class']} #{node['id']}"
    next false if UNLIKELY_CANDIDATES.match?(match_string) && !OK_MAYBE_CANDIDATE.match?(match_string)
    next false if node.matches?("li p")

    text_length = node.text.strip.length
    next false if text_length < min_content_length

    score += Math.sqrt(text_length - min_content_length)
    score > min_score
  end
end