Class: Digger::Index

Inherits:
Struct
  • Object
show all
Defined in:
lib/digger/index.rb

Defined Under Namespace

Classes: NoBlockError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#args ⇒ Object

Returns the value of attribute args

Returns:

  • (Object)

    the current value of args



2
3
4
# File 'lib/digger/index.rb', line 2

# Reader for the +args+ attribute: returns the value currently held in @args.
# NOTE(review): the class is listed as inheriting from Struct, so this listing
# may be a documentation-generated stand-in for the Struct member reader —
# confirm against lib/digger/index.rb before relying on the @args storage.
def args
  @args
end

#pattern ⇒ Object

Returns the value of attribute pattern

Returns:

  • (Object)

    the current value of pattern



2
3
4
# File 'lib/digger/index.rb', line 2

# Reader for the +pattern+ attribute: returns the value currently held in
# @pattern.
# NOTE(review): the class is listed as inheriting from Struct, so this listing
# may be a documentation-generated stand-in for the Struct member reader —
# confirm against lib/digger/index.rb before relying on the @pattern storage.
def pattern
  @pattern
end

Class Method Details

.batch(entities, cocurrence = 1, &block) ⇒ Object

Raises:

  • (NoBlockError)



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/digger/index.rb', line 49

# Applies the block to every entity and returns the results in input order.
#
# With +cocurrence+ of 1 or less the entities are mapped sequentially on the
# calling thread. With a larger value the entities are processed in slices of
# +cocurrence+ items: one thread is started per item in the slice, and every
# thread of a slice is joined before the next slice begins.
#
# @param entities [#map, #size, #each_slice] collection to process
# @param cocurrence [Integer] slice size, i.e. number of threads per slice
# @yield [entity] called once per entity
# @return [Array] block results, positioned to match the input order
# @raise [NoBlockError] when no block is given
def self.batch(entities, cocurrence = 1, &block)
  raise NoBlockError, 'No block given' unless block
  # Sequential path: no threads are created at all.
  return entities.map { |ent| block.call(ent) } unless cocurrence > 1

  # Pre-sized so each thread writes to its own fixed slot, keeping input order.
  collected = Array.new(entities.size)
  entities.each_slice(cocurrence).with_index do |chunk, chunk_no|
    offset = chunk_no * cocurrence
    workers = chunk.each_with_index.map do |entity, pos|
      # Entity and slot are handed over as thread arguments so each thread
      # works on its own copies of those references.
      Thread.new(entity, offset + pos) do |ent, slot|
        collected[slot] = block.call(ent)
      end
    end
    # Wait for the whole slice before starting the next one.
    workers.each(&:join)
  end
  collected
end

.slow_down(entities, conf = {}, &block) ⇒ Object

Raises:

  • (NoBlockError)



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/digger/index.rb', line 20

# Applies the block to each entity one at a time, pausing between items and
# backing off after failures.
#
# After a successful call the cursor advances and, if more entities remain,
# the method sleeps for a random duration drawn from
# +:sleep_range_seconds+. When the block raises a StandardError the same
# entity is retried after sleeping +failed_cnt * :fail_unit_seconds+; the
# failure counter is cumulative across the whole run and is never reset.
# Once the counter reaches +:fail_max_cnt+ processing stops and the results
# gathered so far are returned.
#
# @param entities [#length, #[]] integer-indexable collection to process
# @param conf [Hash] overrides merged over the defaults:
#   +:sleep_range_seconds+ (Range passed to +rand+),
#   +:fail_max_cnt+ (Integer),
#   +:fail_unit_seconds+ (Numeric),
#   +:when_fail+ (callable invoked with entity, exception, failure count)
# @yield [entity] called for the entity at the current cursor
# @return [Array] results of the successful calls, in order; shorter than
#   +entities+ when processing stopped at the failure limit
# @raise [NoBlockError] when no block is given
def self.slow_down(entities, conf = {}, &block)
  raise NoBlockError, 'No block given' unless block

  config = {
    sleep_range_seconds: 4...10, # range the random pause between items is drawn from
    fail_max_cnt: 10, # stop once this many failures have accumulated
    fail_unit_seconds: 10 * 60, # back-off unit, multiplied by the failure count
    when_fail: ->(ent, e, failed_cnt) {}
  }.merge(conf)
  failed_cnt = 0
  cursor = 0
  result = []
  while cursor < entities.length
    begin
      result << block.call(entities[cursor])
    rescue StandardError => e
      failed_cnt += 1
      config[:when_fail].call(entities[cursor], e, failed_cnt)
      # Give up immediately, without a pointless back-off sleep.
      break if failed_cnt >= config[:fail_max_cnt]

      # Cursor is not advanced, so the same entity is retried after the wait.
      sleep(failed_cnt * config[:fail_unit_seconds])
    else
      cursor += 1
      # Throttle only when another entity is still to be processed; sleeping
      # after the final one would merely delay returning the results.
      sleep(rand(config[:sleep_range_seconds])) if cursor < entities.length
    end
  end
  result
end

Instance Method Details

#pattern_applied_url(arg) ⇒ Object



16
17
18
# File 'lib/digger/index.rb', line 16

# Builds a URL from the pattern by replacing each '*' with the element of
# +arg+ at the same ordinal position (first '*' gets arg[0], and so on).
#
# @param arg [#[]] integer-indexable values, one per '*' in the pattern
# @return [String] the pattern with every '*' substituted
def pattern_applied_url(arg)
  position = -1
  # The block runs once per '*', left to right, so the counter tracks which
  # wildcard is being filled.
  pattern.gsub('*') { arg[position += 1] }
end

#process(cocurrence = 1, &block) ⇒ Object



5
6
7
# File 'lib/digger/index.rb', line 5

# Hands the list returned by #urls to Index.batch together with the given
# block.
#
# @param cocurrence [Object] passed through unchanged as the second argument
#   of Index.batch; defaults to 1
# @yield forwarded as-is to Index.batch
# @return [Object] whatever Index.batch returns
def process(cocurrence = 1, &block)
  Index.batch(urls, cocurrence, &block)
end

#urls ⇒ Object



9
10
11
12
13
14
# File 'lib/digger/index.rb', line 9

# Expands the pattern into every URL obtainable from the argument lists.
#
# Each element of +args+ that responds to +each+ is converted with +to_a+;
# any other element is wrapped in a one-element array. The Cartesian product
# of those lists is taken and every combination is passed to
# #pattern_applied_url. The result is memoized in @urls.
#
# When +args+ is empty the product is treated as a single empty combination,
# so #pattern_applied_url is called once with an empty array instead of the
# method failing on +nil.product+.
#
# @return [Array] one #pattern_applied_url result per combination
def urls
  @urls ||= begin
    lists = args.map { |a| a.respond_to?(:each) ? a.to_a : [a] }
    first, *rest = lists
    # No argument lists at all: the empty product is exactly one empty tuple.
    combos = first ? first.product(*rest) : [[]]
    combos.map { |combo| pattern_applied_url(combo) }
  end
end