Class: SourceMonitor::Scraping::BulkSourceScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/source_monitor/scraping/bulk_source_scraper.rb

Overview

Orchestrates bulk scrape enqueues for a source based on a user-selected scope. Works alongside the single-item enqueuer to ensure we respect per-source limits and provide actionable feedback for the UI.

Defined Under Namespace

Classes: Result

Constant Summary collapse

# Selection scopes the scraper accepts; .normalize_selection returns nil for anything else.
SELECTIONS =
%i[current unscraped all].freeze
# Human-readable label for each selection, looked up by .selection_label.
SELECTION_LABELS =
{
  current: "current view",
  unscraped: "unscraped items",
  all: "all items"
}.freeze
# Preview size #initialize falls back to when preview_limit is unusable or not positive.
DEFAULT_PREVIEW_LIMIT =
10

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source:, selection:, preview_limit: DEFAULT_PREVIEW_LIMIT, enqueuer: SourceMonitor::Scraping::Enqueuer, config: SourceMonitor.config.scraping) ⇒ BulkSourceScraper

Returns a new instance of BulkSourceScraper.



66
67
68
69
70
71
72
73
# File 'lib/source_monitor/scraping/bulk_source_scraper.rb', line 66

# Builds a scraper for one source and one selection scope.
#
# @param source [Object] the source whose items will be enqueued
# @param selection [String, Symbol, nil] requested scope; anything that
#   .normalize_selection does not recognise falls back to :current
# @param preview_limit [#to_i] preview size; values that cannot be converted
#   with #to_i, or that are not positive, fall back to DEFAULT_PREVIEW_LIMIT
# @param enqueuer [#enqueue] collaborator that enqueues a single item
# @param config [Object] scraping configuration, stored as-is
def initialize(source:, selection:, preview_limit: DEFAULT_PREVIEW_LIMIT, enqueuer: SourceMonitor::Scraping::Enqueuer, config: SourceMonitor.config.scraping)
  @source = source
  @enqueuer = enqueuer
  @config = config
  @selection = self.class.normalize_selection(selection) || :current

  # Stays nil when the argument cannot be converted, which selects the default below.
  requested_limit = preview_limit.to_i if preview_limit.respond_to?(:to_i)
  @preview_limit = requested_limit&.positive? ? requested_limit : DEFAULT_PREVIEW_LIMIT
end

Class Method Details

.normalize_selection(selection) ⇒ Object



60
61
62
63
64
# File 'lib/source_monitor/scraping/bulk_source_scraper.rb', line 60

# Coerces user input into one of the SELECTIONS symbols.
#
# Strings are stripped of surrounding whitespace; every truthy value is then
# stringified, downcased and converted to a symbol before the lookup.
#
# @param selection [String, Symbol, nil] raw selection value
# @return [Symbol, nil] the matching SELECTIONS entry, or nil when the input
#   is nil/false or does not name a known selection
def self.normalize_selection(selection)
  candidate = selection
  candidate = candidate.strip if candidate.is_a?(String)
  return nil unless candidate

  key = candidate.to_s.downcase.to_sym
  SELECTIONS.include?(key) ? key : nil
end

.selection_counts(source:, preview_items:, preview_limit: 10) ⇒ Object



50
51
52
53
54
55
56
57
58
# File 'lib/source_monitor/scraping/bulk_source_scraper.rb', line 50

# Counts how many items each selection would cover for a source.
#
# @param source [#id] the source whose items are counted
# @param preview_items [Array, nil] items shown in the current view; nil
#   entries are ignored
# @param preview_limit [#to_i] cap applied to the :current count. Any
#   non-positive value (0, nil, negative) means "no cap". A negative limit
#   used to reach Comparable#clamp as the upper bound and raise ArgumentError.
# @return [Hash{Symbol => Integer}] counts keyed by :current, :unscraped, :all
def self.selection_counts(source:, preview_items:, preview_limit: 10)
  preview_collection = Array(preview_items).compact
  base_scope = SourceMonitor::Item.active.where(source_id: source.id)

  limit = preview_limit.to_i
  preview_size = preview_collection.size
  current_count = limit.positive? ? [preview_size, limit].min : preview_size

  {
    current: current_count,
    unscraped: base_scope.merge(unscraped_scope).count,
    all: base_scope.count
  }
end

.selection_label(selection) ⇒ Object



46
47
48
# File 'lib/source_monitor/scraping/bulk_source_scraper.rb', line 46

# Looks up the display label for a selection.
#
# @param selection [String, Symbol, nil] raw selection value
# @return [String] the label for the normalized selection, or the :current
#   label when the selection is not recognised
def self.selection_label(selection)
  key = normalize_selection(selection)
  label = SELECTION_LABELS[key]
  label || SELECTION_LABELS[:current]
end

.unscraped_scope ⇒ Object



156
157
158
159
160
161
162
163
# File 'lib/source_monitor/scraping/bulk_source_scraper.rb', line 156

# Scope of active items that still need scraping: those with no scraped_at
# value, or whose scrape_status is "failed" or "partial".
#
# @return [Object] the result of calling .where on SourceMonitor::Item.active
def self.unscraped_scope
  items = SourceMonitor::Item.arel_table
  never_scraped = items[:scraped_at].eq(nil)
  needs_retry = items[:scrape_status].in(%w[failed partial])

  SourceMonitor::Item.active.where(never_scraped.or(needs_retry))
end

Instance Method Details

#call ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/source_monitor/scraping/bulk_source_scraper.rb', line 75

# Enqueues a scrape for every item in the selected scope and summarises the
# outcome.
#
# Returns early with disabled_result, invalid_selection_result or
# no_items_result when scraping is disabled for the source, the selection is
# not in SELECTIONS, or the scope yields no items. Otherwise each item is
# handed to the enqueuer with reason :manual. Processing stops at the first
# :rate_limited outcome; any remaining items are not enqueued but still count
# toward attempted_count.
#
# @return [Result] tallies of enqueued, already-enqueued and failed items
#   (the early-return helpers are defined outside this excerpt)
def call
  return disabled_result unless source.scraping_enabled?
  return invalid_selection_result unless SELECTIONS.include?(selection)

  candidates = scoped_items.to_a
  return no_items_result if candidates.empty?

  # Statuses that count as success; every other status is tallied as a failure.
  tallies = { enqueued: 0, already_enqueued: 0 }
  failures = Hash.new(0)
  notices = []
  throttled = false

  candidates.each do |candidate|
    outcome = enqueuer.enqueue(item: candidate, source:, reason: :manual)
    outcome_status = outcome.status

    if tallies.key?(outcome_status)
      tallies[outcome_status] += 1
    else
      throttled = outcome_status == :rate_limited
      failures[outcome_status || :unknown] += 1
      notices << outcome.message if outcome.message.present?
      # Once rate limited, stop instead of attempting the remaining items.
      break if throttled
    end
  end

  failure_total = failures.values.sum
  overall_status = determine_status(
    enqueued_count: tallies[:enqueued],
    failure_count: failure_total,
    already_enqueued_count: tallies[:already_enqueued]
  )

  Result.new(
    status: overall_status,
    selection:,
    attempted_count: candidates.size,
    enqueued_count: tallies[:enqueued],
    already_enqueued_count: tallies[:already_enqueued],
    failure_count: failure_total,
    failure_details: failures.freeze,
    messages: notices.compact.uniq,
    rate_limited: throttled
  )
end