Class: Spidra::Resources::Batch

Inherits:
Object
  • Object
show all
Defined in:
lib/spidra/resources/batch.rb

Constant Summary collapse

# Statuses that #run treats as final: polling stops as soon as a batch
# reports one of these values in its "status" field.
TERMINAL_STATUSES = %w[completed failed cancelled].freeze

Instance Method Summary collapse

Constructor Details

#initialize(http) ⇒ Batch

Returns a new instance of Batch.



6
7
8
# File 'lib/spidra/resources/batch.rb', line 6

# Build a batch resource on top of an HTTP client.
#
# @param http [Object] client that the other methods of this resource call
#   +get+, +post+ and +delete+ on; it is stored as-is
def initialize(http)
  @http = http
end

Instance Method Details

#cancel(batch_id) ⇒ Object

Cancel a pending or active batch. Credits for unprocessed items are refunded.



34
35
36
# File 'lib/spidra/resources/batch.rb', line 34

# Cancel the batch identified by +batch_id+.
#
# Sends a DELETE request for the batch's scrape path through the HTTP client.
#
# @param batch_id [#to_s] batch identifier, interpolated into the request path
# @return [Object] whatever the HTTP client's +delete+ returns
def cancel(batch_id)
  endpoint = "/batch/scrape/#{batch_id}"
  @http.delete(endpoint)
end

#get(batch_id) ⇒ Object

Get the current status of a batch job.



24
25
26
# File 'lib/spidra/resources/batch.rb', line 24

# Fetch the current state of the batch identified by +batch_id+.
#
# Sends a GET request for the batch's scrape path through the HTTP client.
#
# @param batch_id [#to_s] batch identifier, interpolated into the request path
# @return [Object] whatever the HTTP client's +get+ returns
def get(batch_id)
  endpoint = "/batch/scrape/#{batch_id}"
  @http.get(endpoint)
end

#list(page = 1, limit = 20) ⇒ Object

List past batch jobs, newest first.

Parameters:

  • page (Integer) (defaults to: 1)

    page number (default: 1)

  • limit (Integer) (defaults to: 20)

    results per page (default: 20)



14
15
16
# File 'lib/spidra/resources/batch.rb', line 14

# List batch jobs one page at a time.
#
# Sends a GET request to the batch scrape collection path, passing +page+
# and +limit+ to the HTTP client as keyword arguments.
#
# @param page [Integer] page number (default: 1)
# @param limit [Integer] results per page (default: 20)
# @return [Object] whatever the HTTP client's +get+ returns
def list(page = 1, limit = 20)
  endpoint = "/batch/scrape"
  @http.get(endpoint, page: page, limit: limit)
end

#retry(batch_id) ⇒ Object

Retry failed items in a batch. Successful items are not re-run.



29
30
31
# File 'lib/spidra/resources/batch.rb', line 29

# Request a retry for the batch identified by +batch_id+.
#
# Sends a POST request with an empty hash as the body to the batch's
# retry path through the HTTP client.
#
# @param batch_id [#to_s] batch identifier, interpolated into the request path
# @return [Object] whatever the HTTP client's +post+ returns
def retry(batch_id)
  endpoint = "/batch/scrape/#{batch_id}/retry"
  @http.post(endpoint, {})
end

#run(params, poll_interval: 3, timeout: 120) ⇒ Object

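Submit a batch and poll its status until it reaches a terminal status (completed, failed, or cancelled) or the timeout elapses. On timeout, a hash with status "timeout" is returned instead of raising an error.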

Parameters:

  • params (Hash)

    batch parameters (urls, prompt, output, etc.)

  • poll_interval (Integer) (defaults to: 3)

    seconds between status checks (default: 3)

  • timeout (Integer) (defaults to: 120)

    seconds before giving up (default: 120)



43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/spidra/resources/batch.rb', line 43

# Submit a batch and poll until it reaches a terminal status or the timeout
# elapses.
#
# The deadline is tracked on the monotonic clock so that system clock
# adjustments (e.g. an NTP step) cannot shorten or stretch the wait. Each
# sleep is capped at the time remaining, so the call does not overshoot
# +timeout+ by up to a full +poll_interval+.
#
# The status is always polled at least once, even when +timeout+ is 0.
#
# NOTE(review): if the submit response has no "batchId", polling proceeds
# with a nil id until the timeout; confirm whether the HTTP client already
# raises on failed submissions.
#
# @param params [Hash] batch parameters (urls, prompt, output, etc.)
# @param poll_interval [Numeric] seconds between status checks (default: 3)
# @param timeout [Numeric] seconds before giving up (default: 120)
# @return [Hash] the last status payload merged with "batchId" when its
#   "status" is one of TERMINAL_STATUSES, or
#   <tt>{ "status" => "timeout", "batchId" => id }</tt> when the deadline
#   passes first
def run(params, poll_interval: 3, timeout: 120)
  batch_id = submit(params)["batchId"]
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout

  loop do
    status = get(batch_id)
    return status.merge("batchId" => batch_id) if TERMINAL_STATUSES.include?(status["status"])

    remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
    return { "status" => "timeout", "batchId" => batch_id } if remaining <= 0

    # Never sleep past the deadline; the next iteration does the final poll.
    sleep [poll_interval, remaining].min
  end
end

#submit(params) ⇒ Object

Submit a batch of URLs. Returns immediately with a batchId.



19
20
21
# File 'lib/spidra/resources/batch.rb', line 19

# Submit a new batch.
#
# Sends a POST request with +params+ as the body to the batch scrape
# collection path through the HTTP client; no polling is done here.
#
# @param params [Hash] batch parameters, passed to the HTTP client unchanged
# @return [Object] whatever the HTTP client's +post+ returns
def submit(params)
  endpoint = "/batch/scrape"
  @http.post(endpoint, params)
end