Class: DataDrain::GlueRunner

Inherits:
Object
  • Object
show all
Extended by:
Observability
Defined in:
lib/data_drain/glue_runner.rb

Overview

Orquestador para AWS Glue. Permite disparar y monitorear Jobs en AWS para delegar el movimiento masivo de datos (ej. tablas de 1TB).

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.client ⇒ Aws::Glue::Client

Dispara un Job de Glue y espera a que termine exitosamente.

Parameters:

  • job_name (String)

    Nombre del Job en la consola de AWS.

  • arguments (Hash)

    Argumentos de ejecución (deben empezar con --).

  • polling_interval (Integer)

    Segundos de espera entre cada chequeo de estado.

  • max_wait_seconds (Integer, nil)

    Timeout máximo en segundos. nil = sin límite (comportamiento anterior).

Returns:

  • (Boolean)

    true si el Job terminó exitosamente (SUCCEEDED).

Raises:

  • (DataDrain::Error)

    si max_wait_seconds excede antes de SUCCEEDED.

  • (RuntimeError)

    si el Job falla o se detiene.



22
23
24
# File 'lib/data_drain/glue_runner.rb', line 22

# Lazily constructs and memoizes the AWS Glue API client, using the
# region taken from the gem-wide configuration.
#
# @return [Aws::Glue::Client] shared, memoized Glue client
def self.client
  return @client if @client

  @client = Aws::Glue::Client.new(region: DataDrain.configuration.aws_region)
end

Class Method Details

.create_job(job_name, role_arn:, script_location: nil, script_path: nil, script_bucket: nil, script_folder: "scripts", script_filename: nil, command_name: "glueetl", default_arguments: {}, description: nil, worker_type: nil, number_of_workers: nil, timeout: 2880, max_retries: 0, allocated_capacity: nil, glue_version: nil) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/data_drain/glue_runner.rb', line 43

# Creates a new Glue Job definition in AWS.
#
# @param job_name [String] job name (validated against Glue naming rules).
# @param role_arn [String] IAM role ARN the job will assume.
# @param script_location [String, nil] full s3:// path to the script; when nil
#   it is resolved from script_path / script_bucket / script_folder / script_filename.
# @param command_name [String] Glue command type (default "glueetl").
# @param default_arguments [Hash] default run arguments for the job.
# @param worker_type [String, nil] e.g. "G.1X"; @param number_of_workers [Integer, nil].
# @param timeout [Integer] job timeout in minutes (Glue default unit).
# @param max_retries [Integer] automatic retry count.
# @param allocated_capacity [Integer, nil] legacy DPU setting.
# @param glue_version [String, nil] Glue runtime version.
# @return [Aws::Glue::Types::Job] the freshly created job definition.
# @raise [Aws::Glue::Errors::ServiceError] re-raised after structured logging.
def self.create_job(job_name, role_arn:, script_location: nil, script_path: nil,
                    script_bucket: nil, script_folder: "scripts", script_filename: nil,
                    command_name: "glueetl", default_arguments: {}, description: nil,
                    worker_type: nil, number_of_workers: nil, timeout: 2880,
                    max_retries: 0, allocated_capacity: nil, glue_version: nil)
  @logger = DataDrain.configuration.logger
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)

  final_script_location = resolve_script_location(
    script_location: script_location,
    script_path: script_path,
    script_bucket: script_bucket,
    script_folder: script_folder,
    script_filename: script_filename
  )

  opts = {
    name: job_name,
    role: role_arn,
    command: {
      name: command_name,
      python_version: "3",
      script_location: final_script_location
    }
  }
  # Only forward optional settings that were actually provided, so the AWS
  # defaults apply otherwise.
  opts[:default_arguments] = default_arguments unless default_arguments.empty?
  opts[:description] = description if description
  opts[:timeout] = timeout if timeout
  opts[:max_retries] = max_retries if max_retries
  opts[:allocated_capacity] = allocated_capacity if allocated_capacity
  opts[:worker_type] = worker_type if worker_type
  opts[:number_of_workers] = number_of_workers if number_of_workers
  opts[:glue_version] = glue_version if glue_version

  client.create_job(**opts)
  safe_log(:info, "glue_runner.job_create", {
             job: job_name,
             glue_version: glue_version,
             worker_type: worker_type,
             number_of_workers: number_of_workers
           })
  get_job(job_name)
rescue Aws::Glue::Errors::ServiceError => e
  # BUG FIX: the original merged the exception object itself into the payload
  # hash (`.merge((e))`), which raises TypeError inside the rescue and masks
  # the real AWS failure. Log structured fields instead, then re-raise.
  safe_log(:error, "glue_runner.job_create_error",
           { job: job_name, error_class: e.class.name, error_message: e.message })
  raise
end

.delete_job(job_name) ⇒ Object



124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/data_drain/glue_runner.rb', line 124

# Deletes a Glue Job definition if it exists.
#
# @param job_name [String] job name (validated first).
# @return [Boolean] true when deleted, false when the job did not exist.
# @raise [Aws::Glue::Errors::ServiceError] re-raised after structured logging.
def self.delete_job(job_name)
  @logger = DataDrain.configuration.logger
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  client.delete_job(job_name: job_name)
  safe_log(:info, "glue_runner.job_delete", { job: job_name })
  true
rescue Aws::Glue::Errors::EntityNotFoundException
  # Idempotent delete: a missing job is not an error, just a no-op.
  safe_log(:info, "glue_runner.job_delete_skipped", { job: job_name, reason: "not_found" })
  false
rescue Aws::Glue::Errors::ServiceError => e
  # BUG FIX: the original merged the exception object itself into the payload
  # hash (`.merge((e))`), which raises TypeError and masks the real AWS error.
  safe_log(:error, "glue_runner.job_delete_error",
           { job: job_name, error_class: e.class.name, error_message: e.message })
  raise
end

.ensure_job(job_name, role_arn:, script_location: nil, script_path: nil, script_bucket: nil, script_folder: "scripts", script_filename: nil, command_name: "glueetl", default_arguments: {}, description: nil, worker_type: nil, number_of_workers: nil, timeout: 2880, max_retries: 0, allocated_capacity: nil, glue_version: nil) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/data_drain/glue_runner.rb', line 139

# Idempotently converges a Glue Job to the desired definition: creates it when
# absent, updates it when drifted, and leaves it untouched when unchanged.
#
# @return [Aws::Glue::Types::Job] the resulting job definition.
def self.ensure_job(job_name, role_arn:, script_location: nil, script_path: nil,
                    script_bucket: nil, script_folder: "scripts", script_filename: nil,
                    command_name: "glueetl", default_arguments: {}, description: nil,
                    worker_type: nil, number_of_workers: nil, timeout: 2880,
                    max_retries: 0, allocated_capacity: nil, glue_version: nil)
  @logger = DataDrain.configuration.logger

  final_script_location = resolve_script_location(
    script_location: script_location,
    script_path: script_path,
    script_bucket: script_bucket,
    script_folder: script_folder,
    script_filename: script_filename
  )

  # Job missing entirely: create it from scratch.
  unless job_exists?(job_name)
    safe_log(:info, "glue_runner.job_created", { job: job_name })
    return create_job(job_name, role_arn: role_arn, script_location: final_script_location,
                                command_name: command_name, default_arguments: default_arguments,
                                description: description, worker_type: worker_type,
                                number_of_workers: number_of_workers, timeout: timeout,
                                max_retries: max_retries, allocated_capacity: allocated_capacity,
                                glue_version: glue_version)
  end

  current = get_job(job_name)
  desired = {
    role: role_arn,
    command_name: command_name,
    script_location: final_script_location,
    default_arguments: default_arguments,
    description: description,
    worker_type: worker_type,
    number_of_workers: number_of_workers,
    timeout: timeout,
    max_retries: max_retries,
    glue_version: glue_version
  }

  # No drift detected: return the existing definition without an API write.
  if changed_fields(desired, current).empty?
    safe_log(:info, "glue_runner.job_unchanged", { job: job_name })
    return current
  end

  safe_log(:info, "glue_runner.job_exists", { job: job_name })
  update_job(job_name, role_arn: role_arn, command_name: command_name,
                       script_location: final_script_location, default_arguments: default_arguments,
                       description: description, worker_type: worker_type,
                       number_of_workers: number_of_workers, timeout: timeout,
                       max_retries: max_retries, allocated_capacity: allocated_capacity,
                       glue_version: glue_version)
end

.get_job(job_name) ⇒ Object



38
39
40
41
# File 'lib/data_drain/glue_runner.rb', line 38

# Fetches the current Glue Job definition for +job_name+.
#
# @param job_name [String] job name (validated first).
# @return [Aws::Glue::Types::Job] the job definition as returned by AWS.
def self.get_job(job_name)
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  response = client.get_job(job_name: job_name)
  response.job
end

.job_exists?(job_name) ⇒ Boolean

Returns:

  • (Boolean)


30
31
32
33
34
35
36
# File 'lib/data_drain/glue_runner.rb', line 30

# Checks whether a Glue Job with the given name is already defined in AWS.
#
# @param job_name [String] job name (validated first).
# @return [Boolean] true when the job exists, false when Glue reports it missing.
def self.job_exists?(job_name)
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  begin
    get_job(job_name)
    true
  rescue Aws::Glue::Errors::EntityNotFoundException
    false
  end
end

.run_and_wait(job_name, arguments = {}, polling_interval: 30, max_wait_seconds: nil) ⇒ Object



258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
# File 'lib/data_drain/glue_runner.rb', line 258

# Starts a Glue Job run and blocks, polling, until it reaches a terminal state.
#
# @param job_name [String] name of the job in the AWS console.
# @param arguments [Hash] run arguments (keys must start with "--").
# @param polling_interval [Integer] seconds between status checks.
# @param max_wait_seconds [Integer, nil] overall timeout; nil = wait indefinitely.
# @return [true] when the run finishes SUCCEEDED.
# @raise [DataDrain::Error] when max_wait_seconds elapses before SUCCEEDED.
# @raise [RuntimeError] when the run ends FAILED, STOPPED or TIMEOUT.
def self.run_and_wait(job_name, arguments = {}, polling_interval: 30, max_wait_seconds: nil)
  config = DataDrain.configuration
  config.validate!
  # Monotonic clock: immune to wall-clock adjustments during long waits.
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  @logger = config.logger

  safe_log(:info, "glue_runner.start", { job: job_name })
  resp = client.start_job_run(job_name: job_name, arguments: arguments)
  run_id = resp.job_run_id

  loop do
    if max_wait_seconds &&
       (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) > max_wait_seconds
      safe_log(:error, "glue_runner.timeout", {
                 job: job_name,
                 run_id: run_id,
                 max_wait_seconds: max_wait_seconds
               })
      raise DataDrain::Error,
            "Glue Job #{job_name} (Run ID: #{run_id}) excedió max_wait_seconds=#{max_wait_seconds}"
    end

    run_info = client.get_job_run(job_name: job_name, run_id: run_id).job_run
    status = run_info.job_run_state

    case status
    when "SUCCEEDED"
      duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
      safe_log(:info, "glue_runner.complete", { job: job_name, run_id: run_id, duration_s: duration.round(2) })
      return true
    when "FAILED", "STOPPED", "TIMEOUT"
      duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
      # BUG FIX: the original lines were missing the payload variable name
      # (` = {...}` / `[:error_message] = ...` / `safe_log(..., )`), which is a
      # syntax error. Restored under an explicit name.
      failure_payload = { job: job_name, run_id: run_id, status: status, duration_s: duration.round(2) }

      # Escape double quotes and truncate to 200 chars so structured logs stay parseable.
      failure_payload[:error_message] = run_info.error_message.gsub("\"", "'")[0, 200] if run_info.error_message

      safe_log(:error, "glue_runner.failed", failure_payload)
      raise "Glue Job #{job_name} (Run ID: #{run_id}) falló con estado #{status}."
    else
      # Still RUNNING / STARTING / WAITING: log progress and sleep before re-polling.
      safe_log(:info, "glue_runner.polling",
               { job: job_name, run_id: run_id, status: status, next_check_in_s: polling_interval })
      sleep polling_interval
    end
  end
end

.update_job(job_name, role_arn: nil, command_name: nil, script_location: nil, default_arguments: nil, description: nil, worker_type: nil, number_of_workers: nil, timeout: nil, max_retries: nil, allocated_capacity: nil, glue_version: nil) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/data_drain/glue_runner.rb', line 91

# Updates an existing Glue Job definition; only the provided fields are changed.
#
# @param job_name [String] job name (validated first).
# @param role_arn [String, nil] new IAM role ARN, if changing.
# @param command_name [String, nil] @param script_location [String, nil] both
#   must be given together to replace the job command.
# @return [Aws::Glue::Types::Job] the updated job definition.
# @raise [Aws::Glue::Errors::ServiceError] re-raised after structured logging.
def self.update_job(job_name, role_arn: nil, command_name: nil, script_location: nil,
                    default_arguments: nil, description: nil, worker_type: nil,
                    number_of_workers: nil, timeout: nil, max_retries: nil, allocated_capacity: nil,
                    glue_version: nil)
  @logger = DataDrain.configuration.logger
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  job_update = {}
  job_update[:role] = role_arn if role_arn
  # The Glue command is replaced as a whole, so both pieces are required.
  if command_name && script_location
    job_update[:command] =
      { name: command_name, python_version: "3", script_location: script_location }
  end
  job_update[:default_arguments] = default_arguments if default_arguments
  job_update[:description] = description if description
  job_update[:timeout] = timeout if timeout
  job_update[:max_retries] = max_retries if max_retries
  job_update[:allocated_capacity] = allocated_capacity if allocated_capacity
  job_update[:worker_type] = worker_type if worker_type
  job_update[:number_of_workers] = number_of_workers if number_of_workers
  job_update[:glue_version] = glue_version if glue_version

  client.update_job(job_name: job_name, job_update: job_update)
  safe_log(:info, "glue_runner.job_update", {
             job: job_name,
             changed_fields: job_update.keys.map(&:to_s)
           })
  get_job(job_name)
rescue Aws::Glue::Errors::ServiceError => e
  # BUG FIX: the original merged the exception object itself into the payload
  # hash (`.merge((e))`), which raises TypeError and masks the real AWS error.
  safe_log(:error, "glue_runner.job_update_error",
           { job: job_name, error_class: e.class.name, error_message: e.message })
  raise
end

.upload_script(local_path:, bucket:, folder: "scripts", filename: nil) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/data_drain/glue_runner.rb', line 225

# Uploads a local Glue script to S3 so jobs can reference it.
#
# @param local_path [String] path to the local script file.
# @param bucket [String] destination S3 bucket.
# @param folder [String] key prefix inside the bucket (default "scripts").
# @param filename [String, nil] target filename; defaults to the local basename.
# @return [String] the resulting s3:// path as returned by the storage adapter.
# @raise [DataDrain::ConfigurationError] when the file is missing or storage
#   is not configured for S3.
# @raise [Aws::S3::Errors::ServiceError] re-raised after structured logging.
def self.upload_script(local_path:, bucket:, folder: "scripts", filename: nil)
  @logger = DataDrain.configuration.logger

  unless File.exist?(local_path)
    raise DataDrain::ConfigurationError,
          "Script local '#{local_path}' no existe"
  end

  actual_filename = filename || File.basename(local_path)
  # chomp avoids a double slash when the caller passes "scripts/".
  s3_key = "#{folder.chomp("/")}/#{actual_filename}"
  bytes = File.size(local_path)

  adapter = DataDrain::Storage.adapter
  unless adapter.is_a?(DataDrain::Storage::S3)
    raise DataDrain::ConfigurationError,
          "upload_script requiere storage_mode = :s3, actual: #{DataDrain.configuration.storage_mode}"
  end

  s3_path = adapter.upload_file(local_path, bucket, s3_key, content_type: "text/x-python")

  safe_log(:info, "glue_runner.script_uploaded", {
             local_path: local_path,
             s3_path: s3_path,
             bytes: bytes
           })

  s3_path
rescue Aws::S3::Errors::ServiceError => e
  # BUG FIX: the original merged the exception object itself into the payload
  # hash (`.merge((e))`), which raises TypeError and masks the real S3 error.
  safe_log(:error, "glue_runner.script_upload_error",
           { local_path: local_path, bucket: bucket,
             error_class: e.class.name, error_message: e.message })
  raise
end