Class: DataDrain::GlueRunner
- Inherits: Object
- Extended by:
- Observability
- Defined in:
- lib/data_drain/glue_runner.rb
Overview
Orquestador para AWS Glue. Permite disparar y monitorear Jobs en AWS para delegar el movimiento masivo de datos (ej. tablas de 1TB).
Class Attribute Summary collapse
-
.client ⇒ Aws::Glue::Client
Cliente de AWS Glue memoizado, inicializado con la región configurada en DataDrain.
Class Method Summary collapse
- .create_job(job_name, role_arn:, script_location: nil, script_path: nil, script_bucket: nil, script_folder: "scripts", script_filename: nil, command_name: "glueetl", default_arguments: {}, description: nil, worker_type: nil, number_of_workers: nil, timeout: 2880, max_retries: 0, allocated_capacity: nil, glue_version: nil) ⇒ Object
- .delete_job(job_name) ⇒ Object
- .ensure_job(job_name, role_arn:, script_location: nil, script_path: nil, script_bucket: nil, script_folder: "scripts", script_filename: nil, command_name: "glueetl", default_arguments: {}, description: nil, worker_type: nil, number_of_workers: nil, timeout: 2880, max_retries: 0, allocated_capacity: nil, glue_version: nil) ⇒ Object
- .get_job(job_name) ⇒ Object
- .job_exists?(job_name) ⇒ Boolean
- .run_and_wait(job_name, arguments = {}, polling_interval: 30, max_wait_seconds: nil) ⇒ Object
- .update_job(job_name, role_arn: nil, command_name: nil, script_location: nil, default_arguments: nil, description: nil, worker_type: nil, number_of_workers: nil, timeout: nil, max_retries: nil, allocated_capacity: nil, glue_version: nil) ⇒ Object
- .upload_script(local_path:, bucket:, folder: "scripts", filename: nil) ⇒ Object
Class Attribute Details
.client ⇒ Aws::Glue::Client
Cliente de AWS Glue memoizado, inicializado con la región configurada en DataDrain.
22 23 24 |
# File 'lib/data_drain/glue_runner.rb', line 22

# Lazily builds and memoizes the AWS Glue client used by every class method.
# The region comes from the gem-wide configuration.
#
# @return [Aws::Glue::Client] the shared, memoized Glue client
def self.client
  @client ||= Aws::Glue::Client.new(region: DataDrain.configuration.aws_region)
end
Class Method Details
.create_job(job_name, role_arn:, script_location: nil, script_path: nil, script_bucket: nil, script_folder: "scripts", script_filename: nil, command_name: "glueetl", default_arguments: {}, description: nil, worker_type: nil, number_of_workers: nil, timeout: 2880, max_retries: 0, allocated_capacity: nil, glue_version: nil) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/data_drain/glue_runner.rb', line 43

# Creates a Glue job with the given name, IAM role and script location.
# The script location can be given directly (+script_location+) or resolved
# from a local path / bucket via +resolve_script_location+.
#
# @param job_name [String] Glue job name (validated against Glue naming rules)
# @param role_arn [String] IAM role ARN the job runs under
# @return [Aws::Glue::Types::Job] the freshly created job, re-read from Glue
# @raise [Aws::Glue::Errors::ServiceError] if the create call fails
def self.create_job(job_name, role_arn:, script_location: nil, script_path: nil,
                    script_bucket: nil, script_folder: "scripts", script_filename: nil,
                    command_name: "glueetl", default_arguments: {}, description: nil,
                    worker_type: nil, number_of_workers: nil, timeout: 2880,
                    max_retries: 0, allocated_capacity: nil, glue_version: nil)
  @logger = DataDrain.configuration.logger
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)

  final_script_location = resolve_script_location(
    script_location: script_location,
    script_path: script_path,
    script_bucket: script_bucket,
    script_folder: script_folder,
    script_filename: script_filename
  )

  opts = {
    name: job_name,
    role: role_arn,
    command: { name: command_name, python_version: "3", script_location: final_script_location }
  }
  # Only send optional attributes the caller actually provided, so Glue
  # keeps its own defaults otherwise.
  opts[:default_arguments] = default_arguments unless default_arguments.empty?
  opts[:description] = description if description
  opts[:timeout] = timeout if timeout
  opts[:max_retries] = max_retries if max_retries
  opts[:allocated_capacity] = allocated_capacity if allocated_capacity
  opts[:worker_type] = worker_type if worker_type
  opts[:number_of_workers] = number_of_workers if number_of_workers
  opts[:glue_version] = glue_version if glue_version

  client.create_job(**opts)
  safe_log(:info, "glue_runner.job_create",
           { job: job_name, glue_version: glue_version, worker_type: worker_type,
             number_of_workers: number_of_workers })
  get_job(job_name)
rescue Aws::Glue::Errors::ServiceError => e
  # FIX: the source had `.merge((e))`, which merges the exception object
  # itself into the hash and raises TypeError. Log structured error fields
  # instead. (An identifier may have been lost in extraction — if an
  # Observability helper for exception context exists, prefer it here.)
  safe_log(:error, "glue_runner.job_create_error",
           { job: job_name, error_class: e.class.name, error_message: e.message })
  raise
end
.delete_job(job_name) ⇒ Object
124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/data_drain/glue_runner.rb', line 124

# Deletes a Glue job if it exists.
#
# @param job_name [String] Glue job name
# @return [Boolean] true when deleted, false when the job did not exist
# @raise [Aws::Glue::Errors::ServiceError] for any other Glue failure
def self.delete_job(job_name)
  @logger = DataDrain.configuration.logger
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  client.delete_job(job_name: job_name)
  safe_log(:info, "glue_runner.job_delete", { job: job_name })
  true
rescue Aws::Glue::Errors::EntityNotFoundException
  # Deleting a missing job is treated as a no-op, not an error.
  safe_log(:info, "glue_runner.job_delete_skipped", { job: job_name, reason: "not_found" })
  false
rescue Aws::Glue::Errors::ServiceError => e
  # FIX: the source had `.merge((e))`, which merges the exception object
  # itself into the hash and raises TypeError. Log structured fields instead.
  safe_log(:error, "glue_runner.job_delete_error",
           { job: job_name, error_class: e.class.name, error_message: e.message })
  raise
end
.ensure_job(job_name, role_arn:, script_location: nil, script_path: nil, script_bucket: nil, script_folder: "scripts", script_filename: nil, command_name: "glueetl", default_arguments: {}, description: nil, worker_type: nil, number_of_workers: nil, timeout: 2880, max_retries: 0, allocated_capacity: nil, glue_version: nil) ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/data_drain/glue_runner.rb', line 139

# Idempotent create-or-update for a Glue job.
#
# Resolves the script location, then:
# - creates the job when it does not exist;
# - returns the existing job untouched when no tracked field differs;
# - otherwise pushes an update with the desired settings.
#
# @param job_name [String] Glue job name
# @param role_arn [String] IAM role ARN the job runs under
# @return [Aws::Glue::Types::Job] the existing, updated or created job
def self.ensure_job(job_name, role_arn:, script_location: nil, script_path: nil,
                    script_bucket: nil, script_folder: "scripts", script_filename: nil,
                    command_name: "glueetl", default_arguments: {}, description: nil,
                    worker_type: nil, number_of_workers: nil, timeout: 2880,
                    max_retries: 0, allocated_capacity: nil, glue_version: nil)
  @logger = DataDrain.configuration.logger

  resolved_location = resolve_script_location(
    script_location: script_location,
    script_path: script_path,
    script_bucket: script_bucket,
    script_folder: script_folder,
    script_filename: script_filename
  )

  unless job_exists?(job_name)
    safe_log(:info, "glue_runner.job_created", { job: job_name })
    return create_job(job_name,
                      role_arn: role_arn,
                      script_location: resolved_location,
                      command_name: command_name,
                      default_arguments: default_arguments,
                      description: description,
                      worker_type: worker_type,
                      number_of_workers: number_of_workers,
                      timeout: timeout,
                      max_retries: max_retries,
                      allocated_capacity: allocated_capacity,
                      glue_version: glue_version)
  end

  existing = get_job(job_name)
  desired_state = {
    role: role_arn,
    command_name: command_name,
    script_location: resolved_location,
    default_arguments: default_arguments,
    description: description,
    worker_type: worker_type,
    number_of_workers: number_of_workers,
    timeout: timeout,
    max_retries: max_retries,
    glue_version: glue_version
  }

  diff = changed_fields(desired_state, existing)
  if diff.empty?
    # Nothing tracked changed: avoid a needless UpdateJob call.
    safe_log(:info, "glue_runner.job_unchanged", { job: job_name })
    existing
  else
    safe_log(:info, "glue_runner.job_exists", { job: job_name })
    update_job(job_name,
               role_arn: role_arn,
               command_name: command_name,
               script_location: resolved_location,
               default_arguments: default_arguments,
               description: description,
               worker_type: worker_type,
               number_of_workers: number_of_workers,
               timeout: timeout,
               max_retries: max_retries,
               allocated_capacity: allocated_capacity,
               glue_version: glue_version)
  end
end
.get_job(job_name) ⇒ Object
38 39 40 41 |
# File 'lib/data_drain/glue_runner.rb', line 38

# Fetches a Glue job definition by name.
#
# @param job_name [String] Glue job name
# @return [Aws::Glue::Types::Job] the job as returned by the Glue API
def self.get_job(job_name)
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  response = client.get_job(job_name: job_name)
  response.job
end
.job_exists?(job_name) ⇒ Boolean
30 31 32 33 34 35 36 |
# File 'lib/data_drain/glue_runner.rb', line 30

# Whether a Glue job with the given name exists.
#
# @param job_name [String] Glue job name
# @return [Boolean] true when Glue knows the job, false when it reports
#   EntityNotFound
def self.job_exists?(job_name)
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)
  begin
    get_job(job_name)
    true
  rescue Aws::Glue::Errors::EntityNotFoundException
    false
  end
end
.run_and_wait(job_name, arguments = {}, polling_interval: 30, max_wait_seconds: nil) ⇒ Object
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 |
# File 'lib/data_drain/glue_runner.rb', line 258

# Starts a Glue job run and blocks, polling until it finishes.
#
# @param job_name [String] Glue job name
# @param arguments [Hash] run-time arguments passed to StartJobRun
# @param polling_interval [Integer] seconds between GetJobRun polls
# @param max_wait_seconds [Integer, nil] overall deadline; nil waits forever
# @return [true] when the run reaches SUCCEEDED
# @raise [DataDrain::Error] when max_wait_seconds is exceeded
# @raise [RuntimeError] when the run ends FAILED, STOPPED or TIMEOUT
def self.run_and_wait(job_name, arguments = {}, polling_interval: 30, max_wait_seconds: nil)
  config = DataDrain.configuration
  config.validate!
  # Monotonic clock: wall-clock jumps must not distort the deadline.
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  @logger = config.logger

  safe_log(:info, "glue_runner.start", { job: job_name })
  resp = client.start_job_run(job_name: job_name, arguments: arguments)
  run_id = resp.job_run_id

  loop do
    if max_wait_seconds && (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) > max_wait_seconds
      safe_log(:error, "glue_runner.timeout",
               { job: job_name, run_id: run_id, max_wait_seconds: max_wait_seconds })
      raise DataDrain::Error, "Glue Job #{job_name} (Run ID: #{run_id}) excedió max_wait_seconds=#{max_wait_seconds}"
    end

    run_info = client.get_job_run(job_name: job_name, run_id: run_id).job_run
    status = run_info.job_run_state

    case status
    when "SUCCEEDED"
      duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
      safe_log(:info, "glue_runner.complete",
               { job: job_name, run_id: run_id, duration_s: duration.round(2) })
      return true
    when "FAILED", "STOPPED", "TIMEOUT"
      duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time
      # FIX: the extracted source lost the hash variable name and the
      # `error_message` accessor (`= { ... }` / `run_info..gsub`).
      # Reconstructed using JobRun#error_message from the AWS SDK; the
      # message is quote-sanitized and truncated to 200 chars for logging.
      failure_details = { job: job_name, run_id: run_id, status: status,
                          duration_s: duration.round(2) }
      if run_info.error_message
        failure_details[:error_message] = run_info.error_message.gsub("\"", "'")[0, 200]
      end
      safe_log(:error, "glue_runner.failed", failure_details)
      raise "Glue Job #{job_name} (Run ID: #{run_id}) falló con estado #{status}."
    else
      # Still RUNNING/STARTING/etc.: wait and poll again.
      safe_log(:info, "glue_runner.polling",
               { job: job_name, run_id: run_id, status: status,
                 next_check_in_s: polling_interval })
      sleep polling_interval
    end
  end
end
.update_job(job_name, role_arn: nil, command_name: nil, script_location: nil, default_arguments: nil, description: nil, worker_type: nil, number_of_workers: nil, timeout: nil, max_retries: nil, allocated_capacity: nil, glue_version: nil) ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/data_drain/glue_runner.rb', line 91

# Updates an existing Glue job. Only the attributes passed as non-nil are
# sent in the JobUpdate payload; the command is only replaced when BOTH
# command_name and script_location are given.
#
# @param job_name [String] Glue job name
# @return [Aws::Glue::Types::Job] the job re-read after the update
# @raise [Aws::Glue::Errors::ServiceError] if the update call fails
def self.update_job(job_name, role_arn: nil, command_name: nil, script_location: nil,
                    default_arguments: nil, description: nil, worker_type: nil,
                    number_of_workers: nil, timeout: nil, max_retries: nil,
                    allocated_capacity: nil, glue_version: nil)
  @logger = DataDrain.configuration.logger
  DataDrain::Validations.validate_glue_name!(:job_name, job_name)

  job_update = {}
  job_update[:role] = role_arn if role_arn
  if command_name && script_location
    job_update[:command] = { name: command_name, python_version: "3", script_location: script_location }
  end
  job_update[:default_arguments] = default_arguments if default_arguments
  job_update[:description] = description if description
  job_update[:timeout] = timeout if timeout
  job_update[:max_retries] = max_retries if max_retries
  job_update[:allocated_capacity] = allocated_capacity if allocated_capacity
  job_update[:worker_type] = worker_type if worker_type
  job_update[:number_of_workers] = number_of_workers if number_of_workers
  job_update[:glue_version] = glue_version if glue_version

  client.update_job(job_name: job_name, job_update: job_update)
  safe_log(:info, "glue_runner.job_update",
           { job: job_name, changed_fields: job_update.keys.map(&:to_s) })
  get_job(job_name)
rescue Aws::Glue::Errors::ServiceError => e
  # FIX: the source had `.merge((e))`, which merges the exception object
  # itself into the hash and raises TypeError. Log structured fields instead.
  safe_log(:error, "glue_runner.job_update_error",
           { job: job_name, error_class: e.class.name, error_message: e.message })
  raise
end
.upload_script(local_path:, bucket:, folder: "scripts", filename: nil) ⇒ Object
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
# File 'lib/data_drain/glue_runner.rb', line 225

# Uploads a local Glue script to S3 via the configured storage adapter.
# Requires storage_mode = :s3.
#
# @param local_path [String] path to the local script file
# @param bucket [String] destination S3 bucket
# @param folder [String] destination prefix (trailing slash stripped)
# @param filename [String, nil] destination filename; defaults to the basename
# @return [String] the resulting S3 path of the uploaded script
# @raise [DataDrain::ConfigurationError] if the file is missing or storage is not S3
# @raise [Aws::S3::Errors::ServiceError] if the upload itself fails
def self.upload_script(local_path:, bucket:, folder: "scripts", filename: nil)
  @logger = DataDrain.configuration.logger

  unless File.exist?(local_path)
    raise DataDrain::ConfigurationError, "Script local '#{local_path}' no existe"
  end

  actual_filename = filename || File.basename(local_path)
  s3_key = "#{folder.chomp("/")}/#{actual_filename}"
  bytes = File.size(local_path)

  adapter = DataDrain::Storage.adapter
  unless adapter.is_a?(DataDrain::Storage::S3)
    raise DataDrain::ConfigurationError,
          "upload_script requiere storage_mode = :s3, actual: #{DataDrain.configuration.storage_mode}"
  end

  s3_path = adapter.upload_file(local_path, bucket, s3_key, content_type: "text/x-python")
  safe_log(:info, "glue_runner.script_uploaded",
           { local_path: local_path, s3_path: s3_path, bytes: bytes })
  s3_path
rescue Aws::S3::Errors::ServiceError => e
  # FIX: the source had `.merge((e))`, which merges the exception object
  # itself into the hash and raises TypeError. Log structured fields instead.
  safe_log(:error, "glue_runner.script_upload_error",
           { local_path: local_path, bucket: bucket,
             error_class: e.class.name, error_message: e.message })
  raise
end