Class: Tapsoob::Pull
Instance Attribute Summary
Attributes inherited from Operation
#database_url, #dump_path, #opts
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Operation
#add_completed_table, #apply_table_filter, #catch_errors, #completed_tables, #completed_tables_mutex, #data?, #db, #default_chunksize, #exclude_tables, #exiting?, #format_number, #indexes_first?, #initialize, #log, #parallel?, #parallel_workers, #resuming?, #schema?, #setup_signal_trap, #store_session, #stream_state, #stream_state=, #table_filter
Class Method Details
.factory(db, state) ⇒ Object
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
|
# File 'lib/tapsoob/operation.rb', line 373
# Builds the data-stream object appropriate for the given table state.
#
# @param db [Sequel::Database] source database handle
# @param state [Hash] stream state; may carry :klass (class name saved in a
#   resumed session) and :table_name
# @return [Object] an instance of the class named in :klass when resuming,
#   otherwise DataStreamKeyed for tables with a single integer primary key,
#   or DataStream for everything else
def self.factory(db, state)
  # Work around MySQL zero-dates blowing up Sequel's type conversion.
  if defined?(Sequel::MySQL) && Sequel::MySQL.respond_to?(:convert_invalid_date_time=)
    Sequel::MySQL.convert_invalid_date_time = :nil
  end

  # SECURITY: resolve the persisted class name with const_get instead of
  # eval — eval would execute arbitrary code if the session file were
  # tampered with. const_get resolves namespaced names ("A::B") directly.
  if state.has_key?(:klass)
    return Object.const_get(state[:klass]).new(db, state)
  end

  if Tapsoob::Utils.single_integer_primary_key(db, state[:table_name].to_sym)
    DataStreamKeyed.new(db, state)
  else
    DataStream.new(db, state)
  end
end
|
Instance Method Details
#fetch_tables_info ⇒ Object
363
364
365
366
367
368
369
370
371
|
# File 'lib/tapsoob/operation.rb', line 363
# Collects the row count of every table that passes the table filter.
#
# @return [Hash] table name => row count
def fetch_tables_info
  # NOTE(review): sort_dumped_tables is invoked via send — presumably a
  # private/extension method on the db object; confirm where it's defined.
  sorted = db.send(:sort_dumped_tables, db.tables, {})
  apply_table_filter(sorted).each_with_object({}) do |table_name, counts|
    counts[table_name] = db[table_name].count
  end
end
|
#file_prefix ⇒ Object
178
179
180
|
# File 'lib/tapsoob/operation.rb', line 178
# Prefix used when naming artifacts produced by this operation.
#
# @return [String] the literal prefix "pull"
def file_prefix
  "pull"
end
|
#pull_data ⇒ Object
215
216
217
218
219
220
221
222
223
224
225
|
# File 'lib/tapsoob/operation.rb', line 215
# Streams table data out of the source database, dispatching to the
# parallel or serial implementation depending on configuration.
def pull_data
  log.info "Receiving data"
  log.info "#{tables.size} tables, #{format_number(record_count)} records"
  parallel? ? pull_data_parallel : pull_data_serial
end
|
#pull_data_from_table(stream, progress) ⇒ Object
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
|
# File 'lib/tapsoob/operation.rb', line 285
# Pulls every remaining chunk of one table, writing rows to stdout (when
# dump_path is nil) or to per-table dump files, until the stream reports
# completion. Persists stream state after each chunk so a kill can resume.
#
# @param stream [Tapsoob::DataStream] tracks table name, offset and chunksize
# @param progress [ProgressBar, nil] optional bar, ticked once per chunk
def pull_data_from_table(stream, progress)
  loop do
    # Honor a pending shutdown request (set by the signal trap) between
    # chunks: persist the session so the pull can be resumed later.
    if exiting?
      store_session
      exit 0
    end

    row_size = 0
    chunksize = stream.state[:chunksize]
    begin
      # calculate_chunksize adapts the chunk size to measured fetch time;
      # the block must return the elapsed time of the database fetch.
      chunksize = Tapsoob::Utils.calculate_chunksize(chunksize) do |c|
        stream.state[:chunksize] = c.to_i
        encoded_data, row_size, elapsed_time = nil
        d1 = c.time_delta do
          encoded_data, row_size, elapsed_time = stream.fetch
        end
        data = nil
        d2 = c.time_delta do
          data = {
            :state => stream.to_hash,
            :checksum => Tapsoob::Utils.checksum(encoded_data).to_s,
            :encoded_data => encoded_data
          }
        end
        stream.fetch_data_from_database(data) do |rows|
          next if rows == {}
          progress.inc(1) if progress
          # No dump path: emit JSON rows on stdout; otherwise append to
          # the table's dump file.
          if dump_path.nil?
            puts JSON.generate(rows)
          else
            Tapsoob::Utils.export_rows(dump_path, stream.table_name, rows)
          end
        end
        log.debug "row size: #{row_size}"
        stream.error = false
        # Save progress so an interrupted run resumes from this chunk.
        self.stream_state = stream.to_hash
        # Exclude serialization/bookkeeping time from the chunk-size math.
        c.idle_secs = (d1 + d2)
        elapsed_time
      end
    rescue Tapsoob::CorruptedData => e
      # Checksum mismatch is retryable: flag the stream so it re-fetches
      # the same chunk instead of advancing.
      log.info "Corrupted Data Received #{e.message}, retrying..."
      stream.error = true
      next
    end
    break if stream.complete?
  end
  progress.finish if progress
  add_completed_table(stream.table_name)
  self.stream_state = {}
end
|
#pull_data_parallel ⇒ Object
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
|
# File 'lib/tapsoob/operation.rb', line 239
# Pulls all pending tables with a pool of worker threads, each draining
# [table_name, count] pairs from a shared queue.
def pull_data_parallel
  log.info "Using #{parallel_workers} parallel workers"
  multi_progress = opts[:progress] ? MultiProgressBar.new(parallel_workers) : nil
  # Work queue shared by all workers.
  table_queue = Queue.new
  tables.each { |table_name, count| table_queue << [table_name, count] }
  workers = (1..parallel_workers).map do
    Thread.new do
      loop do
        break if table_queue.empty?
        # Non-blocking pop; if another worker grabbed the last item between
        # the empty? check and here, ThreadError fires and this worker exits.
        table_name, count = table_queue.pop(true) rescue break
        stream = Tapsoob::DataStream.factory(db, {
          :chunksize => default_chunksize,
          :table_name => table_name
        }, { :debug => opts[:debug] })
        # At least one chunk even for empty tables so the bar can finish.
        estimated_chunks = [(count.to_f / default_chunksize).ceil, 1].max
        progress = multi_progress ? multi_progress.create_bar(table_name.to_s, estimated_chunks) : nil
        pull_data_from_table(stream, progress)
      end
    end
  end
  workers.each(&:join)
  multi_progress.stop if multi_progress
end
|
#pull_data_serial ⇒ Object
227
228
229
230
231
232
233
234
235
236
237
|
# File 'lib/tapsoob/operation.rb', line 227
# Pulls pending tables one at a time on the current thread.
def pull_data_serial
  tables.each do |table_name, count|
    state = {
      :chunksize => default_chunksize,
      :table_name => table_name
    }
    stream = Tapsoob::DataStream.factory(db, state, { :debug => opts[:debug] })

    # At least one chunk even for empty tables so the bar can finish.
    chunks = (count.to_f / default_chunksize).ceil
    chunks = 1 if chunks < 1
    bar = opts[:progress] ? ProgressBar.new(table_name.to_s, chunks) : nil
    pull_data_from_table(stream, bar)
  end
end
|
#pull_indexes ⇒ Object
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
|
# File 'lib/tapsoob/operation.rb', line 389
# Pulls index definitions for all filtered tables, exporting each to the
# dump path (or printing it when no dump path is configured).
def pull_indexes
  log.info "Receiving indexes"

  raw_idxs = Tapsoob::Schema.indexes_individual(database_url)
  idxs = (raw_idxs && raw_idxs.length >= 2) ? JSON.parse(raw_idxs) : {}

  # Skip tables without indexes so they don't get an empty progress bar.
  indexed = apply_table_filter(idxs).reject { |_table, indexes| indexes.size == 0 }

  # Width of the widest "<table> indexes" title, used to align the bars.
  title_width = indexed.keys.map { |table| "#{table} indexes".length }.max || 14

  indexed.each do |table, indexes|
    bar = ProgressBar.new("#{table} indexes", indexes.size, STDOUT, title_width)
    indexes.each do |idx|
      exported = Tapsoob::Utils.export_indexes(dump_path, table, idx)
      puts exported if dump_path.nil? && exported
      bar.inc(1)
    end
    bar.finish
  end
end
|
#pull_partial_data ⇒ Object
271
272
273
274
275
276
277
278
279
280
281
282
283
|
# File 'lib/tapsoob/operation.rb', line 271
# Resumes a pull that was interrupted mid-table, rebuilding the stream
# from the persisted stream state.
def pull_partial_data
  return if stream_state == {}

  table_name = stream_state[:table_name]
  record_count = tables[table_name.to_s]
  log.info "Resuming #{table_name}, #{format_number(record_count)} records"

  stream = Tapsoob::DataStream.factory(db, stream_state)
  chunksize = stream_state[:chunksize] || default_chunksize

  # At least one chunk even for empty tables so the bar can finish.
  chunks = (record_count.to_f / chunksize).ceil
  chunks = 1 if chunks < 1
  bar = opts[:progress] ? ProgressBar.new(table_name.to_s, chunks) : nil
  pull_data_from_table(stream, bar)
end
|
#pull_reset_sequences ⇒ Object
410
411
412
413
414
415
|
# File 'lib/tapsoob/operation.rb', line 410
# Resets database sequences via the schema binary; prints the command
# output when running without a dump path.
def pull_reset_sequences
  log.info "Resetting sequences"
  result = Tapsoob::Utils.schema_bin(:reset_db_sequences, database_url)
  puts result if dump_path.nil? && result
end
|
#pull_schema ⇒ Object
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
# File 'lib/tapsoob/operation.rb', line 200
# Dumps the schema of every pending table, exporting each definition to
# the dump path (or stdout when none is configured).
def pull_schema
  log.info "Receiving schema"

  bar = ProgressBar.new('Schema', tables.size)
  tables.each do |table_name, _count|
    dumped = Tapsoob::Schema.dump_table(db, table_name, @opts.slice(:indexes, :same_db))
    log.debug "Table: #{table_name}\n#{dumped}\n"
    exported = Tapsoob::Utils.export_schema(dump_path, table_name, dumped)
    puts exported if dump_path.nil? && exported
    bar.inc(1)
  end
  bar.finish
end
|
#record_count ⇒ Object
355
356
357
|
# File 'lib/tapsoob/operation.rb', line 355
# Total number of records across all tables still to be pulled.
#
# @return [Integer] sum of per-table row counts; 0 when there are no
#   tables (the previous inject(:+) returned nil on an empty set, which
#   would have broken format_number interpolation in pull_data)
def record_count
  tables_info.values.sum
end
|
#run ⇒ Object
186
187
188
189
190
191
192
193
194
195
196
197
198
|
# File 'lib/tapsoob/operation.rb', line 186
# Executes the full pull: schema, data (with resume support), indexes and
# sequence reset, in the order dictated by the options.
def run
  catch_errors do
    # A resumed session already has its schema/early indexes in place.
    unless resuming?
      pull_schema if schema?
      # Indexes can be created before data when indexes_first is set.
      pull_indexes if indexes_first? && schema?
    end
    setup_signal_trap
    # Finish the interrupted table first, then the remaining tables.
    pull_partial_data if data? && resuming?
    pull_data if data?
    pull_indexes if !indexes_first? && schema?
    pull_reset_sequences
  end
end
|
#tables ⇒ Object
346
347
348
349
350
351
352
353
|
# File 'lib/tapsoob/operation.rb', line 346
# Tables that still need to be pulled, keyed by name (String) with their
# row counts, excluding any table already completed this session.
#
# @return [Hash] table name (String) => row count
def tables
  tables_info.each_with_object({}) do |(table_name, count), pending|
    pending[table_name.to_s] = count unless completed_tables.include?(table_name.to_s)
  end
end
|
#tables_info ⇒ Object
359
360
361
|
# File 'lib/tapsoob/operation.rb', line 359
# Memoized map of table name => row count, fetched once and cached in the
# options hash so repeated calls don't re-query the database.
def tables_info
  opts[:tables_info] = fetch_tables_info unless opts[:tables_info]
  opts[:tables_info]
end
|
#to_hash ⇒ Object
182
183
184
|
# File 'lib/tapsoob/operation.rb', line 182
# Serializes this operation (on top of Operation#to_hash) for session storage.
#
# NOTE(review): remote_tables_info is not among the methods visible here or
# in the inherited-methods list (only tables_info is) — confirm Operation or
# a mixin defines it, otherwise this raises NoMethodError.
def to_hash
  super.merge(:remote_tables_info => remote_tables_info)
end
|