Module: Tapsoob::Utils
Instance Method Summary
- #base64decode(data) ⇒ Object
- #base64encode(data) ⇒ Object
- #bin(cmd) ⇒ Object
- #calculate_chunksize(old_chunksize) ⇒ Object
- #checksum(data) ⇒ Object
- #encode_blobs(row, columns) ⇒ Object
- #export_indexes(dump_path, table, index_data) ⇒ Object
- #export_rows(dump_path, table, row_data) ⇒ Object
- #export_schema(dump_path, table, schema_data) ⇒ Object
- #format_data(db, data, opts = {}) ⇒ Object
- #incorrect_blobs(db, table) ⇒ Object
MySQL handles text and blob fields the same way internally. This is not true for other databases, so we must check whether the field is actually text and manually convert it back to a string.
- #load_indexes(database_url, index) ⇒ Object
- #load_schema(dump_path, database_url, table) ⇒ Object
- #order_by(db, table) ⇒ Object
- #primary_key(db, table) ⇒ Object
- #schema_bin(command, *args) ⇒ Object
- #single_integer_primary_key(db, table) ⇒ Object
- #valid_data?(data, crc32) ⇒ Boolean
- #windows? ⇒ Boolean
Instance Method Details
#base64decode(data) ⇒ Object
36 37 38 |
# File 'lib/tapsoob/utils.rb', line 36
#
# Decodes Base64 text (the "m" pack format) back into its raw string.
#
# @param data [String] Base64-encoded text
# @return [String] the decoded content
def base64decode(data)
  data.unpack1("m")
end
#base64encode(data) ⇒ Object
32 33 34 |
# File 'lib/tapsoob/utils.rb', line 32
#
# Encodes a string as Base64 using the "m" pack format.
#
# @param data [String] raw content to encode
# @return [String] Base64 text as produced by Array#pack("m")
def base64encode(data)
  payload = [data]
  payload.pack("m")
end
#bin(cmd) ⇒ Object
19 20 21 22 |
# File 'lib/tapsoob/utils.rb', line 19
#
# Returns the executable name to use on the current platform.
#
# @param cmd [String] base command name
# @return [String] "<cmd>.cmd" when #windows? is true, otherwise +cmd+ itself
def bin(cmd)
  windows? ? "#{cmd}.cmd" : cmd
end
#calculate_chunksize(old_chunksize) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/tapsoob/utils.rb', line 119
#
# Times the given block and asks a Tapsoob::Chunksize for the next chunk size.
#
# The block receives the Tapsoob::Chunksize instance and its return value is
# stored as +time_in_db+. When the block raises Errno::EPIPE the chunk size is
# reset and the block is run again; the third consecutive failure is re-raised.
#
# @param old_chunksize [Object] value handed to Tapsoob::Chunksize.new
# @yieldparam chunk [Tapsoob::Chunksize] the tracker for this attempt
# @return [Object] whatever Tapsoob::Chunksize#calc_new_chunksize returns
# @raise [Errno::EPIPE] when the block keeps failing after two retries
def calculate_chunksize(old_chunksize)
  chunk = Tapsoob::Chunksize.new(old_chunksize)

  begin
    chunk.start_time = Time.now
    chunk.time_in_db = yield chunk
  rescue Errno::EPIPE
    chunk.retries += 1
    # Stop retrying once more than two retries have been consumed.
    raise if chunk.retries > 2

    # we got disconnected, the chunksize could be too large
    # reset the chunksize based on the number of retries
    chunk.reset_chunksize
    retry
  end

  chunk.end_time = Time.now
  chunk.calc_new_chunksize
end
#checksum(data) ⇒ Object
24 25 26 |
# File 'lib/tapsoob/utils.rb', line 24
#
# Computes the CRC-32 checksum of the given data.
#
# @param data [String] content to checksum
# @return [Integer] the value of Zlib.crc32 for +data+
def checksum(data)
  Zlib.crc32(data)
end
#encode_blobs(row, columns) ⇒ Object
111 112 113 114 115 116 117 |
# File 'lib/tapsoob/utils.rb', line 111
#
# Base64-encodes the listed columns of a row in place.
#
# nil values are left untouched. With an empty column list the row is
# returned unchanged.
#
# @param row [Hash] row data keyed by column; modified in place
# @param columns [Array] keys of +row+ whose values must be encoded
# @return [Hash] the same +row+ object
def encode_blobs(row, columns)
  columns.each do |name|
    value = row[name]
    row[name] = base64encode(value) unless value.nil?
  end
  row
end
#export_indexes(dump_path, table, index_data) ⇒ Object
145 146 147 148 149 150 151 152 |
# File 'lib/tapsoob/utils.rb', line 145
#
# Appends one JSON line describing index data to
# <dump_path>/indexes/<table>.json.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name used as the file's base name
# @param index_data [Object] value serialized with JSON.generate
# @return [Integer] number of bytes written
def export_indexes(dump_path, table, index_data)
  # Use append-only writes to avoid O(n²) complexity
  target = File.join(dump_path, "indexes", "#{table}.json")
  line = "#{JSON.generate(index_data)}\n"
  File.open(target, 'a') { |io| io.write(line) }
end
#export_rows(dump_path, table, row_data) ⇒ Object
154 155 156 157 158 159 160 161 162 |
# File 'lib/tapsoob/utils.rb', line 154
#
# Appends one chunk of row data as a single JSON line to
# <dump_path>/data/<table>.json.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name used as the file's base name
# @param row_data [Object] value serialized with JSON.generate
# @return [Integer] number of bytes written
def export_rows(dump_path, table, row_data)
  # Use append-only writes to avoid O(n²) complexity
  # Store metadata separately and append data chunks as NDJSON
  target = File.join(dump_path, "data", "#{table}.json")
  File.write(target, JSON.generate(row_data) + "\n", mode: 'a')
end
#export_schema(dump_path, table, schema_data) ⇒ Object
139 140 141 142 143 |
# File 'lib/tapsoob/utils.rb', line 139
#
# Writes a table's schema to <dump_path>/schemas/<table>.rb, replacing any
# previous content of that file.
#
# @param dump_path [String] root directory of the dump
# @param table [#to_s] table name used as the file's base name
# @param schema_data [String] schema text to store
# @return [Integer] number of bytes written
def export_schema(dump_path, table, schema_data)
  target = File.join(dump_path, "schemas", "#{table}.rb")
  File.write(target, schema_data)
end
#format_data(db, data, opts = {}) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/tapsoob/utils.rb', line 40
#
# Converts a chunk of rows into a header/data payload.
#
# Columns listed in opts[:string_columns] are Base64-encoded through
# #encode_blobs, Time values are rendered as 'YYYY-MM-DD HH:MM:SS' strings,
# and every value is checked against the varchar(N) limit declared for its
# column in opts[:schema].
#
# @param db [Object] database handle; must respond to +extension+ and
#   +column_schema_to_ruby_type+ (presumably a Sequel::Database - confirm)
# @param data [Array<Hash>] rows keyed by column; rows are modified in place
# @param opts [Hash] optional settings
# @option opts [Array] :string_columns columns to Base64-encode
# @option opts [Array<Array>] :schema [column, metadata] pairs; the metadata
#   hash is read for :db_type
# @option opts [Object] :table table name reported in the result and in errors
# @return [Hash] {} for empty +data+; otherwise :table_name, :header and :data,
#   plus :types when a schema was supplied
# @raise [Tapsoob::InvalidData] when a value is longer than its varchar(N) limit
def format_data(db, data, opts = {})
  return {} if data.size == 0

  string_columns = opts[:string_columns] || []
  schema = opts[:schema] || []
  table = opts[:table]

  # Map every varchar(N) column to its declared maximum length N.
  max_lengths = schema.each_with_object({}) do |(column, meta), lengths|
    lengths[column] = Regexp.last_match(1).to_i if meta[:db_type] =~ /^varchar\((\d+)\)/
  end

  header = data[0].keys

  only_data = data.collect do |row|
    row = encode_blobs(row, string_columns)
    row.each do |column, value|
      limit = max_lengths[column]
      if limit && value.to_s.length > limit
        raise Tapsoob::InvalidData.new(<<-ERROR)
Detected data that exceeds the length limitation of its column. This is generally due to the fact that SQLite does not enforce length restrictions.

Table : #{table}
Column : #{column}
Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
Data : #{value}
        ERROR
      end

      # Type conversion
      row[column] = value.strftime('%Y-%m-%d %H:%M:%S') if value.is_a?(Time)
    end
    # Emit values in header order so every row lines up with the header.
    header.collect { |h| row[h] }
  end

  res = { table_name: table, header: header, data: only_data }

  # Add schema dumper extension in case it hasn't been added until now
  db.extension :schema_dumper

  # Add types if schema isn't empty
  unless schema.empty?
    res[:types] = schema.map do |c|
      ruby_type = db.column_schema_to_ruby_type(c.last)[:type].to_s
      case ruby_type
      when "BigDecimal"
        "float"
      when "Bignum"
        "integer"
      when "File"
        "blob"
      when "TrueClass"
        "boolean"
      else
        ruby_type.downcase
      end
    end
  end

  res
end
#incorrect_blobs(db, table) ⇒ Object
MySQL handles text and blob fields the same way internally. This is not true for other databases, so we must check whether the field is actually text and manually convert it back to a string.
100 101 102 103 104 105 106 107 108 109 |
# File 'lib/tapsoob/utils.rb', line 100
#
# Lists the columns of a table that the schema reports as :blob, but only when
# the database URL is a mysql:// or mysql2:// URL; any other URL yields [].
#
# @param db [Object] database handle responding to +url+ and +schema+
# @param table [Object] table identifier passed to +db.schema+
# @return [Array] names of the columns whose schema type is :blob
def incorrect_blobs(db, table)
  return [] unless db.url =~ /(mysql|mysql2):\/\//

  db.schema(table).each_with_object([]) do |(column, cdata), blobs|
    blobs << column if cdata[:type] == :blob
  end
end
#load_indexes(database_url, index) ⇒ Object
169 170 171 |
# File 'lib/tapsoob/utils.rb', line 169
#
# Delegates index loading to Tapsoob::Schema.load_indexes.
#
# @param database_url [String] connection URL of the target database
# @param index [Object] index definition forwarded unchanged
# @return [Object] whatever Tapsoob::Schema.load_indexes returns
def load_indexes(database_url, index)
  Tapsoob::Schema.load_indexes(database_url, index)
end
#load_schema(dump_path, database_url, table) ⇒ Object
164 165 166 167 |
# File 'lib/tapsoob/utils.rb', line 164
#
# Loads the schema file <dump_path>/schemas/<table>.rb into the database by
# running the :load command through #schema_bin.
#
# @param dump_path [String] root directory of the dump
# @param database_url [String] connection URL of the target database
# @param table [#to_s] table name used as the file's base name
# @return [Object] whatever #schema_bin returns
def load_schema(dump_path, database_url, table)
  schema_path = File.join(dump_path, "schemas", "#{table}.rb").to_s
  schema_bin(:load, database_url, schema_path)
end
#order_by(db, table) ⇒ Object
190 191 192 193 194 195 196 197 198 |
# File 'lib/tapsoob/utils.rb', line 190
#
# Picks the columns used to order a table's rows: the primary key column(s)
# when the table has any, otherwise every column of the table.
#
# @param db [Object] database handle passed to #primary_key and indexed with
#   the table to obtain its columns
# @param table [String, Symbol, Sequel::SQL::Identifier] table to inspect
# @return [Array] column names to order by
def order_by(db, table)
  pkey = primary_key(db, table)

  # An empty Array is truthy in Ruby, so a table without a primary key must be
  # detected with an explicit emptiness check; otherwise the fallback below is
  # never reached and the ordering ends up empty.
  if pkey.kind_of?(Array)
    return pkey unless pkey.empty?
  elsif pkey
    return [pkey.to_sym]
  end

  table = table.to_sym unless table.kind_of?(Sequel::SQL::Identifier)
  db[table].columns
end
#primary_key(db, table) ⇒ Object
180 181 182 |
# File 'lib/tapsoob/utils.rb', line 180
#
# Returns the names of the columns flagged as primary key in the table schema.
#
# @param db [Object] database handle responding to +schema+
# @param table [Object] table identifier passed to +db.schema+
# @return [Array] primary key column names; empty when the table has none
def primary_key(db, table)
  key_entries = db.schema(table).select { |_name, info| info[:primary_key] }
  key_entries.map(&:first)
end
#schema_bin(command, *args) ⇒ Object
173 174 175 176 177 178 |
# File 'lib/tapsoob/utils.rb', line 173
#
# Runs a command of the Tapsoob schema CLI in-process.
#
# @param command [Symbol, String] name of the Tapsoob::CLI::Schema command
# @param args [Array] command arguments; each one is converted to a String
# @return [Object] whatever Tapsoob::CLI::Schema#invoke returns
def schema_bin(command, *args)
  # Loaded on demand rather than at file load time
  # (NOTE(review): presumably to avoid a circular require - confirm).
  require 'tapsoob/cli'

  script = Tapsoob::CLI::Schema.new
  script.invoke(command, args.map { |a| "#{a}" })
end
#single_integer_primary_key(db, table) ⇒ Object
184 185 186 187 188 |
# File 'lib/tapsoob/utils.rb', line 184
#
# Tells whether the table's primary key consists of exactly one column and
# that column is of type :integer.
#
# @param db [Object] database handle responding to +schema+
# @param table [String, Symbol, Sequel::SQL::Identifier] table to inspect
# @return [Boolean] true only for a single-column integer primary key
def single_integer_primary_key(db, table)
  table = table.to_sym unless table.kind_of?(Sequel::SQL::Identifier)

  # Collect ALL primary key columns before checking the type, so a composite
  # key that happens to contain one integer column is not mistaken for a
  # single integer key.
  keys = db.schema(table).select { |_column, info| info[:primary_key] }
  keys.size == 1 && keys.first[1][:type] == :integer
end
#valid_data?(data, crc32) ⇒ Boolean
28 29 30 |
# File 'lib/tapsoob/utils.rb', line 28
#
# Checks data against an expected CRC-32 checksum.
#
# @param data [String] content to verify
# @param crc32 [#to_i] expected checksum; converted with +to_i+ before comparing
# @return [Boolean] true when Zlib.crc32(data) equals the expected value
def valid_data?(data, crc32)
  actual = Zlib.crc32(data)
  actual == crc32.to_i
end
#windows? ⇒ Boolean
13 14 15 16 17 |
# File 'lib/tapsoob/utils.rb', line 13
#
# Reports whether the host OS name in RbConfig matches mswin or mingw.
# The answer is computed once and cached in @windows.
#
# @return [Boolean] true when RbConfig's host_os matches /mswin|mingw/
def windows?
  unless defined?(@windows)
    require 'rbconfig'
    host_os = ::RbConfig::CONFIG['host_os']
    @windows = !(host_os =~ /mswin|mingw/).nil?
  end
  @windows
end