Class: Fluent::Plugin::MysqlReplicatorInput

Inherits:
Input
  • Object
show all
Defined in:
lib/fluent/plugin/in_mysql_replicator.rb

Instance Method Summary collapse

Instance Method Details

#configure(conf) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 29

# Applies the plugin configuration.
#
# Delegates to the parent implementation, converts @interval through
# Fluent::Config.time_value, rejects a configuration without a tag and
# finally logs the effective worker settings.
#
# @param conf the configuration object, forwarded unchanged to `super`
# @raise [Fluent::ConfigError] when @tag is nil
def configure(conf)
  super
  @interval = Fluent::Config.time_value(@interval)

  missing_tag_hint = "mysql_replicator: missing 'tag' parameter. Please add following line into config like 'tag replicator.mydatabase.mytable.${event}.${primary_key}'"
  raise Fluent::ConfigError, missing_tag_hint if @tag.nil?

  # One entry per setting, joined with single spaces into the log line.
  settings = [
    ":tag=>#{tag}",
    ":query=>#{@query}",
    ":prepared_query=>#{@prepared_query}",
    ":interval=>#{@interval}sec",
    ":enable_delete=>#{enable_delete}",
    ":json_columns=>#{@json_columns}"
  ]
  log.info "adding mysql_replicator worker. #{settings.join(' ')}"
end

#detect_deleted_ids(previous_ids, current_ids) ⇒ Object

Returns the primary keys that disappeared since the previous poll.

The first poll only establishes a baseline: there is no previous snapshot to diff against, so nothing is reported as deleted yet. An empty current result set likewise reports no deletions. This also avoids the old `[*1...current_ids.max]` range, which raised “bad value for range” for non-integer primary keys and allocated a huge array (and emitted phantom deletes) for large / sparse integer ids. (#42)



132
133
134
135
136
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 132

# Computes which primary keys were present in the previous poll but are
# absent from the current one.
#
# Nothing is reported when either snapshot is empty: an empty previous
# snapshot means there is no baseline yet, and an empty current snapshot
# is not treated as "everything was deleted".
#
# @param previous_ids [Array] primary keys seen by the previous poll
# @param current_ids [Array] primary keys seen by the current poll
# @return [Array] keys in previous_ids that are missing from current_ids
def detect_deleted_ids(previous_ids, current_ids)
  if previous_ids.empty? || current_ids.empty?
    []
  else
    previous_ids - current_ids
  end
end

#emit_record(tag, record) ⇒ Object



171
172
173
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 171

# Emits a single record through the plugin's router, stamped with the
# engine's current time.
#
# @param tag [String] the event tag to emit under
# @param record [Hash] the event payload
def emit_record(tag, record)
  destination = router
  destination.emit(tag, Fluent::Engine.now, record)
end

#format_tag(tag, param) ⇒ Object



154
155
156
157
158
159
160
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 154

# Expands `${...}` placeholders in a tag template.
#
# `${event}` becomes the string form of param[:event] and `${primary_key}`
# becomes @primary_key. Any other placeholder is logged as a warning and
# replaced with an empty string.
#
# @param tag [String] tag template, possibly containing placeholders
# @param param [Hash] values for the expansion; only :event is read
# @return [String] the expanded tag
def format_tag(tag, param)
  replacements = {
    '${event}' => param[:event].to_s,
    '${primary_key}' => @primary_key
  }
  # The capture group spans the whole match, so the block argument is the
  # placeholder itself.
  tag.gsub(/(\${[a-z_]+})/) do |placeholder|
    unless replacements.key?(placeholder)
      log.warn "mysql_replicator: missing placeholder. :tag=>#{tag} :placeholder=>#{placeholder}"
    end
    replacements[placeholder]
  end
end

#get_connection ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 187

# Opens a new MySQL connection from the plugin's connection settings,
# retrying until one can be established.
#
# Every failed attempt is logged as a warning and retried after sleeping
# @interval seconds; there is no upper bound on the number of attempts.
# Only StandardError is retried: rescuing Exception here would also
# swallow Interrupt, SystemExit and NoMemoryError and keep looping, which
# makes the worker impossible to stop while the database is unreachable.
#
# @return [Mysql2::Client] a freshly created client
def get_connection
  Mysql2::Client.new({
    :host => @host,
    :port => @port,
    :username => @username,
    :password => @password,
    :database => @database,
    :encoding => @encoding,
    :reconnect => true,
    :stream => true,
    :cache_rows => false
  })
rescue StandardError => e
  log.warn "mysql_replicator: #{e}"
  sleep @interval
  retry
end

#hash_delete_by_list(hash, deleted_keys) ⇒ Object



121
122
123
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 121

# Removes every listed key from the given hash in place. Keys that are not
# present are ignored.
#
# @param hash [Hash] the hash to prune (mutated)
# @param deleted_keys [Array] keys to remove
# @return [Array] the deleted_keys list that was passed in
def hash_delete_by_list(hash, deleted_keys)
  deleted_keys.each do |stale_key|
    hash.delete(stale_key)
  end
end

#nested_query_value?(value) ⇒ Boolean

A column value triggers a nested sub-query only when it is a query template containing a `${placeholder}` (e.g. “SELECT … WHERE x = ${id}”). Requiring the placeholder prevents ordinary text values that merely begin with the word “SELECT” from being executed as SQL. (#4; mirrors the fix already applied to mysql_replicator_multi in #6.)

Returns:

  • (Boolean)


167
168
169
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 167

# Tells whether a column value is a nested-query template: after trimming
# surrounding whitespace it must start with SELECT (case-insensitive) and
# contain a `${...}` placeholder later on.
#
# The pattern is anchored with \A rather than ^. In Ruby ^ matches at the
# start of every line, so a multi-line text value whose second or later
# line happened to look like "SELECT ... ${x}" would otherwise be treated
# as SQL.
#
# @param value [Object] a column value; converted with to_s before matching
# @return [Boolean]
def nested_query_value?(value)
  value.to_s.strip.match?(/\ASELECT[^\$]+\$\{[^\}]+\}/i)
end

#parse_json_columns!(row, columns) ⇒ Object

Parse the given columns’ JSON string values into nested objects in place. Non-string values, missing columns, and malformed JSON are left untouched so enabling this never corrupts non-JSON data.



141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 141

# Replaces the JSON text stored in the named columns with the parsed
# object, mutating the row.
#
# A column is skipped when its value is not a String (this includes
# columns missing from the row) or when the text is not valid JSON; in
# both cases the row keeps its original value.
#
# @param row [Hash] the row to update in place
# @param columns [Array] names of the columns to parse
# @return [Array, nil] nil when columns is empty, otherwise columns
def parse_json_columns!(row, columns)
  return if columns.empty?

  columns.each do |name|
    raw = row[name]
    next unless raw.is_a?(String)

    begin
      decoded = JSON.parse(raw)
    rescue JSON::ParserError
      # Malformed JSON: keep the original string.
      next
    end
    row[name] = decoded
  end
end

#poll ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 59

# Main polling loop. Runs until an exception escapes.
#
# On every iteration it:
#   1. runs each `;`-separated statement of @prepared_query (if set) on
#      the secondary connection,
#   2. runs @query and walks the result rows,
#   3. emits an :insert event for a primary key that has not been seen
#      before and an :update event when the row's SHA1 digest changed,
#   4. when @enable_delete is set, emits a :delete event for every key
#      returned by detect_deleted_ids,
#   5. logs a summary and sleeps for @interval seconds.
#
# Column values accepted by nested_query_value? are treated as query
# templates: `${column}` placeholders are filled with the values of the
# current row, the resulting SQL is executed, and the column is replaced
# with the array of returned rows.
def poll
  table_hash = {}   # primary key => SHA1 digest of the row as last seen
  ids = []          # primary keys collected by the previous iteration
  con = get_connection
  prepared_con = get_connection
  loop do
    rows_count = 0
    start_time = Time.now
    previous_ids = ids
    current_ids = []
    unless @prepared_query.nil?
      @prepared_query.split(/;/).each do |statement|
        prepared_con.query(statement)
      end
    end
    rows, con = query(@query, con)
    rows.each do |row|
      current_ids << row[@primary_key]
      # Digest is taken from the raw row, before any value is rewritten.
      current_hash = Digest::SHA1.hexdigest(row.flatten.join)
      row.each {|k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
      parse_json_columns!(row, @json_columns)
      row.select {|_, v| nested_query_value?(v) }.each do |column, template|
        row[column] = [] unless row[column].is_a?(Array)
        # The block form is required here. With a plain replacement
        # argument, `row[$1]` is evaluated once, before gsub has matched
        # anything, so every placeholder received a stale (or nil) capture
        # instead of the column it names.
        nested_sql = template.gsub(/\$\{([^\}]+)\}/) { row[Regexp.last_match(1)].to_s }
        nest_rows, prepared_con = query(nested_sql, prepared_con)
        nest_rows.each do |nest_row|
          nest_row.each {|k, v| nest_row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal)}
          row[column] << nest_row
        end
        # NOTE(review): the secondary connection is closed after each
        # nested query; query() replaces a connection whose ping fails,
        # but the @prepared_query statements above use it directly.
        prepared_con.close
      end
      if row[@primary_key].nil?
        log.error "mysql_replicator: missing primary_key. :tag=>#{tag} :primary_key=>#{primary_key}"
        # Stops processing the remaining rows of this iteration.
        break
      end
      if !table_hash.include?(row[@primary_key])
        event_tag = format_tag(@tag, {:event => :insert})
        emit_record(event_tag, row)
      elsif table_hash[row[@primary_key]] != current_hash
        event_tag = format_tag(@tag, {:event => :update})
        emit_record(event_tag, row)
      end
      table_hash[row[@primary_key]] = current_hash
      rows_count += 1
    end
    con.close
    ids = current_ids
    if @enable_delete
      deleted_ids = detect_deleted_ids(previous_ids, current_ids)
      unless deleted_ids.empty?
        hash_delete_by_list(table_hash, deleted_ids)
        deleted_ids.each do |id|
          event_tag = format_tag(@tag, {:event => :delete})
          emit_record(event_tag, {@primary_key => id})
        end
      end
    end
    elapsed_time = sprintf("%0.02f", Time.now - start_time)
    log.info "mysql_replicator: finished execution :tag=>#{tag} :rows_count=>#{rows_count} :elapsed_time=>#{elapsed_time} sec"
    sleep @interval
  end
end

#query(query, con = nil) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 175

# Runs a SQL statement, (re)establishing the connection when needed.
#
# A nil connection, or one whose ping fails, is replaced with a new
# connection from get_connection. Any StandardError raised along the way
# is logged as a warning and the whole attempt is retried after @interval
# seconds, without an upper bound (a statement that always fails keeps
# retrying). Exceptions outside StandardError (Interrupt, SystemExit,
# NoMemoryError, ...) are deliberately not rescued so they can stop the
# worker instead of being retried forever.
#
# @param query [String] the SQL to execute
# @param con the connection to use, or nil to open one
# @return [Array] a two-element array: the query result and the
#   connection that was actually used
def query(query, con = nil)
  con = get_connection if con.nil?
  con = get_connection unless con.ping
  [con.query(query), con]
rescue StandardError => e
  log.warn "mysql_replicator: #{e}"
  sleep @interval
  retry
end

#run ⇒ Object



49
50
51
52
53
54
55
56
57
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 49

# Entry point of the worker thread: runs the polling loop and, if a
# StandardError escapes from it, logs a fixed headline, the error message
# and the backtrace instead of letting the error propagate.
def run
  poll
rescue StandardError => failure
  log.error "mysql_replicator: failed to execute query."
  log.error "error: #{failure.message}"
  log.error failure.backtrace.join("\n")
end

#shutdown ⇒ Object



45
46
47
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 45

# Shuts the plugin down. No plugin-specific work is done here; the call
# is handed straight to the parent implementation.
def shutdown
  super
end

#start ⇒ Object



40
41
42
43
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 40

# Starts the plugin: runs the parent start-up, then creates the
# :in_mysql_replicator_runner thread with #run as its body.
def start
  super
  runner = method(:run)
  thread_create(:in_mysql_replicator_runner, &runner)
end