Class: Fluent::Plugin::MysqlReplicatorInput

Inherits:
Input
  • Object
show all
Defined in:
lib/fluent/plugin/in_mysql_replicator.rb

Instance Method Summary collapse

Instance Method Details

#configure(conf) ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 32

# Validates the plugin configuration and logs the effective worker settings.
#
# The configured interval is converted through Fluent::Config.time_value, and
# the plugin refuses to start when no tag has been configured.
#
# @param conf the configuration object; handed to the parent class via `super`
# @raise [Fluent::ConfigError] when the 'tag' parameter is missing
def configure(conf)
  super
  @interval = Fluent::Config.time_value(@interval)

  if @tag.nil?
    message = "mysql_replicator: missing 'tag' parameter. Please add following line into config like 'tag replicator.mydatabase.mytable.${event}.${primary_key}'"
    raise Fluent::ConfigError, message
  end

  settings = ":tag=>#{tag} :query=>#{@query} :prepared_query=>#{@prepared_query} :interval=>#{@interval}sec :enable_delete=>#{enable_delete} :json_columns=>#{@json_columns}"
  log.info "adding mysql_replicator worker. #{settings}"
end

#detect_deleted_ids(previous_ids, current_ids) ⇒ Object

Returns the primary keys that disappeared since the previous poll.

The first poll only establishes a baseline: there is no previous snapshot to diff against, so nothing is reported as deleted yet. An empty current snapshot likewise reports nothing. This also avoids the old `[*1...current_ids.max]` range, which raised “bad value for range” for non-integer primary keys and allocated a huge array (and emitted phantom deletes) for large / sparse integer ids. (#42)



142
143
144
145
146
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 142

# Lists the ids that were present in the previous poll but are absent from
# the current one.
#
# Nothing is reported when either snapshot is empty: with no previous
# snapshot there is nothing to diff against, and an empty current snapshot
# is never turned into delete events.
#
# @param previous_ids [Array] ids collected by the previous poll
# @param current_ids [Array] ids collected by the current poll
# @return [Array] the ids of previous_ids that are missing from current_ids
def detect_deleted_ids(previous_ids, current_ids)
  if previous_ids.empty? || current_ids.empty?
    []
  else
    previous_ids - current_ids
  end
end

#emit_record(tag, record) ⇒ Object



181
182
183
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 181

# Emits one record through the router, stamped with Fluent::Engine.now.
#
# @param tag [String] the tag to emit under
# @param record [Hash] the record payload
def emit_record(tag, record)
  timestamp = Fluent::Engine.now
  router.emit(tag, timestamp, record)
end

#extract_id(row) ⇒ Object

A row’s id is the array of its primary-key column values, supporting composite keys. It is a single-element array for a single-column key.



131
132
133
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 131

# Builds a row's id: the values of the primary-key columns, in the order the
# columns are listed in @primary_key. A single-column key therefore yields a
# one-element array.
#
# @param row the row to read the key columns from (indexed with `[]`)
# @return [Array] one value per primary-key column
def extract_id(row)
  @primary_key.map do |column|
    row[column]
  end
end

#format_tag(tag, param) ⇒ Object



164
165
166
167
168
169
170
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 164

# Expands the `${event}` and `${primary_key}` placeholders of a tag template.
#
# `${event}` becomes param[:event] as a string and `${primary_key}` becomes
# the primary-key column names joined with commas. Any other `${...}`
# placeholder is logged as a warning and replaced with an empty string.
#
# @param tag [String] the tag template
# @param param [Hash] substitution values; only :event is read
# @return [String] the expanded tag
def format_tag(tag, param)
  substitutions = {
    '${event}' => param[:event].to_s,
    '${primary_key}' => @primary_key.join(',')
  }
  # The capture group spans the whole match, so the yielded text is the placeholder itself.
  tag.gsub(/(\${[a-z_]+})/) do |placeholder|
    unless substitutions.key?(placeholder)
      log.warn "mysql_replicator: missing placeholder. :tag=>#{tag} :placeholder=>#{placeholder}"
    end
    substitutions[placeholder]
  end
end

#get_connectionObject



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 197

# Opens a new Mysql2 client from the configured connection settings.
#
# When the connection attempt fails, the error is logged as a warning, the
# method sleeps for @interval and then tries again, indefinitely. Only
# StandardError is retried: Interrupt, SystemExit, NoMemoryError and other
# non-standard exceptions propagate to the caller instead of being swallowed
# by the retry loop.
#
# @return [Mysql2::Client] the newly created client
def get_connection
  Mysql2::Client.new({
    :host => @host,
    :port => @port,
    :username => @username,
    :password => @password,
    :database => @database,
    :encoding => @encoding,
    :reconnect => true,
    :stream => true,
    :cache_rows => false
  })
rescue StandardError => e
  log.warn "mysql_replicator: #{e}"
  sleep @interval
  retry
end

#hash_delete_by_list(hash, deleted_keys) ⇒ Object



125
126
127
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 125

# Removes every listed key from the hash in place; keys that are not present
# are ignored.
#
# @param hash [Hash] the hash to prune
# @param deleted_keys [Array] the keys to remove
# @return [Array] deleted_keys, unchanged
def hash_delete_by_list(hash, deleted_keys)
  deleted_keys.each do |stale_key|
    hash.delete(stale_key)
  end
end

#nested_query_value?(value) ⇒ Boolean

A column value triggers a nested sub-query only when it is a query template containing a `${placeholder}` (e.g. “SELECT ... WHERE x = ${id}”). Requiring the placeholder prevents ordinary text values that merely begin with the word “SELECT” from being executed as SQL. (#4; mirrors the fix already applied to mysql_replicator_multi in #6.)

Returns:

  • (Boolean)


177
178
179
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 177

# Tells whether a column value is a nested-query template.
#
# The value (converted with to_s and stripped) must start with SELECT,
# case-insensitively, and contain a `${...}` placeholder after it. The match
# is anchored with \A rather than ^ so that only the beginning of the whole
# value counts: with ^, multi-line text whose second or later line happened
# to start with "SELECT ... ${x}" was also treated as SQL.
#
# @param value the column value to inspect (any object)
# @return [Boolean] true when the value is a query template
def nested_query_value?(value)
  value.to_s.strip.match?(/\ASELECT[^\$]+\$\{[^\}]+\}/i)
end

#parse_json_columns!(row, columns) ⇒ Object

Parse the given columns’ JSON string values into nested objects in place. Non-string values, missing columns, and malformed JSON are left untouched so enabling this never corrupts non-JSON data.



151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 151

# Replaces, in place, the JSON string stored in each listed column with the
# value JSON.parse produces for it.
#
# A column is skipped when its value is not a String (which includes columns
# absent from the row) or when the string is not valid JSON; the row keeps
# its original value in those cases.
#
# @param row the row to modify (read and written with `[]` / `[]=`)
# @param columns [Array] names of the columns to parse
# @return [Array, nil] columns, or nil when columns is empty
def parse_json_columns!(row, columns)
  return if columns.empty?

  columns.each do |column|
    raw = row[column]
    next unless raw.is_a?(String)

    begin
      decoded = JSON.parse(raw)
    rescue JSON::ParserError
      # Malformed JSON: keep the original string.
      next
    end
    row[column] = decoded
  end
end

#pollObject



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 62

# Polls the source query forever and emits insert / update / delete events.
#
# Every iteration:
#   1. runs each ';'-separated statement of @prepared_query (when set) on the
#      secondary connection,
#   2. runs @query and, for every row, builds its id, fingerprints the raw row
#      with SHA1, converts Time/Date/BigDecimal values to strings, parses the
#      configured JSON columns and expands nested-query columns,
#   3. emits an insert event for ids not seen before and an update event when
#      the fingerprint differs from the one remembered in table_hash,
#   4. when @enable_delete is set, emits a delete event (carrying only the
#      primary-key columns) for every id detect_deleted_ids reports,
#   5. logs the row count and elapsed time, then sleeps for @interval.
#
# The loop has no exit condition of its own; it ends only when an exception
# escapes it.
def poll
  table_hash = {} # id => SHA1 fingerprint of the row as last seen
  ids = []
  con = get_connection
  prepared_con = get_connection
  loop do
    rows_count = 0
    start_time = Time.now
    previous_ids = ids
    current_ids = []
    unless @prepared_query.nil?
      @prepared_query.split(/;/).each do |statement|
        prepared_con.query(statement)
      end
    end
    rows, con = query(@query, con)
    rows.each do |row|
      id = extract_id(row)
      current_ids << id
      # Fingerprint the raw row, before any of the conversions below.
      current_hash = Digest::SHA1.hexdigest(row.flatten.join)
      row.each { |k, v| row[k] = v.to_s if v.is_a?(Time) || v.is_a?(Date) || v.is_a?(BigDecimal) }
      parse_json_columns!(row, @json_columns)
      row.select { |_column, value| nested_query_value?(value) }.each do |column, template|
        row[column] = [] unless row[column].is_a?(Array)
        # Substitute each ${name} with this row's value for that column. The
        # block form is required: passing `row[$1].to_s` as a plain argument
        # evaluates it once, before gsub has matched anything.
        # NOTE(review): values are interpolated into the SQL text unescaped.
        nested_sql = template.gsub(/\$\{([^\}]+)\}/) { row[Regexp.last_match(1)].to_s }
        nest_rows, prepared_con = query(nested_sql, prepared_con)
        nest_rows.each do |nest_row|
          nest_row.each do |nest_key, nest_value|
            nest_row[nest_key] = nest_value.to_s if nest_value.is_a?(Time) || nest_value.is_a?(Date) || nest_value.is_a?(BigDecimal)
          end
          row[column] << nest_row
        end
        prepared_con.close
      end
      if id.any?(&:nil?)
        # NOTE(review): the id was already appended to current_ids and the
        # remaining rows are skipped, so the delete detection below works on
        # a partial snapshot for this iteration - confirm this is intended.
        log.error "mysql_replicator: missing primary_key. :tag=>#{tag} :primary_key=>#{@primary_key.join(',')} :id=>#{id}"
        break
      end
      if !table_hash.include?(id)
        emit_record(format_tag(@tag, {:event => :insert}), row)
      elsif table_hash[id] != current_hash
        emit_record(format_tag(@tag, {:event => :update}), row)
      end
      table_hash[id] = current_hash
      rows_count += 1
    end
    con.close
    ids = current_ids
    if @enable_delete
      deleted_ids = detect_deleted_ids(previous_ids, current_ids)
      if deleted_ids.count > 0
        hash_delete_by_list(table_hash, deleted_ids)
        deleted_ids.each do |deleted_id|
          emit_record(format_tag(@tag, {:event => :delete}), Hash[@primary_key.zip(deleted_id)])
        end
      end
    end
    elapsed_time = sprintf("%0.02f", Time.now - start_time)
    log.info "mysql_replicator: finished execution :tag=>#{tag} :rows_count=>#{rows_count} :elapsed_time=>#{elapsed_time} sec"
    sleep @interval
  end
end

#query(query, con = nil) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 185

# Runs a SQL statement, (re)connecting first when necessary.
#
# A connection is obtained from get_connection when none is given or when
# the given one does not answer `ping`. If anything in that sequence raises
# a StandardError, the error is logged as a warning, the method sleeps for
# @interval and starts over, indefinitely. Non-standard exceptions such as
# Interrupt, SystemExit or NoMemoryError are not retried and propagate to
# the caller.
#
# @param query [String] the SQL to execute
# @param con the connection to use, or nil to open a new one
# @return [Array] a two-element array: the query result and the connection
#   that was actually used (callers should keep using that connection)
def query(query, con = nil)
  con = get_connection if con.nil?
  con = get_connection unless con.ping
  [con.query(query), con]
rescue StandardError => e
  log.warn "mysql_replicator: #{e}"
  sleep @interval
  retry
end

#runObject



52
53
54
55
56
57
58
59
60
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 52

# Entry point of the worker: runs poll and, if a StandardError escapes it,
# logs a fixed failure line, the error message and the backtrace instead of
# re-raising.
def run
  poll
rescue StandardError => error
  log.error "mysql_replicator: failed to execute query."
  log.error "error: #{error.message}"
  log.error error.backtrace.join("\n")
end

#shutdownObject



48
49
50
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 48

# Shuts the plugin down by deferring entirely to the parent class.
def shutdown
  super
end

#startObject



43
44
45
46
# File 'lib/fluent/plugin/in_mysql_replicator.rb', line 43

# Starts the plugin: after the parent class's start logic, hands the `run`
# method to thread_create under the name :in_mysql_replicator_runner.
def start
  super
  runner = method(:run)
  thread_create(:in_mysql_replicator_runner, &runner)
end