Class: WaterDrop::Producer

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Karafka::Core::Helpers::Time, Async, Buffer, Sync
Defined in:
lib/waterdrop/producer.rb,
lib/waterdrop/producer/sync.rb,
lib/waterdrop/producer/async.rb,
lib/waterdrop/producer/buffer.rb,
lib/waterdrop/producer/status.rb,
lib/waterdrop/producer/builder.rb

Overview

Main WaterDrop messages producer

Defined Under Namespace

Modules: Async, Buffer, Sync
Classes: Builder, Status

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Buffer

#buffer, #buffer_many, #flush_async, #flush_sync

Methods included from Async

#produce_async, #produce_many_async

Methods included from Sync

#produce_many_sync, #produce_sync

Constructor Details

#initialize(&block) ⇒ Producer

Creates a not-yet-configured instance of the producer

Parameters:

  • block (Proc)

    configuration block



36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/waterdrop/producer.rb', line 36

# Builds a producer instance. Without a block the producer is left unconfigured; with a
# block the block is handed straight to #setup.
#
# @param block [Proc] optional configuration block forwarded to #setup
def initialize(&block)
  # Counter that #close polls until it reaches zero before closing the client
  @operations_in_progress = Concurrent::AtomicFixnum.new(0)

  # Three independent locks: one for the buffer, one for connecting, one for operating
  @buffer_mutex, @connecting_mutex, @operating_mutex = Array.new(3) { Mutex.new }

  @status = Status.new
  @messages = Concurrent::Array.new

  setup(&block) if block
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns dry-configurable config object.

Returns:

  • (Object)

    dry-configurable config object



31
32
33
# File 'lib/waterdrop/producer.rb', line 31

# Reader for the configuration stored by #setup.
#
# @return [Object] dry-configurable config object (nil until #setup has assigned it)
def config
  @config
end

#id ⇒ String (readonly)

Returns uuid of the current producer.

Returns:

  • (String)

    uuid of the current producer



23
24
25
# File 'lib/waterdrop/producer.rb', line 23

# Reader for the producer id, which #setup copies from the config.
#
# @return [String] uuid of the current producer (nil until #setup has assigned it)
def id
  @id
end

#messages ⇒ Concurrent::Array (readonly)

Returns internal messages buffer.

Returns:

  • (Concurrent::Array)

    internal messages buffer



27
28
29
# File 'lib/waterdrop/producer.rb', line 27

# Reader for the internal buffer created in #initialize.
#
# @return [Concurrent::Array] internal messages buffer
def messages
  @messages
end

#monitor ⇒ Object (readonly)

Returns monitor we want to use.

Returns:

  • (Object)

    monitor we want to use



29
30
31
# File 'lib/waterdrop/producer.rb', line 29

# Reader for the monitor, which #setup copies from the config.
#
# @return [Object] monitor we want to use (nil until #setup has assigned it)
def monitor
  @monitor
end

#status ⇒ Status (readonly)

Returns producer status object.

Returns:

  • (Status)

    producer status object



25
26
27
# File 'lib/waterdrop/producer.rb', line 25

# Reader for the status object created in #initialize.
#
# @return [Status] producer status object
def status
  @status
end

Instance Method Details

#client ⇒ Rdkafka::Producer

Note:

The client is lazily initialized, which also accounts for a fork that may happen at any time.

Note:

It is not recommended to fork a producer that is already in use, so when bootstrapping a cluster it is much better to fork producers that are configured but not yet used.

Returns raw rdkafka producer.

Returns:

  • (Rdkafka::Producer)

    raw rdkafka producer

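Raises:

  • (Errors::ProducerNotConfiguredError)

    when the producer has not been configured yet

  • (Errors::ProducerUsedInParentProcess)

    when the existing client was created under a different process id than the current one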



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/waterdrop/producer.rb', line 71

# Returns the raw rdkafka client, building it on first use.
#
# The client is stored together with the pid of the process that built it and is only
# handed back when the current `Process.pid` equals that stored pid.
#
# @return [Rdkafka::Producer] raw rdkafka producer
# @raise [Errors::ProducerNotConfiguredError] when the producer status is still initial
# @raise [Errors::ProducerUsedInParentProcess] when a client is present but the stored pid
#   differs from the current process pid
def client
  # Check done without taking the mutex: a client exists and was built by this process
  return @client if @client && @pid == Process.pid

  # Don't allow to obtain a client reference for a producer that was not configured
  raise Errors::ProducerNotConfiguredError, id if @status.initial?

  @connecting_mutex.synchronize do
    # Same check again, now under the mutex, in case the client was built while this call
    # was waiting for the lock
    return @client if @client && @pid == Process.pid

    # We undefine all the finalizers, in case it was a fork, so the finalizers from the parent
    # process don't leak
    ObjectSpace.undefine_finalizer(id)

    # We should raise an error when trying to use a producer with client from a fork. Always.
    # Reaching this point with @client set means the stored pid did not match Process.pid
    if @client
      # We need to reset the client, otherwise there might be attempt to close the parent
      # client
      # NOTE(review): because @client is cleared here, a later call appears to skip this
      # branch and build a new client in this process - confirm that is intended
      @client = nil
      raise Errors::ProducerUsedInParentProcess, Process.pid
    end

    # Finalizer tracking is needed for handling shutdowns gracefully.
    # I don't expect everyone to remember about closing all the producers all the time, thus
    # this approach is better. Although it is still worth keeping in mind, that this will
    # block GC from removing a no longer used producer unless closed properly but at least
    # won't crash the VM upon closing the process
    # The finalizer is attached to the id object (not to the producer) and calls #close
    ObjectSpace.define_finalizer(id, proc { close })

    # Remember which process built the client so the pid checks above can compare against it
    @pid = Process.pid
    @client = Builder.new.call(self, @config)

    # Register statistics runner for this particular type of callbacks
    ::Karafka::Core::Instrumentation.statistics_callbacks.add(
      @id,
      Instrumentation::Callbacks::Statistics.new(@id, @client.name, @config.monitor)
    )

    # Register error tracking callback
    ::Karafka::Core::Instrumentation.error_callbacks.add(
      @id,
      Instrumentation::Callbacks::Error.new(@id, @client.name, @config.monitor)
    )

    # Status is switched only after the client and both callbacks are in place
    @status.connected!
  end

  @client
end

#close ⇒ Object

Flushes the buffers in a sync way and closes the producer



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/waterdrop/producer.rb', line 121

# Flushes the buffers in a sync way and closes the producer.
#
# The whole procedure runs under `@operating_mutex` and is wrapped in a 'producer.closed'
# instrumentation call. When the status is not active, nothing is done.
#
# @return [Object, nil] nil when the producer was not active; otherwise whatever the
#   `@monitor.instrument` call returns
def close
  @operating_mutex.synchronize do
    # Nothing to close unless the status reports active; this `return` leaves the method
    return unless @status.active?

    @monitor.instrument(
      'producer.closed',
      producer_id: id
    ) do
      @status.closing!

      # No need for auto-gc if everything got closed by us
      # This should be used only in case a producer was not closed properly and forgotten
      ObjectSpace.undefine_finalizer(id)

      # We save this thread id because we need to bypass the activity verification on the
      # producer for final flush of buffers.
      @closing_thread_id = Thread.current.object_id

      # Wait until all the outgoing operations are done. Only when no one is using the
      # underlying client running operations we can close
      # Polls the counter, sleeping 1 ms between checks
      sleep(0.001) until @operations_in_progress.value.zero?

      # Flush has its own buffer mutex but even if it is blocked, flushing can still happen
      # as we close the client after the flushing (even if blocked by the mutex)
      # NOTE(review): the `true` argument presumably requests a synchronous flush - confirm
      # against the #flush definition
      flush(true)

      # We should not close the client in several threads the same time
      # It is safe to run it several times but not exactly the same moment
      # We also mark it as closed only if it was connected, if not, it would trigger a new
      # connection that anyhow would be immediately closed
      if @client
        # Goes through the #client reader rather than @client directly, so the pid check
        # performed by #client applies here as well
        client.close
        @client = nil
      end

      # Remove callbacks runners that were registered
      ::Karafka::Core::Instrumentation.statistics_callbacks.delete(@id)
      ::Karafka::Core::Instrumentation.error_callbacks.delete(@id)

      @status.closed!
    end
  end
end

#setup(&block) ⇒ Object

Sets up the whole configuration and initializes all that is needed

Parameters:

  • block (Block)

    configuration block

Raises:

  • (Errors::ProducerAlreadyConfiguredError)

    when the producer has already been configured



52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/waterdrop/producer.rb', line 52

# Configures the producer from the given block. Allowed only while the status is still
# initial; afterwards the status is switched to configured.
#
# @param block [Proc] configuration block forwarded to Config#setup
# @raise [Errors::ProducerAlreadyConfiguredError] when the status is no longer initial
def setup(&block)
  unless @status.initial?
    raise Errors::ProducerAlreadyConfiguredError, id
  end

  settings = Config.new.setup(&block).config

  # Keep the config itself plus the values other methods read directly
  @config = settings
  @id = settings.id
  @monitor = settings.monitor
  @contract = Contracts::Message.new(max_payload_size: settings.max_payload_size)

  @status.configured!
end