Class: Protobuf::Nats::Server
- Inherits:
-
Object
- Object
- Protobuf::Nats::Server
- Includes:
- Logging, Rpc::Server
- Defined in:
- lib/protobuf/nats/server.rb
Constant Summary collapse
- MILLISECOND =
1000
Instance Attribute Summary collapse
-
#nats ⇒ Object
readonly
Returns the value of attribute nats.
-
#subscription_manager ⇒ Object
readonly
Returns the value of attribute subscription_manager.
-
#thread_pool ⇒ Object
readonly
Returns the value of attribute thread_pool.
Instance Method Summary collapse
- #detect_and_handle_a_pause ⇒ Object
- #do_not_subscribe_to_includes?(subscription_key) ⇒ Boolean
- #enqueue_request(request_data, reply_id) ⇒ Object
-
#finish_slow_start ⇒ Object
Slow start subscriptions by adding X rounds of subz every Y seconds, where X is subscriptions_per_rpc_endpoint and Y is slow_start_delay.
- #handler_count ⇒ Object
-
#handler_overdue_ms ⇒ Object
A handler still running past this is “overdue”: the client has already given up (its response_timeout), so the work is orphaned and holding a pool slot for nothing.
-
#initialize(options) ⇒ Server
constructor
A new instance of Server.
-
#instrument_inflight_handlers ⇒ Object
Periodic in-flight handler health.
- #instrument_thread_pool_sizes ⇒ Object
- #max_queue_size ⇒ Object
- #monotonic ⇒ Object
- #only_subscribe_to_includes?(subscription_key) ⇒ Boolean
- #pause_file_path ⇒ Object
- #paused? ⇒ Boolean
- #print_subscription_keys ⇒ Object
- #run ⇒ Object
- #running? ⇒ Boolean
- #service_klasses ⇒ Object
-
#shutdown_drain_timeout ⇒ Object
How long to let in-flight handlers finish on shutdown.
-
#slow_handler_threshold_ms ⇒ Object
Informational SLA marker for slow handlers.
- #slow_start_delay ⇒ Object
- #stop ⇒ Object
- #subscribe ⇒ Object
- #subscribe_to_services_once ⇒ Object
- #subscriptions_per_rpc_endpoint ⇒ Object
- #threads ⇒ Object
- #unsubscribe ⇒ Object
- #with_each_subscription_key ⇒ Object
Constructor Details
#initialize(options) ⇒ Server
Returns a new instance of Server.
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/protobuf/nats/server.rb', line 18 def initialize() @options = @processing_requests = true @running = true @stopped = false @pause_mutex = ::Mutex.new @nats = @options[:client] || ::Protobuf::Nats::NatsClient.new @nats.connect(::Protobuf::Nats.config.) @thread_pool = ::Protobuf::Nats::ThreadPool.new(threads, :max_queue => max_queue_size) @subscription_manager = ::Protobuf::Nats::SuperSubscriptionManager.new(@nats) do |request_data, reply_id, subject| unless enqueue_request(request_data, reply_id) logger.error { "Thread pool is full! Dropping message for subject: #{subject}" } end end @server = .fetch(:server, ::Socket.gethostname) # In-flight handler tracking for observability. Long-running handlers are # allowed (and never aborted); we only measure/report. id => monotonic # start time; @overdue_flagged dedupes the per-handler overdue event. @inflight = ::Concurrent::Map.new @overdue_flagged = ::Concurrent::Map.new @request_seq = ::Concurrent::AtomicFixnum.new(0) end |
Instance Attribute Details
#nats ⇒ Object (readonly)
Returns the value of attribute nats.
14 15 16 |
# File 'lib/protobuf/nats/server.rb', line 14 def nats @nats end |
#subscription_manager ⇒ Object (readonly)
Returns the value of attribute subscription_manager.
14 15 16 |
# File 'lib/protobuf/nats/server.rb', line 14 def subscription_manager @subscription_manager end |
#thread_pool ⇒ Object (readonly)
Returns the value of attribute thread_pool.
14 15 16 |
# File 'lib/protobuf/nats/server.rb', line 14 def thread_pool @thread_pool end |
Instance Method Details
#detect_and_handle_a_pause ⇒ Object
291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 |
# File 'lib/protobuf/nats/server.rb', line 291 def detect_and_handle_a_pause @pause_mutex.synchronize do case # If we are taking requests and detect a pause file, then unsubscribe. when @processing_requests && paused? @processing_requests = false logger.warn("Pausing server!") unsubscribe # If we were paused and the pause file is no longer present, then subscribe again. when !@processing_requests && !paused? logger.warn("Resuming server: resubscribing to all services and restarting slow start!") @processing_requests = true subscribe end end end |
#do_not_subscribe_to_includes?(subscription_key) ⇒ Boolean
211 212 213 214 215 216 217 218 |
# File 'lib/protobuf/nats/server.rb', line 211 def do_not_subscribe_to_includes?(subscription_key) return false unless ::Protobuf::Nats.config.server_subscription_key_do_not_subscribe_to_when_includes_any_of.respond_to?(:any?) return false if ::Protobuf::Nats.config.server_subscription_key_do_not_subscribe_to_when_includes_any_of.empty? ::Protobuf::Nats.config.server_subscription_key_do_not_subscribe_to_when_includes_any_of.any? do |key| subscription_key.include?(key) end end |
#enqueue_request(request_data, reply_id) ⇒ Object
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/protobuf/nats/server.rb', line 135 def enqueue_request(request_data, reply_id) ::Protobuf::Nats.instrument "server.message_received" enqueued_at = monotonic request_id = @request_seq.increment was_enqueued = thread_pool.push do begin # Instrument the thread pool time-to-execute duration. processed_at = monotonic ::Protobuf::Nats.instrument("server.thread_pool_execution_delay", (processed_at - enqueued_at) * MILLISECOND) # Track this handler as in-flight (long handlers are allowed; this is # only for observability -- we never abort it). @inflight[request_id] = processed_at # Process request. response_data = handle_request(request_data, 'server' => @server) # Publish response. logger.debug { "Publishing response to #{reply_id}" } if logger.debug? nats.publish(reply_id, response_data) rescue => error logger.debug { "rescued error => #{error}" } if logger.debug? ::Protobuf::Nats.notify_error_callbacks(error) # The client has already received our ACK and is now blocked waiting # for the response message. If we don't send one it will hang until # response_timeout (60s by default). Publish an encoded RPC error so # the client fails fast instead. (If the failure was the connection # itself, this publish will also fail and is swallowed below.) begin error_response = ::Protobuf::Rpc::PbError.new(error.) nats.publish(reply_id, error_response.encode) rescue => publish_error logger.error "Failed to publish error response for #{reply_id}: #{publish_error.}" end ensure @inflight.delete(request_id) @overdue_flagged.delete(request_id) # Instrument the request duration. completed_at = monotonic ::Protobuf::Nats.instrument("server.request_duration", (completed_at - enqueued_at) * MILLISECOND) # Informational slow-handler marker (opt-in; default off). if processed_at && slow_handler_threshold_ms.positive? handler_ms = (completed_at - processed_at) * MILLISECOND if handler_ms >= slow_handler_threshold_ms logger.warn "Slow handler for #{reply_id}: #{handler_ms.round}ms" ::Protobuf::Nats.instrument("server.slow_handler", handler_ms) end end end end # Publish an ACK to signal the server has picked up the work. begin if was_enqueued logger.debug { "[reply_id=#{reply_id}] Sending ACK" } if logger.debug? nats.publish(reply_id, ::Protobuf::Nats::Messages::ACK) else # Drop message if the thread pool is full ::Protobuf::Nats.instrument "server.thread_pool_saturated" ::Protobuf::Nats.instrument "server.message_dropped" logger.debug { "[reply_id=#{reply_id}] Sending NACK" } if logger.debug? # Let the client know we are not processing the message. nats.publish(reply_id, ::Protobuf::Nats::Messages::NACK) end rescue => e logger.error "Failed to send ACK/NACK for #{reply_id}: #{e.}" ::Protobuf::Nats.notify_error_callbacks(e) end was_enqueued end |
#finish_slow_start ⇒ Object
Slow start subscriptions by adding X rounds of subz every Y seconds, where X is subscriptions_per_rpc_endpoint and Y is slow_start_delay.
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
# File 'lib/protobuf/nats/server.rb', line 266 def finish_slow_start logger.info "Slow start has started..." completed = 1 # We have (X - 1) here because we always subscribe at least once. (subscriptions_per_rpc_endpoint - 1).times do unless @running logger.info "Slow start interrupted (server stopping) after #{completed}/#{subscriptions_per_rpc_endpoint} rounds" return end if paused? logger.info "Slow start interrupted (server paused) after #{completed}/#{subscriptions_per_rpc_endpoint} rounds" return end completed += 1 sleep slow_start_delay subscribe_to_services_once logger.info "Slow start adding another round of subscriptions (#{completed}/#{subscriptions_per_rpc_endpoint})..." end logger.info "Slow start finished successfully (#{completed}/#{subscriptions_per_rpc_endpoint} rounds completed)." end |
#handler_count ⇒ Object
49 50 51 |
# File 'lib/protobuf/nats/server.rb', line 49 def handler_count subscription_manager.handler_count end |
#handler_overdue_ms ⇒ Object
A handler still running past this is “overdue”: the client has already given up (its response_timeout), so the work is orphaned and holding a pool slot for nothing. Defaults above the client’s 60s response_timeout so legitimate ≤60s operations are never flagged.
63 64 65 |
# File 'lib/protobuf/nats/server.rb', line 63 def handler_overdue_ms @handler_overdue_ms ||= ::ENV.fetch("PB_NATS_SERVER_HANDLER_OVERDUE_MS", 65_000).to_i end |
#instrument_inflight_handlers ⇒ Object
Periodic in-flight handler health. Long handlers are normal, so inflight_oldest_age_ms can legitimately approach the client’s response_timeout; only overdue_handler_count (work the client has already abandoned) signals a problem.
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/protobuf/nats/server.rb', line 87 def instrument_inflight_handlers now = monotonic overdue_ms = handler_overdue_ms count = 0 oldest_age_ms = 0.0 overdue = 0 @inflight.each_pair do |id, started_at| count += 1 age_ms = (now - started_at) * MILLISECOND oldest_age_ms = age_ms if age_ms > oldest_age_ms next unless overdue_ms.positive? && age_ms >= overdue_ms overdue += 1 # Emit the per-handler overdue event once (the client has already # given up; this handler's result is orphaned). next if @overdue_flagged[id] @overdue_flagged[id] = true logger.warn "Handler exceeded #{overdue_ms}ms (client already gave up); in-flight age=#{age_ms.round}ms" ::Protobuf::Nats.instrument("server.handler_overdue", age_ms) end ::Protobuf::Nats.instrument("server.pending_intake_queue_size", subscription_manager.pending_queue_size) ::Protobuf::Nats.instrument("server.inflight_count", count) ::Protobuf::Nats.instrument("server.inflight_oldest_age_ms", oldest_age_ms) ::Protobuf::Nats.instrument("server.overdue_handler_count", overdue) end |
#instrument_thread_pool_sizes ⇒ Object
77 78 79 80 81 |
# File 'lib/protobuf/nats/server.rb', line 77 def instrument_thread_pool_sizes ::Protobuf::Nats.instrument("server.thread_pool_enqueued_size", thread_pool.enqueued_size) ::Protobuf::Nats.instrument("server.thread_pool_max_size", thread_pool.max_size) ::Protobuf::Nats.instrument("server.thread_pool_running_size", thread_pool.size) end |
#max_queue_size ⇒ Object
115 116 117 |
# File 'lib/protobuf/nats/server.rb', line 115 def max_queue_size ::ENV.fetch("PB_NATS_SERVER_MAX_QUEUE_SIZE", @options[:threads]).to_i end |
#monotonic ⇒ Object
45 46 47 |
# File 'lib/protobuf/nats/server.rb', line 45 def monotonic ::Protobuf::Nats.monotonic_time end |
#only_subscribe_to_includes?(subscription_key) ⇒ Boolean
220 221 222 223 224 225 226 227 |
# File 'lib/protobuf/nats/server.rb', line 220 def only_subscribe_to_includes?(subscription_key) return true unless ::Protobuf::Nats.config.server_subscription_key_only_subscribe_to_when_includes_any_of.respond_to?(:any?) return true if ::Protobuf::Nats.config.server_subscription_key_only_subscribe_to_when_includes_any_of.empty? ::Protobuf::Nats.config.server_subscription_key_only_subscribe_to_when_includes_any_of.any? do |key| subscription_key.include?(key) end end |
#pause_file_path ⇒ Object
229 230 231 |
# File 'lib/protobuf/nats/server.rb', line 229 def pause_file_path ::ENV.fetch("PB_NATS_SERVER_PAUSE_FILE_PATH", nil) end |
#paused? ⇒ Boolean
309 310 311 |
# File 'lib/protobuf/nats/server.rb', line 309 def paused? !pause_file_path.nil? && ::File.exist?(pause_file_path) end |
#print_subscription_keys ⇒ Object
233 234 235 236 237 238 239 |
# File 'lib/protobuf/nats/server.rb', line 233 def print_subscription_keys logger.info "Creating subscriptions:" with_each_subscription_key do |subscription_key| logger.info " - #{subscription_key}" end end |
#run ⇒ Object
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 |
# File 'lib/protobuf/nats/server.rb', line 313 def run nats.on_reconnect do logger.warn "Server NATS connection was reconnected" end nats.on_disconnect do logger.warn "Server NATS connection was disconnected" end nats.on_error do |error| # Runs on nats-pure's read/flush thread -- offload so a slow callback # can't stall the server's intake. ::Protobuf::Nats.notify_error_callbacks_async(error) end nats.on_close do logger.warn "Server NATS connection was closed" end print_subscription_keys if paused? yield if block_given? else subscribe { yield if block_given? } end loop do break unless @running detect_and_handle_a_pause instrument_thread_pool_sizes instrument_inflight_handlers thread_pool.replenish # respawn workers killed by non-StandardError sleep 1 end unsubscribe logger.info "Shutting down subscription manager..." begin Timeout.timeout(10) do subscription_manager.shutdown(5) end rescue Timeout::Error logger.error "Subscription manager shutdown timed out!" rescue => e logger.error "Error during subscription manager shutdown: #{e.}" end # Give in-flight handlers time to finish. Long operations are allowed # (up to ~the client's response_timeout), so the drain timeout tracks # handler_overdue_ms rather than a fixed 60s -- otherwise a legitimate # ~60s handler would be killed and its client left waiting. drain_timeout = shutdown_drain_timeout logger.info "Waiting up to #{drain_timeout.round}s for the thread pool to finish shutting down..." thread_pool.shutdown unless thread_pool.wait_for_termination(drain_timeout) abandoned = @inflight.size logger.warn "Thread pool did not shut down cleanly within #{drain_timeout.round}s! Abandoned #{abandoned} in-flight handler(s)." ::Protobuf::Nats.instrument "server.thread_pool_shutdown_timeout" ::Protobuf::Nats.instrument "server.shutdown_abandoned_handlers", abandoned end ensure @stopped = true begin logger.info "Closing NATS connection..." @nats.close if @nats rescue => e logger.warn "Failed to close NATS connection: #{e.}" end end |
#running? ⇒ Boolean
385 386 387 |
# File 'lib/protobuf/nats/server.rb', line 385 def running? !@stopped end |
#service_klasses ⇒ Object
131 132 133 |
# File 'lib/protobuf/nats/server.rb', line 131 def service_klasses ::Protobuf::Rpc::Service.implemented_services.map(&:safe_constantize) end |
#shutdown_drain_timeout ⇒ Object
How long to let in-flight handlers finish on shutdown. Tracks the overdue window (plus grace) so a legitimate long handler isn’t killed mid-flight.
69 70 71 72 73 74 75 |
# File 'lib/protobuf/nats/server.rb', line 69 def shutdown_drain_timeout @shutdown_drain_timeout ||= if ::ENV.key?("PB_NATS_SERVER_SHUTDOWN_DRAIN_TIMEOUT") ::ENV["PB_NATS_SERVER_SHUTDOWN_DRAIN_TIMEOUT"].to_f else (handler_overdue_ms / 1000.0) + 5 end end |
#slow_handler_threshold_ms ⇒ Object
Informational SLA marker for slow handlers. Default 0 (off) so normal long-running operations are not flagged.
55 56 57 |
# File 'lib/protobuf/nats/server.rb', line 55 def slow_handler_threshold_ms @slow_handler_threshold_ms ||= ::ENV.fetch("PB_NATS_SERVER_SLOW_HANDLER_THRESHOLD_MS", 0).to_i end |
#slow_start_delay ⇒ Object
119 120 121 |
# File 'lib/protobuf/nats/server.rb', line 119 def slow_start_delay @slow_start_delay ||= ::ENV.fetch("PB_NATS_SERVER_SLOW_START_DELAY", 10).to_i end |
#stop ⇒ Object
389 390 391 |
# File 'lib/protobuf/nats/server.rb', line 389 def stop @running = false end |
#subscribe ⇒ Object
393 394 395 396 397 |
# File 'lib/protobuf/nats/server.rb', line 393 def subscribe subscribe_to_services_once yield if block_given? finish_slow_start end |
#subscribe_to_services_once ⇒ Object
241 242 243 244 245 |
# File 'lib/protobuf/nats/server.rb', line 241 def subscribe_to_services_once with_each_subscription_key do |subscription_key_and_queue| subscription_manager.queue_subscribe(subscription_key_and_queue) end end |
#subscriptions_per_rpc_endpoint ⇒ Object
123 124 125 |
# File 'lib/protobuf/nats/server.rb', line 123 def subscriptions_per_rpc_endpoint @subscriptions_per_rpc_endpoint ||= ::ENV.fetch("PB_NATS_SERVER_SUBSCRIPTIONS_PER_RPC_ENDPOINT", 10).to_i end |
#threads ⇒ Object
127 128 129 |
# File 'lib/protobuf/nats/server.rb', line 127 def threads @options[:threads] || 10 # Default to 10 if not provided, consistent with original behavior end |
#unsubscribe ⇒ Object
399 400 401 402 |
# File 'lib/protobuf/nats/server.rb', line 399 def unsubscribe logger.info "Unsubscribing from rpc routes..." subscription_manager.unsubscribe_all end |
#with_each_subscription_key ⇒ Object
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/protobuf/nats/server.rb', line 247 def with_each_subscription_key fail ::ArgumentError unless block_given? service_klasses.each do |service_klass| service_klass.rpcs.each do |service_method, _| # Skip services that are not implemented. next unless service_klass.method_defined?(service_method) subscription_key = ::Protobuf::Nats.subscription_key(service_klass, service_method) next if do_not_subscribe_to_includes?(subscription_key) next unless only_subscribe_to_includes?(subscription_key) yield subscription_key end end end |