Class: Clacky::Server::Master

Inherits:
Object
  • Object
show all
Defined in:
lib/clacky/server/server_master.rb

Overview

Master process — owns the listen socket, spawns/monitors worker processes.

Lifecycle:

clacky server
  └─ Master.run  (this file)
        ├─ creates TCPServer, holds it forever
        ├─ spawns Worker via spawn() — full new Ruby process, loads fresh gem
        ├─ traps USR1 → hot_restart (spawn new worker, gracefully stop old)
        └─ traps TERM/INT → shutdown (stop worker, exit cleanly)

Worker receives:

CLACKY_WORKER=1          — "I am a worker, start HttpServer directly"
CLACKY_INHERIT_FD=<n>   — file descriptor number of the inherited TCPServer socket
CLACKY_MASTER_PID=<n>   — master PID so worker can send USR1 back on upgrade

Constant Summary collapse

RESTART_EXIT_CODE =

Worker exits with this code to request a hot restart (e.g. after gem upgrade).

75
MAX_CONSECUTIVE_FAILURES =
5
NEW_WORKER_BOOT_WAIT =

How long (seconds) to wait for a new worker to become ready before killing the old one.

3

Instance Method Summary collapse

Constructor Details

#initialize(host:, port:, argv: nil, extra_flags: []) ⇒ Master

Returns a new instance of Master.



32
33
34
35
36
37
38
39
40
41
42
# File 'lib/clacky/server/server_master.rb', line 32

# Record the listen address and extra worker flags. Nothing is bound or
# spawned here: the socket and worker PID start out nil and both signal
# flags start out false.
#
# host        - host the listen socket will later be bound on
# port        - preferred port
# argv        - kept for backward compat but no longer used
# extra_flags - extra CLI flags, e.g. ["--brand-test"]
def initialize(host:, port:, argv: nil, extra_flags: [])
  # Runtime state: no socket, no worker, no pending signal.
  @socket = @worker_pid = nil
  @restart_requested = @shutdown_requested = false

  # Configuration exactly as supplied by the caller.
  @host, @port = host, port
  @argv        = argv         # kept for backward compat but no longer used
  @extra_flags = extra_flags  # e.g. ["--brand-test"]
end

Instance Method Details

#bind_with_fallback(host, preferred_port, max_port:) ⇒ Object

Try to bind to preferred_port, fall back to next ports if occupied. Returns the bound TCPServer, or nil if all ports in range are occupied.



241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/clacky/server/server_master.rb', line 241

# Walk the ports from preferred_port up to max_port (inclusive) and return a
# TCPServer for the first one that can be bound. A port that raises
# Errno::EADDRINUSE is skipped; any other error propagates. Returns nil when
# every port in the range is occupied (or the range is empty). A log line is
# written only when the bound port differs from the preferred one.
def bind_with_fallback(host, preferred_port, max_port:)
  preferred_port.upto(max_port) do |port|
    listener =
      begin
        TCPServer.new(host, port)
      rescue Errno::EADDRINUSE
        nil # occupied: fall through to the next candidate
      end
    next unless listener

    Clacky::Logger.info("[Master] Bound to port #{port}") unless port == preferred_port
    return listener
  end
  nil
end

#hot_restart ⇒ Object

Spawn a new worker, wait for it to boot, then gracefully stop the old one.



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/clacky/server/server_master.rb', line 156

# Spawn a new worker, wait for it to boot, then gracefully stop the old one.
#
# The new worker becomes @worker_pid straight away. The old worker's whole
# process group is sent TERM, given up to 5 seconds to exit, and then sent
# KILL. This call blocks for NEW_WORKER_BOOT_WAIT seconds plus however long
# the old worker takes to go away.
def hot_restart
  old_pid = @worker_pid
  Clacky::Logger.info("[Master] Hot restart: spawning new worker (old PID=#{old_pid})...")

  new_pid = spawn_worker
  @worker_pid = new_pid

  # Give the new worker time to bind and start serving
  sleep NEW_WORKER_BOOT_WAIT

  # Nothing to stop if there was no previous worker.
  if old_pid
    reaped = false
    begin
      # Gracefully stop old worker — TERM the whole process group first so
      # grandchildren (node MCP, etc.) also get a chance to shut down cleanly.
      Process.kill("TERM", -old_pid)

      # Poll for the exit. The deadline uses the monotonic clock so that a
      # wall-clock adjustment cannot shorten or stretch the grace period.
      deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 5
      loop do
        pid, = Process.waitpid2(old_pid, Process::WNOHANG)
        reaped = !pid.nil?
        break if reaped
        break if Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
        sleep 0.1
      end

      # Force-kill the entire group if anything in it is still alive.
      begin
        Process.kill("KILL", -old_pid)
      rescue SystemCallError
        # group already empty — fine
      end
    rescue Errno::ESRCH, Errno::ECHILD
      # already gone / already reaped — nothing left to collect
      reaped = true
    end

    # A worker that ignored TERM has only just been KILLed and was never
    # waited on. Hand it to a reaper thread so it does not linger as a zombie.
    Process.detach(old_pid) unless reaped
  end

  Clacky::Logger.info("[Master] Hot restart complete. New worker PID=#{new_pid}")
end

#kill_existing_master ⇒ Object



272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/clacky/server/server_master.rb', line 272

# Stop a previously started master recorded in the PID file, if there is one.
#
# Returns immediately when the PID file is missing or does not contain a
# positive integer. Otherwise sends TERM to that PID and waits up to 5s for
# the port to become bindable; if it does not, sends KILL and waits 2s more.
# Exits the process with status 1 when the port never frees up or when the
# signal is refused with EPERM. Once a signal has been attempted, the PID
# file is removed on the way out, whatever the outcome.
def kill_existing_master
  return unless File.exist?(pid_file_path)

  pid = File.read(pid_file_path).strip.to_i
  return if pid <= 0

  begin
    Process.kill("TERM", pid)
    Clacky::Logger.info("[Master] Sent TERM to existing master (PID=#{pid}), waiting up to 5s...")

    unless port_free_within?(5)
      Clacky::Logger.warn("[Master] Port #{@port} still in use after 5s, sending KILL to PID=#{pid}...")
      begin
        Process.kill("KILL", pid)
      rescue Errno::ESRCH
        # The process exited between the TERM and the KILL — nothing to do.
      end
      unless port_free_within?(2)
        Clacky::Logger.error("[Master] Port #{@port} still in use after KILL, giving up.")
        exit(1)
      end
    end

    Clacky::Logger.info("[Master] Port #{@port} is now free.")
  rescue Errno::ESRCH
    Clacky::Logger.info("[Master] Existing master PID=#{pid} already gone.")
  rescue Errno::EPERM
    Clacky::Logger.warn("[Master] Could not stop existing master (PID=#{pid}) — permission denied.")
    exit(1)
  ensure
    File.delete(pid_file_path) if File.exist?(pid_file_path)
  end
end

#pid_file_path ⇒ Object



214
215
216
# File 'lib/clacky/server/server_master.rb', line 214

# Path of this master's PID file: a file named after the current @port,
# located in the system temp directory.
def pid_file_path
  pid_file_name = "clacky-master-#{@port}.pid"
  File.join(Dir.tmpdir, pid_file_name)
end

#port_free_within?(seconds) ⇒ Boolean

Returns:

  • (Boolean)


226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/clacky/server/server_master.rb', line 226

# Poll until @host:@port can be bound, for at most roughly `seconds` seconds.
#
# Each probe binds a throwaway TCPServer and closes it immediately. Returns
# true as soon as a probe succeeds, false once the deadline has passed while
# the port still raises Errno::EADDRINUSE. Any other bind error propagates.
#
# The deadline is measured on the monotonic clock, so a wall-clock adjustment
# during the wait cannot shorten or stretch it.
def port_free_within?(seconds)
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + seconds
  loop do
    begin
      TCPServer.new(@host, @port).close
      return true
    rescue Errno::EADDRINUSE
      return false if Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
      sleep 0.1
    end
  end
end


254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/clacky/server/server_master.rb', line 254

# Print the startup banner to stdout: tagline, an optional notice that the
# requested port was occupied, the Web UI URL, the version and a stop hint.
#
# port_changed  - when true, print the "port in use" notice
# original_port - the port that was requested; only shown in that notice
def print_banner(port_changed: false, original_port: nil)
  banner = Clacky::Banner.new
  puts ""
  # NOTE(review): the next line ends in a dangling `.`, so as listed it chains
  # onto the following line instead of printing something of its own. A method
  # name (presumably a logo/title helper on Banner) appears to have been lost
  # from this listing — confirm against the real source file.
  puts banner.
  puts banner.colored_tagline
  puts ""
  
  # Only mention the fallback when the bound port differs from the request.
  if port_changed
    puts "   [!] Port #{original_port} is in use, using #{@port} instead"
    puts ""
  end
  
  puts "   Web UI: #{banner.highlight("http://#{@host}:#{@port}")}"
  puts "   Version: #{Clacky::VERSION}"
  puts "   Press Ctrl-C to stop."
  puts ""
end

#remove_pid_file ⇒ Object



222
223
224
# File 'lib/clacky/server/server_master.rb', line 222

# Delete this master's PID file if it is present; do nothing otherwise.
def remove_pid_file
  path = pid_file_path
  return unless File.exist?(path)

  File.delete(path)
end

#run ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/clacky/server/server_master.rb', line 44

# Main entry point of the master process.
#
# Stops any previous master recorded in the PID file, binds the listen
# socket, prints the banner, writes the PID file, installs signal handlers,
# spawns the first worker and then supervises it in a polling loop until a
# shutdown is requested or the worker keeps failing. Exits with status 1 when
# no port can be bound. The PID file is removed on the way out.
def run
  # 0. Kill any existing master on this port before binding.
  kill_existing_master

  # 1. Try to bind the socket.
  # If port is 7070 (default), try fallback ports 7071-7075 if occupied.
  # If port is non-default (user-specified), only try that exact port.
  original_port = @port
  max_port = (@port == 7070) ? (@port + 5) : @port
  @socket = bind_with_fallback(@host, @port, max_port: max_port)
  
  if @socket.nil?
    if @port == 7070
      Clacky::Logger.error("[Master] No available ports in range 7070-7075")
    else
      Clacky::Logger.error("[Master] Port #{@port} is in use")
    end
    exit(1)
  end
  
  # NOTE(review): SO_REUSEADDR is set here after the socket is already
  # bound — confirm whether this call is still needed.
  @socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_REUSEADDR, true)
  @port = @socket.local_address.ip_port  # Update to actual bound port

  # 2. Print banner after port is determined
  print_banner(port_changed: @port != original_port, original_port: original_port)

  # Written after @port was updated, so the PID file is named after the port
  # that was actually bound, not the one originally requested.
  write_pid_file

  # 3. Signal handlers
  # The handlers only set flags; the monitor loop below acts on them.
  # HUP is treated the same as TERM/INT.
  Signal.trap("USR1") { @restart_requested  = true }
  Signal.trap("TERM") { @shutdown_requested = true }
  Signal.trap("INT")  { @shutdown_requested = true }
  Signal.trap("HUP")  { @shutdown_requested = true }

  # 4. Spawn first worker
  @worker_pid = spawn_worker
  @consecutive_failures = 0

  # 5. Monitor loop
  # Polls every 0.1s: shutdown flag first, then restart flag, then a
  # non-blocking check for a worker exit.
  loop do
    if @shutdown_requested
      shutdown
      break
    end

    if @restart_requested
      # Clear the flag before restarting so a USR1 that arrives during the
      # restart is picked up on a later iteration.
      @restart_requested = false
      hot_restart
      @consecutive_failures = 0
    end

    # Non-blocking wait: check if worker has exited
    pid, status = Process.waitpid2(@worker_pid, Process::WNOHANG)
    if pid
      # NOTE(review): exitstatus is presumably nil when the worker was killed
      # by a signal; that case falls into the failure branch below — confirm.
      exit_code = status.exitstatus
      if exit_code == RESTART_EXIT_CODE
        # The worker asked for a restart: respawn at once, no backoff.
        Clacky::Logger.info("[Master] Worker requested restart (exit #{RESTART_EXIT_CODE}).")
        @worker_pid = spawn_worker
        @consecutive_failures = 0
      elsif @shutdown_requested
        # A shutdown signal arrived after the check at the top of this
        # iteration and the worker is already gone: just leave the loop.
        break
      else
        # Unexpected exit: count it, give up after MAX_CONSECUTIVE_FAILURES,
        # otherwise respawn after a backoff delay.
        # NOTE(review): the counter is reset only on a restart (USR1 or
        # RESTART_EXIT_CODE), not after a worker has run healthily for a
        # while — confirm that is intended.
        @consecutive_failures += 1
        if @consecutive_failures >= MAX_CONSECUTIVE_FAILURES
          Clacky::Logger.error("[Master] Worker failed #{MAX_CONSECUTIVE_FAILURES} times in a row, giving up.")
          shutdown
          break
        end
        delay = [0.5 * (2 ** (@consecutive_failures - 1)), 30].min  # exponential backoff, max 30s
        Clacky::Logger.warn("[Master] Worker exited unexpectedly (exit #{exit_code}), failure #{@consecutive_failures}/#{MAX_CONSECUTIVE_FAILURES}, restarting in #{delay}s...")
        sleep delay
        @worker_pid = spawn_worker
      end
    end

    sleep 0.1
  end
ensure
  # Runs on every way out of this method, including the exit(1) above and
  # the exit(0) raised inside shutdown.
  remove_pid_file
end

#shutdown ⇒ Object



186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/clacky/server/server_master.rb', line 186

# Stop the current worker (if any), close the listen socket and exit the
# process with status 0. This method never returns normally.
#
# The worker's whole process group is sent TERM and given up to 3 seconds to
# exit. If the worker has not been reaped by then, the group is sent KILL.
def shutdown
  Clacky::Logger.info("[Master] Shutting down (worker PID=#{@worker_pid})...")
  if @worker_pid
    begin
      # TERM the entire worker process group so grandchildren (node MCP, etc.)
      # are also signalled and can clean up before we force-kill.
      Process.kill("TERM", -@worker_pid)
      # Wait up to 3s for worker graceful exit, then KILL the whole group.
      # Monotonic clock: a wall-clock adjustment must not change the wait.
      deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 3
      loop do
        pid, = Process.waitpid2(@worker_pid, Process::WNOHANG)
        break if pid
        if Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
          Clacky::Logger.warn("[Master] Worker did not exit in time, sending KILL...")
          begin
            Process.kill("KILL", -@worker_pid)
          rescue SystemCallError
            # best effort — the group may have vanished in the meantime
          end
          break
        end
        sleep 0.1
      end
    rescue Errno::ESRCH, Errno::ECHILD
      # already gone
    end
  end
  # The socket may never have been bound; closing it is best effort.
  begin
    @socket.close if @socket
  rescue IOError, SystemCallError
    # nothing useful to do this late in shutdown
  end
  Clacky::Logger.info("[Master] Exited.")
  exit(0)
end

#spawn_worker ⇒ Object

Spawn a fresh Ruby process that loads the (possibly updated) gem from disk. The listen socket is inherited via its file descriptor number.



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/clacky/server/server_master.rb', line 128

# Launch a worker as a brand-new Ruby process running the current script and
# return its PID.
#
# The worker is told it is a worker, which file descriptor carries the listen
# socket, and the master's PID, all via environment variables. It is started
# in its own process group.
def spawn_worker
  # The listen socket has to survive the exec, so clear its close-on-exec
  # flag before handing the descriptor number to the child.
  @socket.close_on_exec = false

  env = {
    "CLACKY_WORKER"      => "1",
    "CLACKY_INHERIT_FD"  => @socket.fileno.to_s,
    "CLACKY_MASTER_PID"  => Process.pid.to_s
  }

  # ARGV is not usable here (Thor has already consumed it), so the command
  # line is rebuilt by hand:
  #   clacky server --host HOST --port PORT [extra_flags]
  ruby, script = RbConfig.ruby, File.expand_path($0)
  worker_argv = ["server", "--host", @host.to_s, "--port", @port.to_s]
  worker_argv.concat(@extra_flags)

  Clacky::Logger.info("[Master PID=#{Process.pid}] spawn: #{ruby} #{script} #{worker_argv.join(' ')}")
  Clacky::Logger.info("[Master PID=#{Process.pid}] env: #{env.inspect}")

  # pgroup: 0 makes the worker the leader of a new process group. The master
  # can then signal the whole group (-pid), which also reaches grandchildren
  # (e.g. chrome-devtools-mcp node process) even when the worker is
  # force-killed before its own shutdown_proc runs.
  spawn(env, ruby, script, *worker_argv, pgroup: 0).tap do |pid|
    Clacky::Logger.info("[Master PID=#{Process.pid}] Spawned worker PID=#{pid} pgroup=#{pid}")
  end
end

#write_pid_file ⇒ Object



218
219
220
# File 'lib/clacky/server/server_master.rb', line 218

# Record this process's PID in the PID file, replacing any previous content.
def write_pid_file
  own_pid = Process.pid.to_s
  File.write(pid_file_path, own_pid)
end