Module: Tracelit::Instrumentation

Defined in:
lib/tracelit/instrumentation.rb

Constant Summary collapse

SETUP_MUTEX =
Mutex.new

Class Method Summary collapse

Class Method Details

.detect_framework ⇒ Object

Detects the web framework in use for the telemetry.sdk.name attribute. This value appears as the `framework` column in the services table.



126
127
128
129
130
131
# File 'lib/tracelit/instrumentation.rb', line 126

# Names the web framework loaded in the current process, used as the
# telemetry.sdk.name resource attribute.
#
# The checks run from most specific to most generic and the first constant
# that is defined wins, so a process with several of them loaded reports
# the earliest match in the chain.
#
# @return [String] "rails", "sinatra", "rack", or "ruby" when none is loaded
def self.detect_framework
  if defined?(::Rails)
    "rails"
  elsif defined?(::Sinatra)
    "sinatra"
  elsif defined?(::Rack)
    "rack"
  else
    "ruby"
  end
end

.error_always_on_sampler(rate) ⇒ Object

Returns an ErrorAlwaysOnSampler wrapped in ParentBased so child spans honour the parent’s sampling decision. ErrorAlwaysOnSampler upgrades DROP → RECORD_ONLY so that ErrorSpanProcessor.on_finish fires for all spans, allowing error spans to be exported even outside the sampling ratio.



137
138
139
140
141
# File 'lib/tracelit/instrumentation.rb', line 137

# Builds the sampler installed when the sample rate is below 1.0.
#
# A Tracelit::ErrorAlwaysOnSampler created for `rate` is used as the root
# sampler of a ParentBased sampler, so child spans follow their parent's
# sampling decision.
#
# @param rate [Numeric] sampling ratio handed to ErrorAlwaysOnSampler
# @return [Object] the sampler returned by Samplers.parent_based
def self.error_always_on_sampler(rate)
  root_sampler = Tracelit::ErrorAlwaysOnSampler.new(rate)
  OpenTelemetry::SDK::Trace::Samplers.parent_based(root: root_sampler)
end

.install_fork_hook(config) ⇒ Object

Fix 5: Register a Process._fork hook (Ruby 3.1+) so that background polling threads are restarted inside each forked Puma/Unicorn worker. In the parent (pid != 0) nothing changes. In the child (pid == 0) we restart the metric pollers so each worker reports its own stats.



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/tracelit/instrumentation.rb', line 177

# Fix 5: Registers a Process._fork hook (Ruby 3.1+) so that background
# polling threads are restarted inside each forked worker process.
#
# The hook is an anonymous module prepended to Process's singleton class.
# It calls the original _fork and, only when the result is 0 (i.e. we are
# in the child), restarts the metric pollers with the captured `config`.
# The parent process is left untouched.
#
# Does nothing when Process does not respond to :_fork. Failures while
# installing the hook are logged as warnings rather than raised.
#
# @param config [Object] configuration passed to
#   Tracelit::Metrics.restart_pollers in every forked child
# @return [Object, nil] not meaningful to callers
def self.install_fork_hook(config)
  return unless Process.respond_to?(:_fork)

  hook_module = Module.new do
    define_method(:_fork) do
      pid = super()
      if pid == 0
        # We are in the child — restart pollers for this worker.
        # A failure here must never escape: an exception raised from
        # _fork would propagate out of `fork` in the child and take the
        # host application's worker down with it.
        begin
          Tracelit::Metrics.restart_pollers(config)
        rescue StandardError => e
          OpenTelemetry.logger.warn("[Tracelit] could not restart pollers after fork: #{e.message}")
        end
      end
      pid
    end
  end

  Process.singleton_class.prepend(hook_module)
rescue StandardError => e
  OpenTelemetry.logger.warn("[Tracelit] could not install fork hook: #{e.message}")
end

.reset! ⇒ Object



110
111
112
113
114
115
# File 'lib/tracelit/instrumentation.rb', line 110

# Clears the module's setup state while holding SETUP_MUTEX: marks the
# module as not configured and drops the stored config, so a subsequent
# call to setup is no longer short-circuited by the @configured guard.
#
# @return [nil]
def self.reset!
  SETUP_MUTEX.synchronize { @configured = false; @config = nil }
end

.setup(config) ⇒ Object

Sets up the OpenTelemetry SDK with the Tracelit OTLP exporter. Called once at application boot. Idempotent — safe to call multiple times. Never raises — a misconfigured SDK must not crash the host application.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/tracelit/instrumentation.rb', line 20

# Sets up the OpenTelemetry SDK with the Tracelit OTLP exporter.
#
# Intended to be called once at application boot. The whole body runs under
# SETUP_MUTEX and returns early once a previous call has completed, so
# repeated calls are no-ops. It also returns early when the config is
# disabled or fails validation (the latter is logged as a warning).
#
# Never raises StandardError: anything that goes wrong while configuring is
# logged as a warning instead — a misconfigured SDK must not crash the host
# application.
#
# @param config [Object] Tracelit configuration. This method reads enabled,
#   valid? (used as a collection of error messages), resolved_service_name,
#   environment, resolved_commit_sha, resource_attributes, endpoint, api_key
#   and sample_rate from it.
# @return [Object, nil] not meaningful to callers
def self.setup(config)
  SETUP_MUTEX.synchronize do
    return if @configured
    return unless config.enabled

    # Fix 1: Install a clean single-line error handler before any OTel SDK
    # calls so that internal OTel errors never dump raw stack traces into
    # the application's logs. Message and exception text are separated by
    # ": " so the two parts stay readable when both are present.
    OpenTelemetry.error_handler = lambda do |exception:, message:|
      msg = [message, exception&.message].compact.join(": ")
      OpenTelemetry.logger.warn("[Tracelit] #{msg}")
    end

    # Fix 2/3: Soft validation — warn and bail out rather than raise.
    # An observability SDK must never crash the host application.
    errors = config.valid?
    if errors.any?
      OpenTelemetry.logger.warn("[Tracelit] disabled — #{errors.join(', ')}")
      return
    end

    OpenTelemetry::SDK.configure do |otel|
      # Resource attributes identify this service in Tracelit.
      # These populate the `resource` Map column on every telemetry row.
      base_attrs = {
        OpenTelemetry::SemanticConventions::Resource::SERVICE_NAME    => config.resolved_service_name,
        OpenTelemetry::SemanticConventions::Resource::DEPLOYMENT_ENVIRONMENT => config.environment,
        "telemetry.sdk.language" => "ruby",
        "telemetry.sdk.name"     => detect_framework,
        "telemetry.sdk.version"  => Tracelit::VERSION,
      }
      sha = config.resolved_commit_sha
      base_attrs["service.commit_sha"] = sha if sha

      # User-supplied attributes are merged last, so they win on conflicts.
      otel.resource = OpenTelemetry::SDK::Resources::Resource.create(
        base_attrs.merge(config.resource_attributes)
      )

      # Build the OTLP exporter once — shared by both processors
      exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
        endpoint: "#{config.endpoint}/v1/traces",
        headers: {
          "Authorization"  => "Bearer #{config.api_key}",
          "X-Service-Name" => config.resolved_service_name,
          "X-Environment"  => config.environment,
        }
      )

      # Primary processor: batches and exports sampled spans
      otel.add_span_processor(
        OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(exporter)
      )

      # Error processor: always exports error spans regardless of
      # sampling decision — fires on_finish after status is known
      otel.add_span_processor(
        Tracelit::ErrorSpanProcessor.new(exporter)
      )

      # Auto-instrumentation: instruments Rails, Rack, ActiveRecord,
      # Action View, Net::HTTP, Faraday, Redis, Sidekiq, and more.
      # use_all() enables every installed instrumentation gem.
      otel.use_all
    end

    # Set sampler after configure — Configurator does not expose
    # sampler= in OTel SDK 1.x, must be set on the provider directly.
    # Skip at 1.0: the default AlwaysOn sampler is correct and we do not touch it.
    if config.sample_rate < 1.0
      OpenTelemetry.tracer_provider.sampler = error_always_on_sampler(config.sample_rate)
    end

    # Marked configured before logs/metrics are wired up, so a failure in
    # the steps below does not cause the tracing setup to run a second time.
    @configured = true
    @config     = config

    setup_logs(config)
    Tracelit::Metrics.setup(config)

    # Fix 5: Fork safety for Puma cluster mode and Unicorn.
    # Background threads (pollers) are killed in forked worker processes.
    # Process._fork (Ruby 3.1+) fires in the child after every fork so we
    # can restart pollers in each worker without touching the master.
    install_fork_hook(config)

    # Fix 9: Flush and shut down the providers gracefully on process exit
    # so the last metrics/traces batch is not lost during deploys.
    at_exit { shutdown }
  end
rescue StandardError => e
  # Honour the "never raises" contract: the mutex has already been released
  # by synchronize, so just report the failure and carry on.
  OpenTelemetry.logger.warn("[Tracelit] setup failed: #{e.message}")
end

.setup_logs(config) ⇒ Object

Sets up the OTel Logs SDK: creates a LoggerProvider, attaches a BatchLogRecordProcessor with an OTLP/HTTP exporter, registers it globally, and installs the Rails.logger bridge.



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/tracelit/instrumentation.rb', line 146

# Wires up the OTel Logs pipeline: an OTLP logs exporter pointed at
# "<endpoint>/v1/logs", a BatchLogRecordProcessor around it, and a
# LoggerProvider that reuses the tracer provider's resource. The provider
# is registered globally and then handed to RailsLoggerBridge.install.
#
# Any StandardError raised along the way is logged as a warning instead of
# propagating.
#
# @param config [Object] configuration; endpoint, api_key,
#   resolved_service_name and environment are read from it
# @return [Object] result of RailsLoggerBridge.install, or of the warning
#   call when setup failed
def self.setup_logs(config)
  logs_endpoint = "#{config.endpoint}/v1/logs"
  request_headers = {
    "Authorization"  => "Bearer #{config.api_key}",
    "X-Service-Name" => config.resolved_service_name,
    "X-Environment"  => config.environment,
  }
  exporter = OpenTelemetry::Exporter::OTLP::Logs::LogsExporter.new(
    endpoint: logs_endpoint,
    headers: request_headers
  )

  # Share the resource with traces so both signals carry the same identity.
  shared_resource = OpenTelemetry.tracer_provider.resource
  provider = OpenTelemetry::SDK::Logs::LoggerProvider.new(resource: shared_resource)

  batch_processor = OpenTelemetry::SDK::Logs::Export::BatchLogRecordProcessor.new(exporter)
  provider.add_log_record_processor(batch_processor)

  OpenTelemetry.logger_provider = provider

  # The bridge is installed only once the provider is fully assembled and
  # registered.
  RailsLoggerBridge.install(provider)
rescue StandardError => e
  OpenTelemetry.logger.warn("[Tracelit] failed to set up logs: #{e.message}")
end

.shutdown ⇒ Object



117
118
119
120
# File 'lib/tracelit/instrumentation.rb', line 117

# Shuts down the globally registered OpenTelemetry providers — traces,
# metrics and logs — so buffered telemetry gets a chance to be flushed
# before the process exits.
#
# Strictly best-effort: a provider accessor that is not defined is skipped,
# and a failure while shutting one provider down is swallowed so the
# remaining providers are still shut down and process exit is never
# interrupted.
#
# @return [nil]
def self.shutdown
  %i[tracer_provider meter_provider logger_provider].each do |provider_name|
    # The metrics/logs accessors may be missing entirely when that signal
    # was never loaded.
    next unless OpenTelemetry.respond_to?(provider_name)

    begin
      OpenTelemetry.public_send(provider_name).shutdown
    rescue StandardError
      # Deliberately ignored — nothing useful can be done at exit time.
      nil
    end
  end
  nil
end