Module: StillActive::Diff

Extended by:
Diff
Included in:
Diff
Defined in:
lib/still_active/diff.rb

Overview

Compares two still_active JSON snapshots and produces a structured Diff. Designed for PR review: surfaces regressions (CI-failable deltas) on top of the full added/removed/bumped breakdown.

Schema versions accepted: see SUPPORTED_SCHEMA_VERSIONS. A snapshot with a higher schema_version is rejected loudly rather than silently parsed.

Defined Under Namespace

Classes: Added, Bumped, Regression, Removed, Result, SignalChange, UnsupportedSchemaError

Constant Summary collapse

SUPPORTED_SCHEMA_VERSIONS =
[1].freeze
SCORECARD_DROP_THRESHOLD =

absolute drop to flag

1.0
SCORECARD_GOOD_THRESHOLD =

categorical threshold (good -> below-good)

7.0
NEW_GEM_LIBYEAR_THRESHOLD =

added gems already this far behind regress

0.5
LIBYEAR_DELTA_THRESHOLD =

floating-point fuzz

0.01
NUMERIC_GEM_FIELDS =

The diff dereferences gem fields with type-sensitive operations: a non-Hash value crashes the intersection branch, an arithmetic field that isn’t numeric crashes (libyear/scorecard) or silently fabricates a count (vulnerability_count via .to_i), and a non-array vulnerabilities silently drops advisory ids. The baseline is untrusted user input, so validate the shape the diff requires here and let the rest of the code assume it.

["vulnerability_count", "scorecard_score", "libyear"].freeze

Instance Method Summary collapse

Instance Method Details

#advisory_ids(data) ⇒ Object



276
277
278
# File 'lib/still_active/diff.rb', line 276

# Collects every advisory identifier listed for a gem.
#
# For each entry under "vulnerabilities" the entry's "id" comes first,
# followed by its "aliases"; nil values are dropped and duplicates removed.
#
# @param data [Hash] a gem entry from a snapshot
# @return [Array] unique identifiers in first-seen order (empty when the gem
#   has no "vulnerabilities")
def advisory_ids(data)
  entries = Array(data["vulnerabilities"])
  identifiers = entries.flat_map do |entry|
    ([entry["id"]] + Array(entry["aliases"])).compact
  end
  identifiers.uniq
end

#call(baseline:, current:) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/still_active/diff.rb', line 29

# Compares two parsed snapshots and builds the structured Result.
#
# Both snapshots go through validate_schema! first. Gems only in `current`
# become Added entries, gems only in `baseline` become Removed entries, and
# gems in both are checked for a version bump and for signal changes.
# Entries are ordered by gem name.
#
# @param baseline [Hash] parsed snapshot to compare against
# @param current [Hash] parsed snapshot of the present state
# @return [Result]
# @raise [UnsupportedSchemaError] when validate_schema! rejects either snapshot
def call(baseline:, current:)
  validate_schema!(baseline, "baseline")
  validate_schema!(current, "current")

  # validate_schema! lets an explicit `"gems": null` through, and Hash#fetch
  # only falls back to its default when the key is absent. Using `|| {}`
  # treats a null gems section like a missing one instead of crashing on
  # nil.keys below.
  b_gems = baseline["gems"] || {}
  c_gems = current["gems"] || {}

  added = (c_gems.keys - b_gems.keys).sort.map { |n| Added.new(name: n, data: c_gems[n]) }
  removed = (b_gems.keys - c_gems.keys).sort.map { |n| Removed.new(name: n, data: b_gems[n]) }

  bumped = []
  signal_changes = []
  (b_gems.keys & c_gems.keys).sort.each do |name|
    before = b_gems[name]
    after = c_gems[name]
    if before["version_used"] != after["version_used"]
      bumped << Bumped.new(
        name: name,
        before_version: before["version_used"],
        after_version: after["version_used"],
        kind: classify_bump(before, after),
        before: before,
        after: after,
      )
    end
    # Signal changes are collected whether or not the version moved.
    changes = collect_signal_changes(before, after)
    signal_changes << SignalChange.new(name: name, changes: changes, before: before, after: after) if changes.any?
  end

  ruby = ruby_delta(baseline["ruby"], current["ruby"])
  regressions = collect_regressions(
    added: added,
    bumped: bumped,
    signal_changes: signal_changes,
    ruby_delta: ruby,
  )

  Result.new(
    added: added,
    removed: removed,
    bumped: bumped,
    signal_changes: signal_changes,
    regressions: regressions,
    ruby: ruby,
  )
end

#classify_bump(before, after) ⇒ Object

Categorises a version bump:

  • :introduced_vulns - new advisories appeared on the resolved version

  • :closed_vulns - the vulnerability count went down on the resolved version (not necessarily to zero)

  • :older_relative - libyear-to-latest grew (rare; usually unchanged)

  • :fresher - libyear-to-latest shrank

  • :neutral - no obvious signal change



146
147
148
149
150
151
152
153
154
155
156
# File 'lib/still_active/diff.rb', line 146

# Labels a version bump by the strongest signal that moved with it.
#
# A change in vulnerability count wins; only when the counts are equal is
# the libyear difference consulted. A missing libyear is treated as 0.0.
#
# @param before [Hash] gem entry from the baseline snapshot
# @param after [Hash] gem entry from the current snapshot
# @return [Symbol] :introduced_vulns, :closed_vulns, :older_relative,
#   :fresher or :neutral
def classify_bump(before, after)
  vuln_shift = vuln_count(after) - vuln_count(before)

  if vuln_shift.positive?
    :introduced_vulns
  elsif vuln_shift.negative?
    :closed_vulns
  else
    libyear_shift = (after["libyear"] || 0.0) - (before["libyear"] || 0.0)
    # Differences within +/- LIBYEAR_DELTA_THRESHOLD count as unchanged.
    if libyear_shift > LIBYEAR_DELTA_THRESHOLD
      :older_relative
    elsif libyear_shift < -LIBYEAR_DELTA_THRESHOLD
      :fresher
    else
      :neutral
    end
  end
end

#collect_regressions(added:, bumped:, signal_changes:, ruby_delta:) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/still_active/diff.rb', line 203

# Turns the raw diff pieces into the list of Regression records.
#
# Output order is: added gems, bumped gems, per-gem signal changes, then the
# Ruby EOL entry. An added gem yields at most one regression (vulnerabilities
# take precedence over archived, which takes precedence over stale).
#
# @param added [Array] entries responding to #name and #data
# @param bumped [Array] entries responding to #name, #kind, #before_version
#   and #after_version
# @param signal_changes [Array] entries responding to #name and #changes
# @param ruby_delta [Hash, nil] value produced by ruby_delta
# @return [Array<Regression>]
def collect_regressions(added:, bumped:, signal_changes:, ruby_delta:)
  found = []

  added.each do |entry|
    info = entry.data
    regression =
      if vuln_count(info).positive?
        Regression.new(kind: :new_gem_with_vulns, gem: entry.name, detail: "#{vuln_count(info)} vulns at introduction")
      elsif info["archived"]
        Regression.new(kind: :new_gem_archived, gem: entry.name, detail: "added gem points at archived repo")
      elsif info["libyear"] && info["libyear"] > NEW_GEM_LIBYEAR_THRESHOLD
        Regression.new(kind: :new_gem_stale, gem: entry.name, detail: "added gem already #{info["libyear"]} libyears behind latest")
      end
    found << regression if regression
  end

  bumped.each do |bump|
    next unless bump.kind == :introduced_vulns

    found << Regression.new(
      kind: :bump_introduced_vulns,
      gem: bump.name,
      detail: "#{bump.before_version} -> #{bump.after_version}",
    )
  end

  signal_changes.each do |signal|
    signal.changes.each do |change|
      # Each recognised change kind maps to a regression of the same kind;
      # unrecognised kinds produce no detail and are skipped.
      detail =
        case change[:kind]
        when :archived
          "repo archived since baseline"
        when :new_vulnerability
          ids = Array(change[:ids]).join(", ")
          suffix = ids.empty? ? "" : " (#{ids})"
          "#{change[:from]} -> #{change[:to]}#{suffix}"
        when :scorecard_dropped
          note = change[:crossed_good] ? " crossed #{SCORECARD_GOOD_THRESHOLD}" : ""
          "#{change[:from]} -> #{change[:to]}#{note}"
        when :version_yanked
          "pinned version yanked from rubygems"
        when :libyear_worsened
          "libyear #{change[:from]} -> #{change[:to]} (+#{change[:delta]}y; same pinned version)"
        end
      next unless detail

      found << Regression.new(kind: change[:kind], gem: signal.name, detail: detail)
    end
  end

  newly_eol = ruby_delta && ruby_delta[:newly_eol]
  found << Regression.new(kind: :ruby_eol_introduced, gem: "(ruby)", detail: "Ruby #{ruby_delta[:to]} is now EOL") if newly_eol

  found
end

#collect_signal_changes(before, after) ⇒ Object



158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/still_active/diff.rb', line 158

# Lists the health-signal changes for a gem present in both snapshots.
#
# Changes are emitted in a fixed order: :archived, :new_vulnerability,
# :scorecard_dropped, :libyear_worsened, :version_yanked. The vulnerability
# and libyear signals are only reported when the pinned version is unchanged.
#
# @param before [Hash] gem entry from the baseline snapshot
# @param after [Hash] gem entry from the current snapshot
# @return [Array<Hash>] one hash per change, each carrying a :kind key
def collect_signal_changes(before, after)
  changes = []
  same_version = before["version_used"] == after["version_used"]

  if !before["archived"] && after["archived"]
    changes << { kind: :archived, from: false, to: true }
  end

  opened = vuln_count(after) - vuln_count(before)
  if opened.positive? && same_version
    # Set difference is enough for the common case. Edge case: an advisory
    # backfilled with a CVE alias alongside an existing GHSA can show up as
    # "new" in this list even though it's a re-keying of the same issue.
    # The vulnerability_count gate above keeps that to detail-string noise.
    new_ids = advisory_ids(after) - advisory_ids(before)
    changes << { kind: :new_vulnerability, from: vuln_count(before), to: vuln_count(after), ids: new_ids.first(3) }
  end

  if before["scorecard_score"] && after["scorecard_score"]
    # Round before comparing: binary floats make a decimal drop of exactly
    # 1.0 come out slightly short (4.1 - 3.1 == 0.9999999999999996), which
    # would otherwise slip under SCORECARD_DROP_THRESHOLD.
    drop = (before["scorecard_score"] - after["scorecard_score"]).round(6)
    # OSSF treats >= 7.0 as "good". A score landing at 7.0 stays good (a 7.5
    # -> 7.0 dip is noise within the safe zone). Only drops below 7.0 cross.
    crossed = before["scorecard_score"] >= SCORECARD_GOOD_THRESHOLD && after["scorecard_score"] < SCORECARD_GOOD_THRESHOLD
    if drop >= SCORECARD_DROP_THRESHOLD || crossed
      changes << { kind: :scorecard_dropped, from: before["scorecard_score"], to: after["scorecard_score"], crossed_good: crossed }
    end
  end

  if before["libyear"] && after["libyear"] && same_version
    # Same pinned version + libyear grew = upstream released and we didn't
    # follow. That IS a regression. If version_used moved forward we deliberately
    # don't flag — moving forward isn't a PR regression even when libyear-to-latest
    # technically grows (because upstream is releasing faster).
    delta = after["libyear"] - before["libyear"]
    if delta > LIBYEAR_DELTA_THRESHOLD
      changes << { kind: :libyear_worsened, from: before["libyear"], to: after["libyear"], delta: delta.round(2) }
    end
  end

  if !before["version_yanked"] && after["version_yanked"]
    changes << { kind: :version_yanked }
  end

  changes
end

#ruby_delta(before, after) ⇒ Object



257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/still_active/diff.rb', line 257

# Summarises how the snapshots' "ruby" sections differ.
#
# A section missing on one side only is treated as an empty hash, so its
# fields read as nil.
#
# @param before [Hash, nil] "ruby" section of the baseline snapshot
# @param after [Hash, nil] "ruby" section of the current snapshot
# @return [Hash, nil] nil when both sections are nil; otherwise a hash with
#   :version_changed, :from, :to, :newly_eol, :libyear_before, :libyear_after
def ruby_delta(before, after)
  return if [before, after].all?(&:nil?)

  old_ruby = before || {}
  new_ruby = after || {}
  old_version = old_ruby["version"]
  new_version = new_ruby["version"]
  # True only when "eol" is truthy now and was not in the baseline.
  became_eol = !old_ruby["eol"] && !!new_ruby["eol"]

  {
    version_changed: old_version != new_version,
    from: old_version,
    to: new_version,
    newly_eol: became_eol,
    libyear_before: old_ruby["libyear"],
    libyear_after: new_ruby["libyear"],
  }
end

#validate_gem!(role, name, data) ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/still_active/diff.rb', line 112

# Checks that one gem entry has the shape the diff relies on.
#
# The entry must be a Hash, every field in NUMERIC_GEM_FIELDS must be nil or
# Numeric, and "vulnerabilities" (when present) must be an Array of Hashes.
#
# @param role [String] snapshot label used in error messages
# @param name [Object] gem name, rendered with #inspect in error messages
# @param data [Object] the gem entry to check
# @raise [UnsupportedSchemaError] on the first violation found
def validate_gem!(role, name, data)
  label = "#{role} gem #{name.inspect}"

  raise UnsupportedSchemaError, "#{label} is malformed (expected an object, got #{data.class})" unless data.is_a?(Hash)

  # Fields are checked in NUMERIC_GEM_FIELDS order, so the first offender wins.
  bad_field = NUMERIC_GEM_FIELDS.find { |field| !data[field].nil? && !data[field].is_a?(Numeric) }
  if bad_field
    raise UnsupportedSchemaError, "#{label} has a non-numeric #{bad_field} (got #{data[bad_field].class})"
  end

  advisories = data["vulnerabilities"]
  return if advisories.nil?

  unless advisories.is_a?(Array)
    raise UnsupportedSchemaError, "#{label} has a malformed vulnerabilities list (expected an array, got #{advisories.class})"
  end

  # advisory_ids indexes each entry like a hash (entry["id"]), which would
  # crash on a scalar element, so anything that is not an object is refused.
  advisories.each do |entry|
    raise UnsupportedSchemaError, "#{label} has a malformed vulnerability entry (expected an object, got #{entry.class})" unless entry.is_a?(Hash)
  end
end

#validate_schema!(snapshot, role) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/still_active/diff.rb', line 76

# Rejects a snapshot whose top-level shape the diff cannot work with.
#
# The snapshot must be a Hash with a schema_version listed in
# SUPPORTED_SCHEMA_VERSIONS; "ruby" must be nil or a Hash; "gems" must be nil
# or a Hash, and each of its entries is handed to validate_gem!.
#
# @param snapshot [Object] parsed JSON to check
# @param role [String] label used in error messages
# @raise [UnsupportedSchemaError] on the first violation found
def validate_schema!(snapshot, role)
  # --baseline can point at any JSON file, so a wrong shape is reported as an
  # UnsupportedSchemaError (which emit_diff turns into a clean exit 2) rather
  # than surfacing later as a raw stack trace from snapshot["..."] / gems.keys.
  raise UnsupportedSchemaError, "#{role} is not a still_active JSON object (got #{snapshot.class})" unless snapshot.is_a?(Hash)

  schema_version = snapshot["schema_version"]
  unless SUPPORTED_SCHEMA_VERSIONS.include?(schema_version)
    supported = SUPPORTED_SCHEMA_VERSIONS.join(", ")
    raise UnsupportedSchemaError, "#{role} has schema_version=#{schema_version.inspect}; supported: #{supported}"
  end

  ruby_section = snapshot["ruby"]
  case ruby_section
  when nil, Hash
    # acceptable: section absent or an object
  else
    raise UnsupportedSchemaError, "#{role} has a malformed ruby section (expected an object, got #{ruby_section.class})"
  end

  gem_table = snapshot["gems"]
  case gem_table
  when nil
    return
  when Hash
    gem_table.each_pair { |gem_name, gem_data| validate_gem!(role, gem_name, gem_data) }
  else
    raise UnsupportedSchemaError, "#{role} has a malformed gems section (expected an object, got #{gem_table.class})"
  end
end

#vuln_count(data) ⇒ Object



272
273
274
# File 'lib/still_active/diff.rb', line 272

# Reads a gem entry's vulnerability count as an Integer.
#
# @param data [Hash] a gem entry from a snapshot
# @return [Integer] the "vulnerability_count" value converted with #to_i
#   (a missing or nil value yields 0)
def vuln_count(data)
  raw_count = data["vulnerability_count"]
  raw_count.to_i
end