Module: StillActive::Diff

Extended by:
Diff
Included in:
Diff
Defined in:
lib/still_active/diff.rb

Overview

Compares two still_active JSON snapshots and produces a structured Diff. Designed for PR review: surfaces regressions (CI-failable deltas) on top of the full added/removed/bumped breakdown.

Schema versions accepted: see SUPPORTED_SCHEMA_VERSIONS. A snapshot whose schema_version is not in that list (for example, one written by a newer release) is rejected loudly with UnsupportedSchemaError rather than silently parsed.

Defined Under Namespace

Classes: Added, Bumped, Regression, Removed, Result, SignalChange, UnsupportedSchemaError

Constant Summary collapse

SUPPORTED_SCHEMA_VERSIONS =
[1].freeze
SCORECARD_DROP_THRESHOLD =

absolute drop to flag

1.0
SCORECARD_GOOD_THRESHOLD =

categorical threshold (good -> below-good)

7.0
NEW_GEM_LIBYEAR_THRESHOLD =

newly added gems that are already more than this many libyears behind latest count as a regression

0.5
LIBYEAR_DELTA_THRESHOLD =

floating-point fuzz

0.01

Instance Method Summary collapse

Instance Method Details

#advisory_ids(data) ⇒ Object



219
220
221
# File 'lib/still_active/diff.rb', line 219

# Collects every advisory identifier recorded for a gem entry: each
# vulnerability's "id" followed by its "aliases", with nils dropped and
# duplicates removed (first occurrence wins, order preserved).
#
# @param data [Hash] gem entry; only data["vulnerabilities"] is read
# @return [Array] unique identifiers, empty when no vulnerabilities are listed
def advisory_ids(data)
  identifiers = []
  Array(data["vulnerabilities"]).each do |advisory|
    identifiers << advisory["id"]
    # Array() lets a missing or scalar "aliases" value be handled uniformly.
    identifiers.concat(Array(advisory["aliases"]))
  end
  identifiers.compact.uniq
end

#call(baseline:, current:) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/still_active/diff.rb', line 29

# Diffs two parsed still_active snapshots and returns the full breakdown
# (added / removed / bumped gems, per-gem signal changes, the Ruby delta)
# together with the subset of changes that count as regressions.
#
# @param baseline [Hash] parsed snapshot to compare against (string keys)
# @param current [Hash] parsed snapshot being evaluated (string keys)
# @return [Result] the structured diff
# @raise [UnsupportedSchemaError] when either snapshot's schema_version is
#   not listed in SUPPORTED_SCHEMA_VERSIONS
def call(baseline:, current:)
  validate_schema!(baseline, "baseline")
  validate_schema!(current, "current")

  # `|| {}` instead of `fetch("gems", {})`: fetch only falls back when the key
  # is absent, so a snapshot that serialised "gems" as null would blow up with
  # NoMethodError on nil.keys. Treat null exactly like a missing key.
  b_gems = baseline["gems"] || {}
  c_gems = current["gems"] || {}

  added = (c_gems.keys - b_gems.keys).sort.map { |n| Added.new(name: n, data: c_gems[n]) }
  removed = (b_gems.keys - c_gems.keys).sort.map { |n| Removed.new(name: n, data: b_gems[n]) }

  bumped = []
  signal_changes = []
  # Gems present on both sides: record a version bump (if any) and,
  # independently, any health-signal changes between the two entries.
  (b_gems.keys & c_gems.keys).sort.each do |name|
    before = b_gems[name]
    after = c_gems[name]
    if before["version_used"] != after["version_used"]
      bumped << Bumped.new(
        name: name,
        before_version: before["version_used"],
        after_version: after["version_used"],
        kind: classify_bump(before, after),
        before: before,
        after: after,
      )
    end
    changes = collect_signal_changes(before, after)
    signal_changes << SignalChange.new(name: name, changes: changes, before: before, after: after) if changes.any?
  end

  ruby = ruby_delta(baseline["ruby"], current["ruby"])
  regressions = collect_regressions(
    added: added,
    bumped: bumped,
    signal_changes: signal_changes,
    ruby_delta: ruby,
  )

  Result.new(
    added: added,
    removed: removed,
    bumped: bumped,
    signal_changes: signal_changes,
    regressions: regressions,
    ruby: ruby,
  )
end

#classify_bump(before, after) ⇒ Object

Categorises a version bump:

  • :introduced_vulns - new advisories appeared on the resolved version

  • :closed_vulns - the advisory count on the resolved version went down (not necessarily to zero)

  • :older_relative - libyear-to-latest grew (rare; usually unchanged)

  • :fresher - libyear-to-latest shrank

  • :neutral - no obvious signal change



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/still_active/diff.rb', line 89

# Labels a version bump by comparing the gem's entry before and after.
#
# The vulnerability count is checked first; libyear is only consulted when the
# count did not move. A missing "libyear" on either side is treated as 0.0.
#
# @param before [Hash] gem entry from the baseline snapshot
# @param after [Hash] gem entry from the current snapshot
# @return [Symbol] :introduced_vulns when the count rose, :closed_vulns when it
#   fell, :older_relative / :fresher when libyear moved by more than
#   LIBYEAR_DELTA_THRESHOLD up / down, otherwise :neutral
def classify_bump(before, after)
  vuln_shift = vuln_count(after) - vuln_count(before)

  if vuln_shift > 0
    :introduced_vulns
  elsif vuln_shift < 0
    :closed_vulns
  else
    libyear_shift = (after["libyear"] || 0.0) - (before["libyear"] || 0.0)
    if libyear_shift > LIBYEAR_DELTA_THRESHOLD
      :older_relative
    elsif libyear_shift < -LIBYEAR_DELTA_THRESHOLD
      :fresher
    else
      :neutral
    end
  end
end

#collect_regressions(added:, bumped:, signal_changes:, ruby_delta:) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/still_active/diff.rb', line 146

# Distils the diff into the list of regressions, in a fixed order: added gems,
# then bumps, then per-gem signal changes, then the Ruby runtime.
#
# @param added [Array] entries responding to #name and #data
# @param bumped [Array] entries responding to #name, #kind, #before_version
#   and #after_version
# @param signal_changes [Array] entries responding to #name and #changes
#   (an array of hashes keyed by :kind)
# @param ruby_delta [Hash, nil] Ruby comparison; only :newly_eol and :to are read
# @return [Array<Regression>]
def collect_regressions(added:, bumped:, signal_changes:, ruby_delta:)
  found = []

  # An added gem yields at most one regression; the first matching rule wins
  # (vulnerabilities, then archived repo, then staleness).
  added.each do |entry|
    info = entry.data
    vulns = vuln_count(info)
    stale_by = info["libyear"]
    regression =
      if vulns.positive?
        Regression.new(kind: :new_gem_with_vulns, gem: entry.name, detail: "#{vulns} vulns at introduction")
      elsif info["archived"]
        Regression.new(kind: :new_gem_archived, gem: entry.name, detail: "added gem points at archived repo")
      elsif stale_by && stale_by > NEW_GEM_LIBYEAR_THRESHOLD
        Regression.new(kind: :new_gem_stale, gem: entry.name, detail: "added gem already #{stale_by} libyears behind latest")
      end
    found << regression if regression
  end

  # Only bumps classified as introducing vulnerabilities are regressions.
  bumped.each do |bump|
    next unless bump.kind == :introduced_vulns

    found << Regression.new(
      kind: :bump_introduced_vulns,
      gem: bump.name,
      detail: "#{bump.before_version} -> #{bump.after_version}",
    )
  end

  # Every recognised signal change maps to a regression of the same kind;
  # unrecognised kinds produce no detail and are skipped.
  signal_changes.each do |signal|
    signal.changes.each do |change|
      detail =
        case change[:kind]
        when :archived
          "repo archived since baseline"
        when :new_vulnerability
          ids = Array(change[:ids]).join(", ")
          suffix = ids.empty? ? "" : " (#{ids})"
          "#{change[:from]} -> #{change[:to]}#{suffix}"
        when :scorecard_dropped
          suffix = change[:crossed_good] ? " crossed #{SCORECARD_GOOD_THRESHOLD}" : ""
          "#{change[:from]} -> #{change[:to]}#{suffix}"
        when :version_yanked
          "pinned version yanked from rubygems"
        when :libyear_worsened
          "libyear #{change[:from]} -> #{change[:to]} (+#{change[:delta]}y; same pinned version)"
        end
      next unless detail

      found << Regression.new(kind: change[:kind], gem: signal.name, detail: detail)
    end
  end

  newly_eol = ruby_delta && ruby_delta[:newly_eol]
  found << Regression.new(kind: :ruby_eol_introduced, gem: "(ruby)", detail: "Ruby #{ruby_delta[:to]} is now EOL") if newly_eol

  found
end

#collect_signal_changes(before, after) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/still_active/diff.rb', line 101

# Compares one gem's entry across the two snapshots and lists the health
# signals that got worse. Entries are emitted in a fixed order: archived,
# new_vulnerability, scorecard_dropped, libyear_worsened, version_yanked.
#
# @param before [Hash] gem entry from the baseline snapshot
# @param after [Hash] gem entry from the current snapshot
# @return [Array<Hash>] one hash per change, each carrying a :kind symbol
def collect_signal_changes(before, after)
  same_version = before["version_used"] == after["version_used"]
  found = []

  found << { kind: :archived, from: false, to: true } if after["archived"] && !before["archived"]

  vulns_before = vuln_count(before)
  vulns_after = vuln_count(after)
  if vulns_after > vulns_before && same_version
    # A plain set difference covers the common case. Known edge: when an
    # existing GHSA later gains a CVE alias, the alias is listed as "new" even
    # though it is the same issue re-keyed. Because the count comparison above
    # gates this branch, that only ever adds noise to the detail string.
    fresh_ids = advisory_ids(after) - advisory_ids(before)
    found << { kind: :new_vulnerability, from: vulns_before, to: vulns_after, ids: fresh_ids.first(3) }
  end

  score_before = before["scorecard_score"]
  score_after = after["scorecard_score"]
  if score_before && score_after
    score_drop = score_before - score_after
    # OSSF treats >= 7.0 as "good". Landing exactly on 7.0 is still good (a
    # 7.5 -> 7.0 dip is noise inside the safe zone); only ending below 7.0
    # counts as crossing.
    left_good_zone = score_before >= SCORECARD_GOOD_THRESHOLD && score_after < SCORECARD_GOOD_THRESHOLD
    if left_good_zone || score_drop >= SCORECARD_DROP_THRESHOLD
      found << { kind: :scorecard_dropped, from: score_before, to: score_after, crossed_good: left_good_zone }
    end
  end

  libyear_before = before["libyear"]
  libyear_after = after["libyear"]
  if same_version && libyear_before && libyear_after
    # Same pinned version with a larger libyear means upstream shipped and we
    # did not follow: that IS a regression. When version_used moved we
    # deliberately stay quiet; moving forward is not a PR regression even if
    # libyear-to-latest technically grows because upstream releases faster.
    growth = libyear_after - libyear_before
    if growth > LIBYEAR_DELTA_THRESHOLD
      found << { kind: :libyear_worsened, from: libyear_before, to: libyear_after, delta: growth.round(2) }
    end
  end

  found << { kind: :version_yanked } if after["version_yanked"] && !before["version_yanked"]

  found
end

#ruby_delta(before, after) ⇒ Object



200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/still_active/diff.rb', line 200

# Summarises how the Ruby runtime entry changed between the two snapshots.
#
# @param before [Hash, nil] "ruby" section of the baseline snapshot
# @param after [Hash, nil] "ruby" section of the current snapshot
# @return [Hash, nil] nil when both sides are nil; otherwise a hash with
#   :version_changed, :from, :to, :newly_eol, :libyear_before, :libyear_after
def ruby_delta(before, after)
  return nil if [before, after].all?(&:nil?)

  # A side that is missing is compared as an empty entry.
  old_ruby = before || {}
  new_ruby = after || {}
  old_version = old_ruby["version"]
  new_version = new_ruby["version"]

  {
    version_changed: old_version != new_version,
    from: old_version,
    to: new_version,
    # True only when the baseline was not EOL and the current side is.
    newly_eol: !(old_ruby["eol"] || !new_ruby["eol"]),
    libyear_before: old_ruby["libyear"],
    libyear_after: new_ruby["libyear"],
  }
end

#validate_schema!(snapshot, role) ⇒ Object



76
77
78
79
80
81
# File 'lib/still_active/diff.rb', line 76

# Ensures a snapshot declares a schema_version listed in
# SUPPORTED_SCHEMA_VERSIONS.
#
# @param snapshot [Hash] parsed snapshot; only snapshot["schema_version"] is read
# @param role [String] label used in the error message ("baseline", "current")
# @return [nil] when the version is supported
# @raise [UnsupportedSchemaError] when the version is absent from the list
def validate_schema!(snapshot, role)
  declared = snapshot["schema_version"]

  unless SUPPORTED_SCHEMA_VERSIONS.include?(declared)
    accepted = SUPPORTED_SCHEMA_VERSIONS.join(", ")
    raise UnsupportedSchemaError, "#{role} has schema_version=#{declared.inspect}; supported: #{accepted}"
  end
end

#vuln_count(data) ⇒ Object



215
216
217
# File 'lib/still_active/diff.rb', line 215

# Reads a gem entry's "vulnerability_count" as an Integer.
#
# @param data [Hash] gem entry
# @return [Integer] the stored count coerced with #to_i (a missing value gives 0)
def vuln_count(data)
  recorded = data["vulnerability_count"]
  recorded.to_i
end