|
| 1 | +-- Renames criticality_score → impact on both packages_universe and packages, |
| 2 | +-- and installs rank_packages_universe() with the updated formula. |
| 3 | +-- |
| 4 | +-- Formula (ADR-0001 §Criticality scoring methodology): |
| 5 | +-- impact = w_downloads * pct_rank( LOG(1 + downloads_last_30d) ) within ecosystem |
| 6 | +-- + w_dep_pkgs * pct_rank( LOG(1 + dependent_count) ) within ecosystem |
| 7 | +-- + w_transitive * pct_rank( LOG(1 + transitive_dependent_count) ) within ecosystem |
| 8 | +-- |
| 9 | +-- Default weights: 0.25 / 0.25 / 0.50 (sum to 1.0). |
| 10 | +-- All weights and the top-N budget are call-time parameters — tunable without |
| 11 | +-- schema or code changes. |
| 12 | +-- |
| 13 | +-- Steps inside the function: |
| 14 | +-- 1. Score — compute impact via weighted PERCENT_RANK() |
| 15 | +-- 2. Rank — ROW_NUMBER() per ecosystem, flag top-N as is_critical |
| 16 | +-- 2.5 Spotlight — force is_critical = TRUE for rows in package_criticality_spotlight |
| 17 | +-- 3. Propagate — copy impact + is_critical onto the packages table |
| 18 | + |
-- Rename the score column to "impact" on both tables. rank_packages_universe()
-- (installed by this same migration) reads and writes the column under the new
-- name, so both renames must be applied before the function is first called.
ALTER TABLE packages_universe RENAME COLUMN criticality_score TO impact;
ALTER TABLE packages RENAME COLUMN criticality_score TO impact;
| 24 | + |
-- rank_packages_universe
--
-- Recomputes impact, rank_in_ecosystem and is_critical on packages_universe,
-- applies manual spotlight overrides, then copies impact + is_critical onto
-- packages.
--
-- Parameters:
--   weight_downloads            weight applied to the downloads_last_30d percentile
--   weight_dependent_packages   weight applied to the dependent_count percentile
--   weight_transitive           weight applied to the transitive_dependent_count percentile
--   critical_top_n_by_ecosystem JSON object mapping ecosystem -> number of
--                               top-ranked rows flagged is_critical. An ecosystem
--                               missing from the object (or mapped to JSON null)
--                               gets no rank-based critical rows.
--
-- Returns one row:
--   scored_rows      rows of packages_universe rewritten by the scoring step
--   ranked_rows      rows whose rank_in_ecosystem or is_critical actually
--                    changed in the ranking step (unchanged rows are skipped)
--   propagated_rows  rows of packages updated by the propagation step
-- Rows touched by the spotlight override are not included in any counter.
--
-- Raises null_value_not_allowed if any weight is NULL.
CREATE OR REPLACE FUNCTION rank_packages_universe(
    weight_downloads            numeric DEFAULT 0.25,
    weight_dependent_packages   numeric DEFAULT 0.25,
    weight_transitive           numeric DEFAULT 0.50,
    critical_top_n_by_ecosystem jsonb   DEFAULT '{
        "npm":       210000,
        "pypi":      140000,
        "maven":     120000,
        "nuget":     70000,
        "packagist": 56000,
        "go":        42000,
        "cargo":     28000,
        "rubygems":  21000,
        "docker":    13000
    }'::jsonb
)
RETURNS TABLE(scored_rows int, ranked_rows int, propagated_rows int)
LANGUAGE plpgsql AS $$
DECLARE
    n_scored     int;
    n_ranked     int;
    n_propagated int;
BEGIN
    -- A NULL weight makes the whole weighted sum NULL, which would overwrite
    -- impact with NULL on every row. Fail loudly instead of wiping the scores.
    IF weight_downloads IS NULL
       OR weight_dependent_packages IS NULL
       OR weight_transitive IS NULL
    THEN
        RAISE EXCEPTION 'rank_packages_universe: weights must not be NULL'
            USING ERRCODE = 'null_value_not_allowed';
    END IF;

    -- ── Step 1: score ──────────────────────────────────────────────────────────
    -- impact = weighted sum of three per-ecosystem PERCENT_RANK() values.
    -- NULL counts are treated as 0. Every row is rewritten on every pass because
    -- last_rank_pass_at is updated unconditionally (schema requirement).
    -- NOTE(review): LOG(1 + x) raises an error when x <= -1; this assumes the
    -- three count columns are never negative — confirm against the schema.
    WITH percentile_scores AS (
        SELECT
            id,
            (
                weight_downloads * PERCENT_RANK() OVER (
                    PARTITION BY ecosystem
                    ORDER BY LOG(1 + COALESCE(downloads_last_30d, 0)))

              + weight_dependent_packages * PERCENT_RANK() OVER (
                    PARTITION BY ecosystem
                    ORDER BY LOG(1 + COALESCE(dependent_count, 0)))

              + weight_transitive * PERCENT_RANK() OVER (
                    PARTITION BY ecosystem
                    ORDER BY LOG(1 + COALESCE(transitive_dependent_count, 0)))
            )::numeric(10, 4) AS new_impact
        FROM packages_universe
    )
    UPDATE packages_universe pu
    SET impact            = ps.new_impact,
        last_rank_pass_at = NOW()
    FROM percentile_scores ps
    WHERE pu.id = ps.id;

    GET DIAGNOSTICS n_scored = ROW_COUNT;

    -- ── Step 2: rank + flag ────────────────────────────────────────────────────
    -- Only rows with a purl are ranked; rows without one keep whatever
    -- rank_in_ecosystem / is_critical they already had.
    -- id is the tie-breaker so the ranking is deterministic for equal impact.
    WITH ranked AS (
        SELECT
            id,
            ecosystem,
            ROW_NUMBER() OVER (
                PARTITION BY ecosystem
                ORDER BY impact DESC NULLS LAST, id
            ) AS r
        FROM packages_universe
        WHERE purl IS NOT NULL
    ),
    flagged AS (
        SELECT
            id,
            r,
            -- Compare against numeric rather than casting to int: a budget
            -- written as 210000.0 is valid JSON, but its text form is rejected
            -- by ::int. COALESCE maps "ecosystem has no budget" (NULL) to FALSE.
            COALESCE(
                r <= (critical_top_n_by_ecosystem ->> ecosystem)::numeric,
                FALSE
            ) AS new_is_critical
        FROM ranked
    )
    UPDATE packages_universe pu
    SET rank_in_ecosystem = f.r,
        is_critical       = f.new_is_critical
    FROM flagged f
    WHERE pu.id = f.id
      -- Skip rows where nothing changed to avoid needless row rewrites.
      AND (
            pu.rank_in_ecosystem IS DISTINCT FROM f.r
         OR pu.is_critical       IS DISTINCT FROM f.new_is_critical
      );

    GET DIAGNOSTICS n_ranked = ROW_COUNT;

    -- ── Step 2.5: apply spotlight overrides ───────────────────────────────────
    -- Force is_critical = TRUE for any row listed in package_criticality_spotlight,
    -- regardless of computed score or rank. Runs after Step 2 so the override
    -- survives every automated re-rank pass.
    -- namespace is matched with IS NOT DISTINCT FROM so a NULL namespace on both
    -- sides counts as a match.
    -- is_critical is tested with IS DISTINCT FROM TRUE (not "= FALSE") so rows
    -- whose flag is still NULL are overridden as well.
    UPDATE packages_universe pu
    SET is_critical = TRUE
    FROM package_criticality_spotlight s
    WHERE pu.ecosystem = s.ecosystem
      AND pu.namespace IS NOT DISTINCT FROM s.namespace
      AND pu.name = s.name
      AND pu.is_critical IS DISTINCT FROM TRUE;

    -- ── Step 3: propagate to packages ─────────────────────────────────────────
    -- Every matching packages row is rewritten on every pass because
    -- last_rank_pass_at is updated unconditionally (schema requirement).
    -- NOTE(review): assumes (purl, ecosystem) identifies at most one
    -- packages_universe row; with duplicates the source row is arbitrary — confirm.
    UPDATE packages p
    SET impact            = pu.impact,
        is_critical       = pu.is_critical,
        last_rank_pass_at = NOW()
    FROM packages_universe pu
    WHERE p.purl = pu.purl
      AND p.ecosystem = pu.ecosystem;

    GET DIAGNOSTICS n_propagated = ROW_COUNT;

    RETURN QUERY SELECT n_scored, n_ranked, n_propagated;
END;
$$;
0 commit comments