@@ -7,7 +7,11 @@ CREATE TABLE packages_universe (
77 ecosystem text NOT NULL ,
88 namespace text ,
99 name text NOT NULL ,
10- downloads_30d bigint ,
10+ -- Cached latest 30-day window count. Written by the same weekly ranking worker that upserts rows into
11+ -- the downloads_last_30d table (keyed by purl/end_date). This column is the denormalized latest value
12+ -- used directly by rank_packages_universe() to avoid a join; the downloads_last_30d table holds the
13+ -- full rolling-window timeline.
14+ downloads_last_30d bigint ,
1115 dependent_packages_count int ,
1216 dependent_repos_count int ,
1317 criticality_score numeric (10 , 4 ),
@@ -54,7 +58,6 @@ CREATE TABLE packages (
5458 latest_release_at timestamptz ,
5559 dependent_packages_count int ,
5660 dependent_repos_count int ,
57- downloads_last_month bigint ,
5861 -- has_critical_vulnerability bool NOT NULL DEFAULT FALSE,
5962 -- Deferred: semantics undecided between (a) any advisory with no fixed_version vs
6063 -- (b) latest_version falls inside an affected semver range. Lateral join against
@@ -79,10 +82,6 @@ CREATE INDEX ON packages (ecosystem, name);
7982
8083CREATE INDEX ON packages USING gin (keywords);
8184
82- CREATE INDEX ON packages (downloads_last_month DESC )
83- WHERE
84- status = ' active' ;
85-
8685-- INDEX on has_critical_vulnerability removed — column is commented out above.
8786-- Uncomment both when semantics are decided.
8887
@@ -126,7 +125,6 @@ CREATE TABLE versions (
126125 is_yanked bool,
127126 is_prerelease bool NOT NULL DEFAULT FALSE,
128127 license text , -- SPDX where available; can differ per version
129- download_count bigint , -- per-version where available (npm, crates)
130128 last_synced_at timestamptz NOT NULL DEFAULT NOW(),
131129 PRIMARY KEY (id, package_id),
132130 UNIQUE (package_id, number )
@@ -646,9 +644,27 @@ CREATE TABLE package_maintainers (
646644);
647645
648646-- ============================================================
649- -- DOWNLOADS (time-series, partitioned by month via pg_partman)
647+ -- DOWNLOADS
648+ --
649+ -- Two tables track download volume at different tiers and granularities:
650+ --
651+ -- downloads_daily (tier 2 — packages)
652+ -- Source of truth for daily download counts. One row per package per day.
653+ -- No denormalized rollup on the packages table — consumers SUM over this
654+ -- table when they need a window (e.g. last 30 days).
655+ --
656+ -- downloads_last_30d (tier 3 — packages_universe)
657+ -- Rolling 30-day download timeline keyed by purl. Each row represents one
658+ -- 30-day window (start_date..end_date). Keyed by purl so rows survive the
659+ -- weekly truncation of packages_universe. The latest window's count is also
660+ -- cached in packages_universe.downloads_last_30d for fast access by the
661+ -- criticality-ranking function (no join needed).
662+ --
663+ -- ============================================================
664+ -- DOWNLOADS DAILY (tier 2 — packages, daily granularity)
650665--
651- -- pg_partman MUST be enabled in OCI config before this migration runs:
666+ -- Partitioned by month via pg_partman. pg_partman MUST be enabled in OCI
667+ -- config before this migration runs:
652668-- OCI Console → Database → Configuration → Extensions → enable pg_partman
653669--
654670-- After enabling, run the setup below (once, outside Flyway or in a
@@ -676,14 +692,65 @@ CREATE TABLE package_maintainers (
676692-- ============================================================
677693CREATE TABLE downloads_daily (
678694 id bigserial ,
679- package_id bigint NOT NULL ,
695+ package_id bigint NOT NULL REFERENCES packages (id) , -- every count must belong to an existing packages row
680696 date date NOT NULL , -- partition key
681697 count bigint NOT NULL ,
682698 PRIMARY KEY (id, date ), -- includes date because the partition key must be part of the PK
683699 UNIQUE (package_id, date ) -- at most one row per package per day
684700)
685701PARTITION BY RANGE (date ); -- monthly partitions are created by pg_partman
686702
703+ -- ============================================================
704+ -- DOWNLOADS LAST 30D (tier 3 — packages_universe, rolling 30-day granularity)
705+ --
706+ -- Historical timeline of rolling 30-day download counts, keyed by purl.
707+ -- Each row captures one window: downloads from start_date to end_date (inclusive).
708+ -- Keyed by purl (not packages_universe.id) so rows survive the weekly
709+ -- truncation of packages_universe. The latest window is also written
710+ -- to packages_universe.downloads_last_30d column for fast access by the ranking function.
711+ --
712+ -- Writers should upsert: INSERT ... ON CONFLICT (purl, end_date) DO UPDATE SET count = EXCLUDED.count, start_date = EXCLUDED.start_date
713+ -- PK includes end_date because Postgres requires the partition key to be
714+ -- part of the primary key on range-partitioned tables.
715+ --
716+ -- Partitioned by month via pg_partman. pg_partman MUST be enabled in OCI
717+ -- config before this migration runs:
718+ -- OCI Console → Database → Configuration → Extensions → enable pg_partman
719+ --
720+ -- After enabling, run the setup below (once, outside Flyway or in a
721+ -- separate migration) to register pg_partman and create initial partitions:
722+ --
723+ -- CREATE EXTENSION IF NOT EXISTS pg_partman SCHEMA partman;
724+ --
725+ -- SELECT partman.create_parent(
726+ -- p_parent_table => 'public.downloads_last_30d',
727+ -- p_control => 'end_date',
728+ -- p_interval => '1 month',
729+ -- p_premake => 3 -- pre-creates 3 future monthly partitions
730+ -- );
731+ --
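732+ -- -- pg_cron job to maintain partitions (also needs pg_cron enabled in OCI). run_maintenance_proc() covers every pg_partman-managed table, so one scheduled job is enough — skip this if one already exists: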
733+ -- SELECT cron.schedule('partman-maintain-30d', '0 2 * * *',
734+ -- $$CALL partman.run_maintenance_proc()$$);
735+ --
736+ -- Without this setup, inserts into downloads_last_30d will fail with
737+ -- 'no partition of relation "downloads_last_30d" found for row'. The table structure below is correct;
738+ -- only the partition management setup is deferred.
739+ --
740+ -- ============================================================
741+ CREATE TABLE downloads_last_30d (
742+ id bigserial ,
743+ purl text NOT NULL , -- key that survives the weekly truncation of packages_universe
744+ start_date date NOT NULL , -- first day of the window (inclusive)
745+ end_date date NOT NULL , -- last day of the window (inclusive); partition key
746+ count bigint NOT NULL , -- downloads within start_date..end_date
747+ PRIMARY KEY (id, end_date), -- includes end_date because the partition key must be part of the PK
748+ UNIQUE (purl, end_date) -- upsert target: ON CONFLICT (purl, end_date)
749+ )
750+ PARTITION BY RANGE (end_date); -- monthly partitions are created by pg_partman
751+
752+ CREATE INDEX ON downloads_last_30d (purl, end_date DESC ); -- NOTE(review): UNIQUE (purl, end_date) already builds a btree that can be scanned backward for single-purl lookups; confirm this index is needed (e.g. for ORDER BY purl, end_date DESC)
753+
687754-- ============================================================
688755-- CRITICALITY RANKING FUNCTION
689756-- ============================================================
@@ -707,7 +774,7 @@ BEGIN
707774 -- and to compress the gap between small and large values (e.g. LN(1)=0
708775 -- vs LN(2)≈0.69 gives a gentler floor than LN(0)=-∞). Typically 1.0.
709776 --
710- -- Until the npm-registry / Maven downloads enricher runs, downloads_30d
777+ -- Until the npm-registry / Maven downloads enricher runs, downloads_last_30d
711778 -- is NULL on every row. weight_downloads contributes 0 to the score;
712779 -- ranking effectively reduces to:
713780 -- LN(1 + dependent_repos_count) * weight_dependent_repos
@@ -717,7 +784,7 @@ BEGIN
717784 WITH new_scores AS (
718785 SELECT
719786 id,
720- ( LN(log_smoothing + COALESCE(downloads_30d , 0 )) * weight_downloads
787+ ( LN(log_smoothing + COALESCE(downloads_last_30d , 0 )) * weight_downloads
721788 + LN(log_smoothing + COALESCE(dependent_repos_count, 0 )) * weight_dependent_repos
722789 + LN(log_smoothing + COALESCE(dependent_packages_count, 0 )) * weight_dependent_packages
723790 )::numeric (10 , 4 ) AS new_score
0 commit comments