@@ -58,10 +58,12 @@ CREATE TABLE packages (
5858 latest_release_at timestamptz ,
5959 dependent_packages_count int ,
6060 dependent_repos_count int ,
61- -- has_critical_vulnerability bool NOT NULL DEFAULT FALSE,
62- -- Deferred: semantics undecided between (a) any advisory with no fixed_version vs
63- -- (b) latest_version falls inside an affected semver range. Lateral join against
64- -- advisory_packages used in queries until this is resolved.
61+ -- has_critical_vulnerability: TRUE iff latest_version is inside an active
62+ -- affected range of a critical advisory (CVSS >= 7.0) OR a MAL-* malicious-
63+ -- package advisory matches the package. Maintained by the deriveCriticalFlag
64+ -- activity in packages_worker/src/osv/. See ADR-0001 §`has_critical_vulnerability`
65+ -- semantics for the rationale: affected-semver-range check (option b) plus the MAL-* override.
66+ has_critical_vulnerability bool NOT NULL DEFAULT FALSE,
6567 criticality_score numeric (10 , 4 ),
6668 -- is_critical and last_rank_pass_at are not in the original pckgs.md spec; added so
6769 -- the packages table can answer "is this package critical?" without joining packages_universe,
@@ -82,8 +84,12 @@ CREATE INDEX ON packages (ecosystem, name);
8284
8385CREATE INDEX ON packages USING gin (keywords);
8486
85- -- INDEX on has_critical_vulnerability removed — column is commented out above.
86- -- Uncomment both when semantics are decided.
87+ -- Partial index on has_critical_vulnerability TRUE rows only — that's the bucket
88+ -- the security overlay query needs ("list all packages with a known critical
89+ -- vuln"). The FALSE rows dominate the table and don't need an index.
90+ CREATE INDEX ON packages (has_critical_vulnerability)
91+ WHERE
92+ has_critical_vulnerability;
8793
8894CREATE INDEX ON packages (criticality_score DESC )
8995WHERE
@@ -569,6 +575,15 @@ CREATE TABLE advisories (
569575 aliases text [], -- CVE-XXXX, GHSA-...
570576 severity text , -- 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL'
571577 cvss numeric (3 , 1 ),
578+ -- Provenance of the cvss value above. Lets downstream consumers distinguish
579+ -- a real vendor-supplied vector from a synthesized qualitative fallback.
580+ -- See ADR-0001 §CVSS scoring strategy. Allowed values:
581+ -- 'osv_cvss_v3' numeric score from a CVSS_V3 vector
582+ -- 'osv_cvss_v4' reserved; v4 numeric scoring deferred
583+ -- 'osv_qualitative_fallback' synthesized from database_specific.severity
584+ -- 'osv_malicious_package' MAL-* id with no CVSS vector
585+ -- Extensible to 'ghsa' | 'nvd' as additional sources come online.
586+ cvss_source text ,
572587 -- >= 7.0 intentional: treat HIGH + CRITICAL both as actionable
573588 is_critical bool GENERATED ALWAYS AS (cvss >= 7 .0 ) STORED,
574589 summary text ,
@@ -599,10 +614,27 @@ CREATE INDEX ON advisory_packages (package_id)
599614WHERE
600615 package_id IS NOT NULL ;
601616
602- -- Version ranges affected by an advisory per package.
617+ -- Drives the resolveMissingPackageIds catch-up UPDATE in deriveCriticalFlag:
618+ -- the query filters WHERE package_id IS NULL and joins on (ecosystem,
619+ -- package_name). The non-partial (ecosystem, package_name) index above is
620+ -- usable here too (the planner just adds a Filter on package_id IS NULL), but
621+ -- as the table grows the vast majority of rows have package_id IS NOT NULL,
622+ -- so the non-partial scan ends up filtering out most of what it reads. This
623+ -- partial index only contains the still-unresolved rows, keeping it tiny
624+ -- regardless of total table size and making the daily catch-up O(unresolved)
625+ -- instead of O(total).
626+ CREATE INDEX ON advisory_packages (ecosystem, package_name)
627+ WHERE
628+ package_id IS NULL ;
629+
630+ -- Version ranges affected by an advisory per package. The OSV ingest worker
631+ -- (packages_worker/src/osv) populates introduced_version / fixed_version /
632+ -- last_affected. range_raw / unaffected_raw are reserved for the future
633+ -- deps.dev BQ ingest worker, which writes the raw range strings without
634+ -- parsing them into structured boundaries. The OSV upsert path only deletes
635+ -- rows where range_raw and unaffected_raw are both NULL, so deps.dev rows
636+ -- are not clobbered when OSV re-syncs.
603637-- COALESCE prevents silent duplicates when introduced_version, fixed_version, or last_affected is NULL (a plain unique index treats NULLs as distinct).
604- -- BQ-sourced rows populate range_raw / unaffected_raw only; introduced/fixed/last_affected
605- -- are populated by a future range-parsing workstream.
606638CREATE TABLE advisory_affected_ranges (
607639 id bigserial PRIMARY KEY ,
608640 advisory_package_id bigint NOT NULL REFERENCES advisory_packages (id),
@@ -613,7 +645,18 @@ CREATE TABLE advisory_affected_ranges (
613645 unaffected_raw text -- raw UnaffectedVersions string from deps.dev BQ
614646);
615647
616- CREATE UNIQUE INDEX ON advisory_affected_ranges (advisory_package_id, COALESCE(introduced_version, ' ' ));
648+ -- Full-tuple uniqueness so two ranges sharing introduced_version but differing
649+ -- in fixed_version or last_affected (cross-distro patches, partial fixes in a
650+ -- single advisory) both survive insertion. The narrower (advisory_package_id,
651+ -- introduced_version) form silently collapsed those cases to one row, dropping
652+ -- the wider range and under-reporting vulnerable windows in the derive step.
653+ -- See ADR-0001 §`advisory_affected_ranges` uniqueness scope.
654+ CREATE UNIQUE INDEX ON advisory_affected_ranges (
655+ advisory_package_id,
656+ COALESCE(introduced_version, ' ' ),
657+ COALESCE(fixed_version, ' ' ),
658+ COALESCE(last_affected, ' ' )
659+ );
617660
618661CREATE INDEX ON advisory_affected_ranges (advisory_package_id);
619662
0 commit comments