diff --git a/.github/workflows/vulndb-1.7.3.yaml b/.github/workflows/vulndb-1.7.3.yaml new file mode 100644 index 000000000..587058174 --- /dev/null +++ b/.github/workflows/vulndb-1.7.3.yaml @@ -0,0 +1,89 @@ +name: VulnDB Workflow v2 (DevGuard v1.7.3) + +on: + workflow_dispatch: + schedule: + - cron: "0 */1 * * *" + +permissions: + contents: read + packages: write + +env: + POSTGRES_DB: devguard + POSTGRES_USER: devguard + POSTGRES_HOST: localhost + POSTGRES_PASSWORD: not_reachable_from_the_internet + +jobs: + build: + runs-on: ubuntu-latest + env: + FRONTEND_URL: "doesntmatter" + services: + postgres: + image: ghcr.io/l3montree-dev/devguard/postgresql:v1.3.1 + env: + POSTGRES_DB: ${{env.POSTGRES_DB}} + POSTGRES_USER: ${{env.POSTGRES_USER}} + POSTGRES_PASSWORD: ${{env.POSTGRES_PASSWORD}} + ports: + - 5432:5432 + options: '--health-cmd="pg_isready -U devguard" --health-interval=10s --health-timeout=5s --health-retries=5 --tmpfs /docker-entrypoint-initdb.d --tmpfs /run/postgresql' + steps: + - name: Install postgresql client + run: | + sudo apt-get update + sudo apt-get install -y wget + wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - + echo "deb http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list + sudo apt-get update + sudo apt-get install -y postgresql-client-16 + - name: Create semver extension + run: | + PGPASSWORD=${{env.POSTGRES_PASSWORD}} psql -h localhost -U devguard devguard -c "CREATE EXTENSION IF NOT EXISTS semver;" + - name: Checkout code + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - https://github.com/actions/checkout/releases/tag/v5.0.0 + with: + persist-credentials: false + ref: v1.7.3 # latest stable version for vulndb v2 + - name: Install Golang + uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0 - https://github.com/actions/setup-go/releases/tag/v5.5.0 + with: + go-version: "1.25" + - name: 
Export the vulnerability database archive + run: | + # writes the database snapshot files and bundles them into a single tar.zst archive + go run ./cmd/devguard-cli/main.go vulndb export --diffToPrevious + + - name: Install Cosign + uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0 + with: + cosign-release: "v2.6.1" + + - name: Write signing key to disk + run: echo "${{ secrets.COSIGN_PRIVATE_KEY }}" > cosign.key + + - name: Sign the database archive + env: + COSIGN_PASSWORD: "" + run: | + cosign import-key-pair --key cosign.key + cosign sign-blob --yes --key import-cosign.key vulndb.tar.zst > vulndb.tar.zst.sig + - name: Login to GitHub Container Registry + uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Setup oras cli + uses: oras-project/setup-oras@22ce207df3b08e061f537244349aac6ae1d214f6 # v1 + + - name: Push the database archive to GitHub Container Registry + run: | + oras push ghcr.io/l3montree-dev/devguard/vulndb/v2:latest \ + vulndb.tar.zst + - name: Push the archive signature to the GitHub Container Registry + run: | + oras push ghcr.io/l3montree-dev/devguard/vulndb/v2:latest.sig \ + vulndb.tar.zst.sig diff --git a/.github/workflows/vulndb.yaml b/.github/workflows/vulndb.yaml index 54449a6cd..0e0285f3a 100644 --- a/.github/workflows/vulndb.yaml +++ b/.github/workflows/vulndb.yaml @@ -80,9 +80,9 @@ jobs: - name: Push the database archive to GitHub Container Registry run: | - oras push ghcr.io/l3montree-dev/devguard/vulndb/v2:latest \ + oras push ghcr.io/l3montree-dev/devguard/vulndb/v3:latest \ vulndb.tar.zst - name: Push the archive signature to the GitHub Container Registry run: | - oras push ghcr.io/l3montree-dev/devguard/vulndb/v2:latest.sig \ + oras push ghcr.io/l3montree-dev/devguard/vulndb/v3:latest.sig \ vulndb.tar.zst.sig diff --git a/.vscode/launch.json b/.vscode/launch.json index 
9be93827c..882376a07 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -115,7 +115,20 @@ "program": "${workspaceRoot}/cmd/devguard-cli/main.go", "args": [ "vulndb", - "import" + "import", + ] + }, + { + "name": "VulnDB Local Import", + "type": "go", + "request": "launch", + "cwd": "${workspaceRoot}", + "mode": "auto", + "program": "${workspaceRoot}/cmd/devguard-cli/main.go", + "args": [ + "vulndb", + "import", + "--localArchive", ] }, { diff --git a/database/migrations/20260622103622_add-euvd-kev-information-to-cves.up.sql b/database/migrations/20260622103622_add-euvd-kev-information-to-cves.up.sql new file mode 100644 index 000000000..ad471412a --- /dev/null +++ b/database/migrations/20260622103622_add-euvd-kev-information-to-cves.up.sql @@ -0,0 +1 @@ +ALTER TABLE public.cves ADD COLUMN euvd_exploit_add date; \ No newline at end of file diff --git a/database/models/cve_model.go b/database/models/cve_model.go index 79ab917e9..a1730bdfa 100644 --- a/database/models/cve_model.go +++ b/database/models/cve_model.go @@ -48,6 +48,7 @@ type CVE struct { Risk dtos.RiskMetrics `json:"risk" gorm:"-" cve:"risk"` Exploits []Exploit `json:"exploits" gorm:"foreignKey:CVEID;references:CVE;"` Relationships []CVERelationship `json:"relationships" gorm:"foreignKey:SourceCVE;references:CVE;constraint:OnDelete:CASCADE;" cve:"relationships"` + EUVDExploitAdd *datatypes.Date `json:"euvdExploitAdd" gorm:"type:date"` } type Weakness struct { diff --git a/database/models/cve_relationship_model.go b/database/models/cve_relationship_model.go index 4927bda85..62b4c7bdc 100644 --- a/database/models/cve_relationship_model.go +++ b/database/models/cve_relationship_model.go @@ -4,8 +4,8 @@ import "github.com/l3montree-dev/devguard/dtos" // CVERelationship stores source/target/type — TargetCVE is a plain string, not a DB FK constraint. 
type CVERelationship struct { - SourceCVE string `json:"source_cve" gorm:"type:text;primaryKey"` - TargetCVE string `json:"target_cve" gorm:"type:text;primaryKey"` + SourceCVE string `json:"source_cve" gorm:"type:text;primaryKey"` // an external CVE-ID (like DEBIAN-CVE-...) + TargetCVE string `json:"target_cve" gorm:"type:text;primaryKey"` // the official CVE-XXXX-... the external CVE-ID relates to RelationshipType dtos.RelationshipType `json:"relationship_type" gorm:"type:text;primaryKey"` // TargetCVEData is populated by GORM nested preload. It is nil when the target // CVE does not exist in this database — no DB-level FK constraint is added. diff --git a/database/repositories/cve_repository.go b/database/repositories/cve_repository.go index 1073bc5f5..499fa4183 100644 --- a/database/repositories/cve_repository.go +++ b/database/repositories/cve_repository.go @@ -258,40 +258,3 @@ func (g *cveRepository) UpdateEpssBatch(ctx context.Context, tx *gorm.DB, batch // avoid slow sql log return g.GetDB(ctx, tx).Exec(sql, ids, epss, percentiles).Error } - -// this function is used by the CISA KEV mirror function to update the KEV information for all cves -func (g *cveRepository) UpdateCISAKEVBatch(ctx context.Context, tx *gorm.DB, batch []models.CVE) error { - ids := make([]string, len(batch)) - exploitAdds := make([]any, len(batch)) - actionDues := make([]any, len(batch)) - requiredActions := make([]string, len(batch)) - vulnNames := make([]string, len(batch)) - - for i := range batch { - ids[i] = batch[i].CVE - if batch[i].CISAExploitAdd != nil { - exploitAdds[i] = time.Time(*batch[i].CISAExploitAdd).Format("2006-01-02") - } - if batch[i].CISAActionDue != nil { - actionDues[i] = time.Time(*batch[i].CISAActionDue).Format("2006-01-02") - } - requiredActions[i] = *batch[i].CISARequiredAction - vulnNames[i] = *batch[i].CISAVulnerabilityName - } - - sql := `UPDATE cves SET - cisa_exploit_add = new.cisa_exploit_add::date, - cisa_action_due = new.cisa_action_due::date, - 
cisa_required_action = new.cisa_required_action, - cisa_vulnerability_name = new.cisa_vulnerability_name - FROM (SELECT - unnest($1::text[]) as cve, - unnest($2::text[]) as cisa_exploit_add, - unnest($3::text[]) as cisa_action_due, - unnest($4::text[]) as cisa_required_action, - unnest($5::text[]) as cisa_vulnerability_name - ) as new - WHERE cves.cve = new.cve;` - - return g.GetDB(ctx, tx).Session(&gorm.Session{Logger: logger.Default.LogMode(logger.Silent)}).Exec(sql, ids, exploitAdds, actionDues, requiredActions, vulnNames).Error -} diff --git a/dtos/cve_relationship_dto.go b/dtos/cve_relationship_dto.go index 200a904fc..458dbe139 100644 --- a/dtos/cve_relationship_dto.go +++ b/dtos/cve_relationship_dto.go @@ -6,6 +6,7 @@ const ( RelationshipTypeAlias RelationshipType = "alias" RelationshipTypeUpstream RelationshipType = "upstream" RelationshipTypeRelated RelationshipType = "related" + RelationshipTypeEUVD RelationshipType = "euvd" // flag relationships only imported through euvd explicitly ) // CVERelationshipDTO is the API response shape. diff --git a/dtos/dependency_vuln_dto.go b/dtos/dependency_vuln_dto.go index ad6949680..10da726a9 100644 --- a/dtos/dependency_vuln_dto.go +++ b/dtos/dependency_vuln_dto.go @@ -56,6 +56,7 @@ type CVEDTO struct { Risk RiskMetrics `json:"risk"` Exploits []ExploitDTO `json:"exploits"` Relationships []RelationshipDTO `json:"relationships"` + EUVDExploitAdd *datatypes.Date `json:"euvdExploitAdd" format:"date"` } type VulnState string diff --git a/mocks/mock_APIStatsCollector.go b/mocks/mock_APIStatsCollector.go new file mode 100644 index 000000000..38c17cf38 --- /dev/null +++ b/mocks/mock_APIStatsCollector.go @@ -0,0 +1,90 @@ +// Code generated by mockery; DO NOT EDIT. +// github.com/vektra/mockery +// template: testify + +package mocks + +import ( + "context" + + "github.com/l3montree-dev/devguard/telemetry" + mock "github.com/stretchr/testify/mock" +) + +// NewAPIStatsCollector creates a new instance of APIStatsCollector. 
It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. +func NewAPIStatsCollector(t interface { + mock.TestingT + Cleanup(func()) +}) *APIStatsCollector { + mock := &APIStatsCollector{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + +// APIStatsCollector is an autogenerated mock type for the APIStatsCollector type +type APIStatsCollector struct { + mock.Mock +} + +type APIStatsCollector_Expecter struct { + mock *mock.Mock +} + +func (_m *APIStatsCollector) EXPECT() *APIStatsCollector_Expecter { + return &APIStatsCollector_Expecter{mock: &_m.Mock} +} + +// CollectAPIStartupStats provides a mock function for the type APIStatsCollector +func (_mock *APIStatsCollector) CollectAPIStartupStats(ctx context.Context) telemetry.APIStats { + ret := _mock.Called(ctx) + + if len(ret) == 0 { + panic("no return value specified for CollectAPIStartupStats") + } + + var r0 telemetry.APIStats + if returnFunc, ok := ret.Get(0).(func(context.Context) telemetry.APIStats); ok { + r0 = returnFunc(ctx) + } else { + r0 = ret.Get(0).(telemetry.APIStats) + } + return r0 +} + +// APIStatsCollector_CollectAPIStartupStats_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'CollectAPIStartupStats' +type APIStatsCollector_CollectAPIStartupStats_Call struct { + *mock.Call +} + +// CollectAPIStartupStats is a helper method to define mock.On call +// - ctx context.Context +func (_e *APIStatsCollector_Expecter) CollectAPIStartupStats(ctx interface{}) *APIStatsCollector_CollectAPIStartupStats_Call { + return &APIStatsCollector_CollectAPIStartupStats_Call{Call: _e.mock.On("CollectAPIStartupStats", ctx)} +} + +func (_c *APIStatsCollector_CollectAPIStartupStats_Call) Run(run func(ctx context.Context)) *APIStatsCollector_CollectAPIStartupStats_Call { + _c.Call.Run(func(args mock.Arguments) { + var arg0 
context.Context + if args[0] != nil { + arg0 = args[0].(context.Context) + } + run( + arg0, + ) + }) + return _c +} + +func (_c *APIStatsCollector_CollectAPIStartupStats_Call) Return(aPIStats telemetry.APIStats) *APIStatsCollector_CollectAPIStartupStats_Call { + _c.Call.Return(aPIStats) + return _c +} + +func (_c *APIStatsCollector_CollectAPIStartupStats_Call) RunAndReturn(run func(ctx context.Context) telemetry.APIStats) *APIStatsCollector_CollectAPIStartupStats_Call { + _c.Call.Return(run) + return _c +} diff --git a/mocks/mock_HTTPDoer.go b/mocks/mock_HTTPDoer.go new file mode 100644 index 000000000..2a6b38f00 --- /dev/null +++ b/mocks/mock_HTTPDoer.go @@ -0,0 +1,100 @@ +// Code generated by mockery; DO NOT EDIT. +// github.com/vektra/mockery +// template: testify + +package mocks + +import ( + "net/http" + + mock "github.com/stretchr/testify/mock" +) + +// NewHTTPDoer creates a new instance of HTTPDoer. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func NewHTTPDoer(t interface { + mock.TestingT + Cleanup(func()) +}) *HTTPDoer { + mock := &HTTPDoer{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} + +// HTTPDoer is an autogenerated mock type for the HTTPDoer type +type HTTPDoer struct { + mock.Mock +} + +type HTTPDoer_Expecter struct { + mock *mock.Mock +} + +func (_m *HTTPDoer) EXPECT() *HTTPDoer_Expecter { + return &HTTPDoer_Expecter{mock: &_m.Mock} +} + +// Do provides a mock function for the type HTTPDoer +func (_mock *HTTPDoer) Do(req *http.Request) (*http.Response, error) { + ret := _mock.Called(req) + + if len(ret) == 0 { + panic("no return value specified for Do") + } + + var r0 *http.Response + var r1 error + if returnFunc, ok := ret.Get(0).(func(*http.Request) (*http.Response, error)); ok { + return returnFunc(req) + } + if returnFunc, ok := ret.Get(0).(func(*http.Request) *http.Response); ok { + r0 = returnFunc(req) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*http.Response) + } + } + if returnFunc, ok := ret.Get(1).(func(*http.Request) error); ok { + r1 = returnFunc(req) + } else { + r1 = ret.Error(1) + } + return r0, r1 +} + +// HTTPDoer_Do_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Do' +type HTTPDoer_Do_Call struct { + *mock.Call +} + +// Do is a helper method to define mock.On call +// - req *http.Request +func (_e *HTTPDoer_Expecter) Do(req interface{}) *HTTPDoer_Do_Call { + return &HTTPDoer_Do_Call{Call: _e.mock.On("Do", req)} +} + +func (_c *HTTPDoer_Do_Call) Run(run func(req *http.Request)) *HTTPDoer_Do_Call { + _c.Call.Run(func(args mock.Arguments) { + var arg0 *http.Request + if args[0] != nil { + arg0 = args[0].(*http.Request) + } + run( + arg0, + ) + }) + return _c +} + +func (_c *HTTPDoer_Do_Call) Return(response *http.Response, err error) *HTTPDoer_Do_Call { + _c.Call.Return(response, err) + return _c +} + +func (_c *HTTPDoer_Do_Call) RunAndReturn(run func(req *http.Request) 
(*http.Response, error)) *HTTPDoer_Do_Call { + _c.Call.Return(run) + return _c +} diff --git a/shared/common_interfaces.go b/shared/common_interfaces.go index c6ff223a3..fef65305f 100644 --- a/shared/common_interfaces.go +++ b/shared/common_interfaces.go @@ -205,7 +205,6 @@ type CveRepository interface { CreateCVEWithConflictHandling(ctx context.Context, tx DB, cve *models.CVE) error CreateCVEAffectedComponentsEntries(ctx context.Context, tx DB, cve *models.CVE, components []models.AffectedComponent) error UpdateEpssBatch(ctx context.Context, tx DB, batch []models.CVE) error - UpdateCISAKEVBatch(ctx context.Context, tx DB, batch []models.CVE) error } type EPSService interface { diff --git a/tests/euvd_integration_test.go b/tests/euvd_integration_test.go new file mode 100644 index 000000000..ffe445fac --- /dev/null +++ b/tests/euvd_integration_test.go @@ -0,0 +1,103 @@ +// Copyright (C) 2026 l3montree GmbH +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. +// +package tests + +import ( + "context" + "testing" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/l3montree-dev/devguard/database/models" + "github.com/l3montree-dev/devguard/vulndb" + "github.com/stretchr/testify/assert" +) + +// queryEUVDRelationships returns the euvd-derived rows (target_cve is an EUVD id) as +// "target|source|type" keys. 
+func queryEUVDRelationships(ctx context.Context, t *testing.T, pool *pgxpool.Pool) []string { + t.Helper() + conn, err := pool.Acquire(ctx) + assert.NoError(t, err) + defer conn.Release() + + rows, err := conn.Query(ctx, `SELECT target_cve, source_cve, relationship_type FROM cve_relationships WHERE target_cve LIKE 'EUVD-%'`) + assert.NoError(t, err) + defer rows.Close() + + var keys []string + for rows.Next() { + var target, source, relType string + assert.NoError(t, rows.Scan(&target, &source, &relType)) + keys = append(keys, target+"|"+source+"|"+relType) + } + assert.NoError(t, rows.Err()) + return keys +} + +// TestResolveEUVDRelationships verifies the resolution: an EUVD id becomes the target_cve and every +// cve related to its original cve becomes a source (keeping that relation's type); an EUVD id whose +// cve has no relationship is kept only when the cve exists in the cves table. +func TestResolveEUVDRelationships(t *testing.T) { + t.Parallel() + ctx := context.Background() + _, pool, terminate := InitDatabaseContainer("../initdb.sql") + defer terminate() + + cves := []models.CVE{ + makeCVE(9301, "CVE-2024-0001", "original cve with an alias", 5.0, testVector), + makeCVE(9302, "CVE-2024-0002", "the alias of 0001", 6.0, testVector), + makeCVE(9303, "CVE-2024-0003", "standalone cve present in cves", 7.0, testVector), + } + // CVE-2024-0002 is an alias of CVE-2024-0001 (source -> target). + rels := []models.CVERelationship{ + {SourceCVE: "CVE-2024-0002", TargetCVE: "CVE-2024-0001", RelationshipType: "alias"}, + } + seedCVEState(ctx, t, pool, cves, rels, nil, nil) + + // raw EUVD csv mapping: source = EUVD id, target = original cve. 
+ rawEUVD := []models.CVERelationship{ + {SourceCVE: "EUVD-2024-1", TargetCVE: "CVE-2024-0001", RelationshipType: "euvd"}, // has an alias -> resolved + {SourceCVE: "EUVD-2024-3", TargetCVE: "CVE-2024-0003", RelationshipType: "euvd"}, // no relationship, in cves -> fallback kept + {SourceCVE: "EUVD-2024-9", TargetCVE: "CVE-2024-9999", RelationshipType: "euvd"}, // no relationship, not in cves -> dropped + } + + conn, err := pool.Acquire(ctx) + assert.NoError(t, err) + defer conn.Release() + tx, err := conn.Begin(ctx) + assert.NoError(t, err) + defer tx.Rollback(ctx) //nolint:errcheck + + resolved, err := vulndb.NewEUVDService(nil, nil, pool).ResolveAndInsertEUVDRelationships(ctx, tx, rawEUVD) + assert.NoError(t, err) + assert.NoError(t, tx.Commit(ctx)) + + // the returned (gob-bound) rows must be exactly the resolved alias and the fallback. + returned := make([]string, 0, len(resolved)) + for _, r := range resolved { + returned = append(returned, r.TargetCVE+"|"+r.SourceCVE+"|"+r.RelationshipType) + } + assert.ElementsMatch(t, []string{ + "EUVD-2024-1|CVE-2024-0002|alias", // resolved: alias source linked to the EUVD id, type kept + "EUVD-2024-3|CVE-2024-0003|euvd", // fallback: direct link kept because the cve exists in cves + }, returned) + + // the live table must hold the same euvd-derived rows and no raw source=EUVD rows. + assert.ElementsMatch(t, []string{ + "EUVD-2024-1|CVE-2024-0002|alias", + "EUVD-2024-3|CVE-2024-0003|euvd", + }, queryEUVDRelationships(ctx, t, pool)) +} diff --git a/tests/quick_diff_integration_test.go b/tests/quick_diff_integration_test.go index 663018bfe..05cbf5c45 100644 --- a/tests/quick_diff_integration_test.go +++ b/tests/quick_diff_integration_test.go @@ -49,7 +49,7 @@ func makeCVE(id int64, cveStr, desc string, cvss float32, vector string) models. // seedCVEState inserts the given CVEs (plus optional EPSS/KEV) as the current // live-table state via staging→sync, so the DB reflects exactly this set. 
-func seedCVEState(ctx context.Context, t *testing.T, pool *pgxpool.Pool, cves []models.CVE, rels []models.CVERelationship, epss map[string]dtos.EPSS, kev []vulndb.CISAKEVEntry) { +func seedCVEState(ctx context.Context, t *testing.T, pool *pgxpool.Pool, cves []models.CVE, rels []models.CVERelationship, epss map[string]dtos.EPSS, kev []vulndb.KEVEntry) { t.Helper() conn, err := pool.Acquire(ctx) assert.NoError(t, err) @@ -66,7 +66,7 @@ func seedCVEState(ctx context.Context, t *testing.T, pool *pgxpool.Pool, cves [] } assert.NoError(t, vulndb.SyncAllTables(ctx, tx)) assert.NoError(t, vulndb.InsertEPSSBulk(ctx, tx, epss)) - assert.NoError(t, vulndb.InsertCISAKEVBulk(ctx, tx, kev)) + assert.NoError(t, vulndb.InsertKEVBulk(ctx, tx, kev)) assert.NoError(t, tx.Commit(ctx)) } @@ -77,7 +77,7 @@ func simulateExport( ctx context.Context, t *testing.T, pool *pgxpool.Pool, prevVersion time.Time, newCVEs []models.CVE, newRels []models.CVERelationship, - newEPSS map[string]dtos.EPSS, newKEV []vulndb.CISAKEVEntry, + newEPSS map[string]dtos.EPSS, newKEV []vulndb.KEVEntry, ) (*vulndb.QuickDiff, vulndb.IntegrityInformation) { t.Helper() conn, err := pool.Acquire(ctx) @@ -97,7 +97,7 @@ func simulateExport( } assert.NoError(t, vulndb.FlushOSVStagingTables(ctx, tx)) assert.NoError(t, vulndb.InsertEPSSBulk(ctx, tx, newEPSS)) - assert.NoError(t, vulndb.InsertCISAKEVBulk(ctx, tx, newKEV)) + assert.NoError(t, vulndb.InsertKEVBulk(ctx, tx, newKEV)) diff, err := vulndb.ComputeQuickDiff(ctx, tx, prevVersion) assert.NoError(t, err) @@ -116,7 +116,7 @@ func simulateExport( func applyQuickDiffAndVerify( ctx context.Context, t *testing.T, pool *pgxpool.Pool, decoded *vulndb.QuickDiff, - newEPSS map[string]dtos.EPSS, newKEV []vulndb.CISAKEVEntry, + newEPSS map[string]dtos.EPSS, newKEV []vulndb.KEVEntry, groundTruth vulndb.IntegrityInformation, ) { t.Helper() @@ -130,7 +130,7 @@ func applyQuickDiffAndVerify( assert.NoError(t, vulndb.ApplyQuickDiff(ctx, tx, decoded)) assert.NoError(t, 
vulndb.InsertEPSSBulk(ctx, tx, newEPSS)) - assert.NoError(t, vulndb.InsertCISAKEVBulk(ctx, tx, newKEV)) + assert.NoError(t, vulndb.InsertKEVBulk(ctx, tx, newKEV)) localIntegrity, err := vulndb.CalculateTotalIntegrityInformation(ctx, tx) assert.NoError(t, err) @@ -293,7 +293,7 @@ func TestQuickDiffCISAKEVAdded(t *testing.T) { seedCVEState(ctx, t, pool, prevCVEs, nil, nil, nil) prevVersion := time.Now() - newKEV := []vulndb.CISAKEVEntry{ + newKEV := []vulndb.KEVEntry{ {CVE: "CVE-2024-4001", RequiredAction: "patch immediately", VulnerabilityName: "Super Bug"}, } @@ -315,7 +315,7 @@ func TestQuickDiffCISAKEVRemoved(t *testing.T) { prevCVEs := []models.CVE{ makeCVE(5001, "CVE-2024-5001", "was in KEV", 8.0, testVector), } - prevKEV := []vulndb.CISAKEVEntry{ + prevKEV := []vulndb.KEVEntry{ {CVE: "CVE-2024-5001", RequiredAction: "apply workaround", VulnerabilityName: "Old Bug"}, } @@ -421,7 +421,7 @@ func TestQuickDiffCISAViaRelationship(t *testing.T) { prevVersion := time.Now() // New: CVE-B added to CISA KEV — CVE-A (alias) must also get the CISA data. - newKEV := []vulndb.CISAKEVEntry{ + newKEV := []vulndb.KEVEntry{ {CVE: "CVE-2025-8002", RequiredAction: "patch now", VulnerabilityName: "Critical Bug"}, } @@ -431,6 +431,75 @@ func TestQuickDiffCISAViaRelationship(t *testing.T) { applyQuickDiffAndVerify(ctx, t, pool, roundtripDiff(t, diff), nil, newKEV, groundTruth) } +// queryKEVDates returns cisa_exploit_add and euvd_exploit_add of a CVE as YYYY-MM-DD +// strings ("" when NULL). to_char renders in SQL to avoid timezone interpretation. 
+func queryKEVDates(ctx context.Context, t *testing.T, pool *pgxpool.Pool, cve string) (string, string) { + t.Helper() + conn, err := pool.Acquire(ctx) + assert.NoError(t, err) + defer conn.Release() + + var cisa, euvd *string + err = conn.QueryRow(ctx, + `SELECT to_char(cisa_exploit_add, 'YYYY-MM-DD'), to_char(euvd_exploit_add, 'YYYY-MM-DD') FROM cves WHERE cve = $1`, + cve, + ).Scan(&cisa, &euvd) + assert.NoError(t, err) + + deref := func(s *string) string { + if s == nil { + return "" + } + return *s + } + return deref(cisa), deref(euvd) +} + +// TestKEVBulkAliasMergesCISAAndEUVDDates verifies an alias CVE that inherits from two +// different KEV records — one CISA-only, one EUVD-only — keeps both dates instead of +// losing one to the single DISTINCT ON winner in InsertKEVBulk. +func TestKEVBulkAliasMergesCISAAndEUVDDates(t *testing.T) { + t.Parallel() + ctx := context.Background() + _, pool, terminate := InitDatabaseContainer("../initdb.sql") + defer terminate() + + cisaDate := time.Date(2025, 1, 15, 0, 0, 0, 0, time.UTC) + euvdDate := time.Date(2025, 3, 20, 0, 0, 0, 0, time.UTC) + + cves := []models.CVE{ + makeCVE(8201, "CVE-2025-8201", "alias of both", 5.0, testVector), + makeCVE(8202, "CVE-2025-8202", "CISA-only canonical", 9.0, testVector), + makeCVE(8203, "CVE-2025-8203", "EUVD-only canonical", 8.0, testVector), + } + // CVE-8201 is an alias of both canonical CVEs. + rels := []models.CVERelationship{ + {SourceCVE: "CVE-2025-8201", TargetCVE: "CVE-2025-8202", RelationshipType: "alias"}, + {SourceCVE: "CVE-2025-8201", TargetCVE: "CVE-2025-8203", RelationshipType: "alias"}, + } + // One canonical is CISA-only, the other EUVD-only. 
+ kev := []vulndb.KEVEntry{ + {CVE: "CVE-2025-8202", CISAExploitAddDate: &cisaDate, RequiredAction: "patch now", VulnerabilityName: "CISA Bug"}, + {CVE: "CVE-2025-8203", EUVDExploitAddDate: &euvdDate}, + } + + seedCVEState(ctx, t, pool, cves, rels, nil, kev) + + // The alias must carry the CISA date from one canonical and the EUVD date from the other. + cisaGot, euvdGot := queryKEVDates(ctx, t, pool, "CVE-2025-8201") + assert.Equal(t, cisaDate.Format("2006-01-02"), cisaGot) + assert.Equal(t, euvdDate.Format("2006-01-02"), euvdGot) + + // Each canonical keeps only its own dimension. + cisaOnly, euvdOnEmpty := queryKEVDates(ctx, t, pool, "CVE-2025-8202") + assert.Equal(t, cisaDate.Format("2006-01-02"), cisaOnly) + assert.Equal(t, "", euvdOnEmpty) + + cisaOnEmpty, euvdOnly := queryKEVDates(ctx, t, pool, "CVE-2025-8203") + assert.Equal(t, "", cisaOnEmpty) + assert.Equal(t, euvdDate.Format("2006-01-02"), euvdOnly) +} + // TestQuickDiff_LargeBatchManyChanges exercises a large number of simultaneous // inserts, deletes, updates, and EPSS changes to surface any batch-size edge cases. func TestQuickDiffLargeBatchManyChanges(t *testing.T) { @@ -513,7 +582,7 @@ func TestQuickDiffSequentialImports(t *testing.T) { diff1, gt1 := simulateExport(ctx, t, pool, v1Time, v2CVEs, nil, v2EPSS, nil) seedCVEState(ctx, t, pool, v1CVEs, nil, v1EPSS, nil) // Apply round-1 quickdiff and commit so the DB is the importer's v2 state. 
- applyAndCommit := func(decoded *vulndb.QuickDiff, epss map[string]dtos.EPSS, kev []vulndb.CISAKEVEntry) { + applyAndCommit := func(decoded *vulndb.QuickDiff, epss map[string]dtos.EPSS, kev []vulndb.KEVEntry) { t.Helper() conn, err := pool.Acquire(ctx) assert.NoError(t, err) @@ -522,7 +591,7 @@ func TestQuickDiffSequentialImports(t *testing.T) { assert.NoError(t, err) assert.NoError(t, vulndb.ApplyQuickDiff(ctx, tx, decoded)) assert.NoError(t, vulndb.InsertEPSSBulk(ctx, tx, epss)) - assert.NoError(t, vulndb.InsertCISAKEVBulk(ctx, tx, kev)) + assert.NoError(t, vulndb.InsertKEVBulk(ctx, tx, kev)) assert.NoError(t, tx.Commit(ctx)) } applyAndCommit(roundtripDiff(t, diff1), v2EPSS, nil) diff --git a/transformer/dependency_vuln_transformer.go b/transformer/dependency_vuln_transformer.go index 54fb64a47..fba6c2408 100644 --- a/transformer/dependency_vuln_transformer.go +++ b/transformer/dependency_vuln_transformer.go @@ -44,6 +44,7 @@ func CVEToDTO(cve models.CVE) dtos.CVEDTO { Risk: cve.Risk, Exploits: utils.Map(cve.Exploits, ExploitModelToDTO), Relationships: utils.Map(cve.Relationships, RelationshipToDTO), + EUVDExploitAdd: cve.EUVDExploitAdd, } } diff --git a/utils/egress_client.go b/utils/egress_client.go index 2fa6798c1..2c44c158f 100644 --- a/utils/egress_client.go +++ b/utils/egress_client.go @@ -29,11 +29,15 @@ var EgressTransport http.RoundTripper = otelhttp.NewTransport(EgressRoundTripper R: http.DefaultTransport, }) -var EgressClient = http.Client{ - Timeout: 30 * time.Second, - Transport: EgressTransport, +func NewEgressClient(timeout time.Duration) *http.Client { + return &http.Client{ + Timeout: timeout, + Transport: EgressTransport, + } } +var EgressClient = *NewEgressClient(30 * time.Second) + type EgressRoundTripper struct { R http.RoundTripper } diff --git a/vulndb/cisa_kev_service.go b/vulndb/cisa_kev_service.go index 5287a463e..33a03b51a 100644 --- a/vulndb/cisa_kev_service.go +++ b/vulndb/cisa_kev_service.go @@ -111,64 +111,13 @@ func 
parseDate(dateStr string) (*datatypes.Date, error) { return &d, nil } -// Apply writes pre-fetched CISA KEV entries to the database using the provided transaction, -// expanding KEV data to alias CVEs via the relationship table. -// The caller is responsible for committing or rolling back the transaction. -func (s cisaKEVService) Apply(ctx context.Context, tx shared.DB, cves []models.CVE) error { - kevMap := make(map[string]models.CVE, len(cves)) - cveIDs := make([]string, len(cves)) - for i, cve := range cves { - kevMap[cve.CVE] = cve - cveIDs[i] = cve.CVE - } - - var relationships []models.CVERelationship - for i := 0; i < len(cveIDs); i += kevBatchSize { - end := min(i+kevBatchSize, len(cveIDs)) - batch, err := s.cveRelationshipRepository.GetRelationshipsByTargetCVEBatch(ctx, tx, cveIDs[i:end]) - if err != nil { - slog.Error("could not fetch CVE relationships", "error", err) - return err - } - relationships = append(relationships, batch...) - } - - for _, rel := range relationships { - if kevData, ok := kevMap[rel.TargetCVE]; ok { - if _, exists := kevMap[rel.SourceCVE]; !exists { - relatedCVE := models.CVE{ - CVE: rel.SourceCVE, - CISAExploitAdd: kevData.CISAExploitAdd, - CISAActionDue: kevData.CISAActionDue, - CISARequiredAction: kevData.CISARequiredAction, - CISAVulnerabilityName: kevData.CISAVulnerabilityName, - } - cves = append(cves, relatedCVE) - kevMap[rel.SourceCVE] = relatedCVE - } - } - } - - slog.Info("updating CISA KEV data", "direct", len(cveIDs), "viaRelationships", len(cves)-len(cveIDs)) - - for i := 0; i < len(cves); i += kevBatchSize { - end := min(i+kevBatchSize, len(cves)) - if err := s.cveRepository.UpdateCISAKEVBatch(ctx, tx, cves[i:end]); err != nil { - slog.Error("error when trying to save CISA KEV information batch") - return err - } - } - - return nil -} - -func InsertCISAKEVBulk(ctx context.Context, tx pgx.Tx, entries []CISAKEVEntry) error { +func InsertKEVBulk(ctx context.Context, tx pgx.Tx, entries []KEVEntry) error { // Always reset 
CISA fields so CVEs that are no longer in the catalog (or were // never in it) end up with NULL — not the empty-string zero-value written by // the initial CVE insert. The reset must run even when the entry list is empty, // otherwise existing CVEs keep "" while newly-inserted quickdiff CVEs get NULL, // and the integrity checksum (coalesce(field, '\0')) treats the two differently. - if _, err := tx.Exec(ctx, `UPDATE cves SET cisa_exploit_add = NULL, cisa_action_due = NULL, cisa_required_action = NULL, cisa_vulnerability_name = NULL`); err != nil { + if _, err := tx.Exec(ctx, `UPDATE cves SET cisa_exploit_add = NULL, cisa_action_due = NULL, cisa_required_action = NULL, cisa_vulnerability_name = NULL, euvd_exploit_add = NULL`); err != nil { return fmt.Errorf("could not reset cisa kev fields: %w", err) } if len(entries) == 0 { @@ -180,35 +129,42 @@ func InsertCISAKEVBulk(ctx context.Context, tx pgx.Tx, entries []CISAKEVEntry) e cisa_exploit_add date, cisa_action_due date, cisa_required_action text, - cisa_vulnerability_name text + cisa_vulnerability_name text, + euvd_exploit_add date ) ON COMMIT DROP`); err != nil { return fmt.Errorf("could not create kev staging table: %w", err) } if _, err := tx.CopyFrom(ctx, pgx.Identifier{"kev_stage"}, - []string{"cve", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name"}, + []string{"cve", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "euvd_exploit_add"}, pgx.CopyFromSlice(len(entries), func(i int) ([]any, error) { e := entries[i] - return []any{e.CVE, e.ExploitAddDate, e.ActionDueDate, e.RequiredAction, e.VulnerabilityName}, nil + return []any{e.CVE, e.CISAExploitAddDate, e.ActionDueDate, e.RequiredAction, e.VulnerabilityName, e.EUVDExploitAddDate}, nil })); err != nil { return fmt.Errorf("could not copy kev rows into staging table: %w", err) } // Update direct CVEs and alias CVEs. 
DISTINCT ON with ORDER BY cisa_exploit_add ASC gives a - // deterministic winner when an alias maps to multiple KEV canonical CVEs. + // deterministic winner for the CISA fields when an alias maps to multiple KEV canonical CVEs. + // euvd_exploit_add is resolved with a window min over the whole CVE partition (computed before + // DISTINCT ON collapses the rows) so a CVE aliased to a cisa-only and a euvd-only record keeps + // both dates instead of losing one to the single DISTINCT ON winner. tag, err := tx.Exec(ctx, ` UPDATE cves SET cisa_exploit_add = ks.cisa_exploit_add, cisa_action_due = ks.cisa_action_due, cisa_required_action = ks.cisa_required_action, - cisa_vulnerability_name = ks.cisa_vulnerability_name + cisa_vulnerability_name = ks.cisa_vulnerability_name, + euvd_exploit_add = ks.euvd_exploit_add FROM ( - SELECT DISTINCT ON (cve) cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name + SELECT DISTINCT ON (cve) + cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, + min(euvd_exploit_add) OVER (PARTITION BY cve) AS euvd_exploit_add FROM ( - SELECT cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name + SELECT cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, euvd_exploit_add FROM kev_stage UNION ALL - SELECT cr.source_cve, ks.cisa_exploit_add, ks.cisa_action_due, ks.cisa_required_action, ks.cisa_vulnerability_name + SELECT cr.source_cve, ks.cisa_exploit_add, ks.cisa_action_due, ks.cisa_required_action, ks.cisa_vulnerability_name, ks.euvd_exploit_add FROM kev_stage ks JOIN cve_relationships cr ON cr.target_cve = ks.cve ) combined @@ -218,11 +174,11 @@ func InsertCISAKEVBulk(ctx context.Context, tx pgx.Tx, entries []CISAKEVEntry) e if err != nil { return fmt.Errorf("could not update cves with kev data: %w", err) } - slog.Debug("InsertCISAKEVBulk: update complete", "rows_updated", tag.RowsAffected()) + 
slog.Debug("InsertKEVBulk: update complete", "rows_updated", tag.RowsAffected()) return nil } -func applyCISAKEVToStage(ctx context.Context, tx pgx.Tx, entries []CISAKEVEntry) error { +func applyKEVToStage(ctx context.Context, tx pgx.Tx, entries []KEVEntry) error { if len(entries) == 0 { return nil } @@ -232,15 +188,16 @@ func applyCISAKEVToStage(ctx context.Context, tx pgx.Tx, entries []CISAKEVEntry) cisa_exploit_add date, cisa_action_due date, cisa_required_action text, - cisa_vulnerability_name text + cisa_vulnerability_name text, + euvd_exploit_add date ) ON COMMIT DROP`); err != nil { return fmt.Errorf("could not create kev staging table: %w", err) } if _, err := tx.CopyFrom(ctx, pgx.Identifier{"kev_stage"}, - []string{"cve", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name"}, + []string{"cve", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "euvd_exploit_add"}, pgx.CopyFromSlice(len(entries), func(i int) ([]any, error) { e := entries[i] - return []any{e.CVE, e.ExploitAddDate, e.ActionDueDate, e.RequiredAction, e.VulnerabilityName}, nil + return []any{e.CVE, e.CISAExploitAddDate, e.ActionDueDate, e.RequiredAction, e.VulnerabilityName, e.EUVDExploitAddDate}, nil })); err != nil { return fmt.Errorf("could not copy kev rows into kev staging table: %w", err) } @@ -250,14 +207,17 @@ func applyCISAKEVToStage(ctx context.Context, tx pgx.Tx, entries []CISAKEVEntry) cisa_exploit_add = ks.cisa_exploit_add, cisa_action_due = ks.cisa_action_due, cisa_required_action = ks.cisa_required_action, - cisa_vulnerability_name = ks.cisa_vulnerability_name + cisa_vulnerability_name = ks.cisa_vulnerability_name, + euvd_exploit_add = ks.euvd_exploit_add FROM ( - SELECT DISTINCT ON (cve) cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name + SELECT DISTINCT ON (cve) + cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, + 
min(euvd_exploit_add) OVER (PARTITION BY cve) AS euvd_exploit_add FROM ( - SELECT cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name + SELECT cve, cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, euvd_exploit_add FROM kev_stage UNION ALL - SELECT cr.source_cve, ks.cisa_exploit_add, ks.cisa_action_due, ks.cisa_required_action, ks.cisa_vulnerability_name + SELECT cr.source_cve, ks.cisa_exploit_add, ks.cisa_action_due, ks.cisa_required_action, ks.cisa_vulnerability_name, ks.euvd_exploit_add FROM kev_stage ks JOIN cve_relationships cr ON cr.target_cve = ks.cve ) combined diff --git a/vulndb/euvd_kev_service.go b/vulndb/euvd_kev_service.go new file mode 100644 index 000000000..fc04c338a --- /dev/null +++ b/vulndb/euvd_kev_service.go @@ -0,0 +1,96 @@ +package vulndb + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log/slog" + "net/http" + + "github.com/l3montree-dev/devguard/database/models" + "github.com/l3montree-dev/devguard/shared" + "github.com/l3montree-dev/devguard/utils" +) + +type euvdKEVService struct { + cveRepository shared.CveRepository + cveRelationshipRepository shared.CVERelationshipRepository +} + +func NewEUVDKEVService(cveRepository shared.CveRepository, cveRelationshipRepository shared.CVERelationshipRepository) euvdKEVService { + return euvdKEVService{ + cveRepository: cveRepository, + cveRelationshipRepository: cveRelationshipRepository, + } +} + +var euvdKEVURL = "https://euvdservices.enisa.europa.eu/api/kev/dump" + +const ( + euvdSourceID = "eukev_kev" + cisaSourceID = "cisa_kev" +) + +type euvdKEVEntry struct { + CVEID string `json:"cveId"` + EUVDID string `json:"euvdId"` + DateAdded string `json:"dateAdded"` + Sources []string `json:"sources"` +} + +func (service euvdKEVService) Fetch(ctx context.Context) ([]models.CVE, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, euvdKEVURL, nil) + if err != nil { + return nil, err + } + + res, err := 
utils.EgressClient.Do(req) + if err != nil { + return nil, err + } + defer res.Body.Close() + + body, err := io.ReadAll(res.Body) + if err != nil { + return nil, fmt.Errorf("could not read response body: %w", err) + } + + var euvdKEV []euvdKEVEntry + if err := json.Unmarshal(body, &euvdKEV); err != nil { + return nil, fmt.Errorf("could not parse JSON: %w", err) + } + + results := make([]models.CVE, 0, len(euvdKEV)) + for _, entry := range euvdKEV { + if len(entry.Sources) == 0 { + continue + } + dateAdded, err := parseDate(entry.DateAdded) + if err != nil { + slog.Warn("could not parse dateAdded", "cve", entry.CVEID, "date", entry.DateAdded) + continue + } + + cve := models.CVE{ + CVE: entry.CVEID, + } + + // add exploit add information based on which source(s) are listed + for _, sourceID := range entry.Sources { + switch sourceID { + case euvdSourceID: + cve.EUVDExploitAdd = dateAdded + case cisaSourceID: + cve.CISAExploitAdd = dateAdded + default: + // if the schema changes it should break to force investigation of the schema + return nil, fmt.Errorf("unexpected identifier found in EUVD KEV. 
CVE-ID: %s, sourceID: %s ", entry.CVEID, sourceID) + } + } + + results = append(results, cve) + } + + return results, nil +} diff --git a/vulndb/euvd_service.go b/vulndb/euvd_service.go new file mode 100644 index 000000000..4fbe1c7b0 --- /dev/null +++ b/vulndb/euvd_service.go @@ -0,0 +1,137 @@ +package vulndb + +import ( + "context" + "encoding/csv" + "fmt" + "log/slog" + "net/http" + "time" + + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/l3montree-dev/devguard/database/models" + "github.com/l3montree-dev/devguard/dtos" + "github.com/l3montree-dev/devguard/shared" +) + +const ( + euvdIDMappingURL = "https://euvdservices.enisa.europa.eu/api/dump/cve-euvd-mapping" // URL to fetch the alias mapping from EUVD-IDs to CVE-IDs + csvEUVDColumnID = "euvd_id" // names of the csv columns, expected to be stable across versions; if they change the import should fail + csvCVEColumnID = "cve_id" +) + +type euvdService struct { + cveRepository shared.CveRepository + cveRelationshipRepository shared.CVERelationshipRepository + pool *pgxpool.Pool + httpClient *http.Client +} + +func NewEUVDService(cveRepository shared.CveRepository, cveRelationshipRepository shared.CVERelationshipRepository, pool *pgxpool.Pool) euvdService { + return euvdService{ + cveRepository: cveRepository, + cveRelationshipRepository: cveRelationshipRepository, + pool: pool, + httpClient: &http.Client{}, + } +} + +func (service euvdService) importEUVDAliases(ctx context.Context, tx pgx.Tx) ([]models.CVERelationship, error) { + aliasCSV, err := service.fetchEUVDAliases() + if err != nil { + return nil, err + } + + relationships, err := service.convertAliasesToRelationships(aliasCSV) + if err != nil { + return nil, err + } + return service.ResolveAndInsertEUVDRelationships(ctx, tx, relationships) +} + +// after fetching the CVE aliases of the EUVD we want to resolve those 'original' CVEs to their downstream relations +// for CVEs with no downstream alias we only keep them if they exist in the
cves so the fk on source_cve holds +func (service euvdService) ResolveAndInsertEUVDRelationships(ctx context.Context, tx pgx.Tx, relationships []models.CVERelationship) ([]models.CVERelationship, error) { + euvdStageTable := "euvd_relationships_stage" + + start := time.Now() + slog.Info("start resolving and inserting euvd relationships into cve_relationships") + if _, err := tx.Exec(ctx, fmt.Sprintf(`CREATE TEMP TABLE %s (LIKE cve_relationships) ON COMMIT DROP`, euvdStageTable)); err != nil { + return nil, fmt.Errorf("could not create euvd stage table: %w", err) + } + + if err := InsertCVERelationshipsBulk(ctx, tx, relationships, euvdStageTable); err != nil { + return nil, fmt.Errorf("could not insert euvd relationships bulk: %w", err) + } + + rows, err := tx.Query(ctx, ` + INSERT INTO cve_relationships (target_cve, source_cve, relationship_type) + -- first resolve the euvd relations via a join to the downstream relations + SELECT euvd.source_cve AS target_cve, cr.source_cve, cr.relationship_type + FROM cve_relationships cr + JOIN euvd_relationships_stage euvd ON euvd.target_cve = cr.target_cve + UNION + -- then combine them with all cves that do not have a relationship but are present in the cves table + SELECT euvd.source_cve AS target_cve, euvd.target_cve AS source_cve, 'euvd' AS relationship_type + FROM euvd_relationships_stage euvd + WHERE NOT EXISTS (SELECT 1 FROM cve_relationships cr WHERE cr.target_cve = euvd.target_cve) + AND EXISTS (SELECT 1 FROM cves c WHERE c.cve = euvd.target_cve) + ON CONFLICT (target_cve, source_cve, relationship_type) DO NOTHING + -- return the resolved rows to be written in the exported gob files + RETURNING target_cve, source_cve, relationship_type`) + if err != nil { + return nil, fmt.Errorf("could not resolve and insert euvd relationships: %w", err) + } + defer rows.Close() + slog.Info("finished inserting euvd relationships", "took", time.Since(start)) + + // convert rows into cveRelationships model + resolved := 
make([]models.CVERelationship, 0, len(relationships)) + for rows.Next() { + var relation models.CVERelationship + if err := rows.Scan(&relation.TargetCVE, &relation.SourceCVE, &relation.RelationshipType); err != nil { + return nil, fmt.Errorf("could not scan resolved euvd relationship: %w", err) + } + resolved = append(resolved, relation) + } + return resolved, rows.Err() +} + +func (service euvdService) fetchEUVDAliases() ([][]string, error) { + req, err := http.NewRequest("GET", euvdIDMappingURL, nil) + if err != nil { + return nil, fmt.Errorf("could not build request to fetch csv file: %w", err) + } + + resp, err := service.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("could not get csv file from EUVD api: %w", err) + } + defer resp.Body.Close() + + csvReader := csv.NewReader(resp.Body) + return csvReader.ReadAll() +} + +func (service euvdService) convertAliasesToRelationships(aliasesCSV [][]string) ([]models.CVERelationship, error) { + // check the format of the csv file; should break if the format changes so we explicitly investigate the change + if len(aliasesCSV) == 0 || len(aliasesCSV[0]) != 2 || aliasesCSV[0][0] != csvEUVDColumnID || aliasesCSV[0][1] != csvCVEColumnID { + return nil, fmt.Errorf("invalid/unexpected csv format; check the csv file provided by the EUVD") + } + + relationships := make([]models.CVERelationship, 0, len(aliasesCSV)) + for i, row := range aliasesCSV[1:] { // exclude the header row + if len(row) != 2 { + return nil, fmt.Errorf("invalid csv format for row %d, expected length of 2 got: %d", i, len(row)) + } + + relationships = append(relationships, models.CVERelationship{ + SourceCVE: row[0], + TargetCVE: row[1], + RelationshipType: dtos.RelationshipTypeEUVD, // placeholder relationship type before resolving actual relations via the cve_relationships table + }) + } + + return relationships, nil +} diff --git a/vulndb/gob.go b/vulndb/gob.go index 1126e7982..3e713ac3b 100644 --- a/vulndb/gob.go +++ b/vulndb/gob.go @@ 
-12,17 +12,19 @@ import ( "github.com/klauspost/compress/zstd" "github.com/l3montree-dev/devguard/database/models" + "github.com/l3montree-dev/devguard/utils" "gorm.io/datatypes" ) -// CISAKEVEntry is the gob-safe representation of a CISA KEV record. +// KEVEntry is the gob-safe representation of a KEV record (CISA and/or EUVD). // Dates are stored as *time.Time to avoid the datatypes.Date gob limitation. -type CISAKEVEntry struct { - CVE string - ExploitAddDate *time.Time - ActionDueDate *time.Time - RequiredAction string - VulnerabilityName string +type KEVEntry struct { + CVE string + CISAExploitAddDate *time.Time + EUVDExploitAddDate *time.Time + ActionDueDate *time.Time + RequiredAction string + VulnerabilityName string } // GobExploit is the gob-safe representation of models.Exploit. @@ -67,15 +69,16 @@ type GobMaliciousPackagesExport struct { // --- CISA KEV conversions --- -func cisaKEVEntriesToGob(cves []models.CVE) []CISAKEVEntry { - out := make([]CISAKEVEntry, 0, len(cves)) +func kevEntriesToGob(cves []models.CVE) []KEVEntry { + out := make([]KEVEntry, 0, len(cves)) for _, c := range cves { - out = append(out, CISAKEVEntry{ - CVE: c.CVE, - ExploitAddDate: dateToTimePtr(c.CISAExploitAdd), - ActionDueDate: dateToTimePtr(c.CISAActionDue), - RequiredAction: *c.CISARequiredAction, - VulnerabilityName: *c.CISAVulnerabilityName, + out = append(out, KEVEntry{ + CVE: c.CVE, + CISAExploitAddDate: dateToTimePtr(c.CISAExploitAdd), + EUVDExploitAddDate: dateToTimePtr(c.EUVDExploitAdd), + ActionDueDate: dateToTimePtr(c.CISAActionDue), + RequiredAction: utils.SafeDereference(c.CISARequiredAction), + VulnerabilityName: utils.SafeDereference(c.CISAVulnerabilityName), }) } return out diff --git a/vulndb/import_debug.go b/vulndb/import_debug.go index fa053cd79..4c78d5ab7 100644 --- a/vulndb/import_debug.go +++ b/vulndb/import_debug.go @@ -23,6 +23,7 @@ import ( "time" "github.com/jackc/pgx/v5" + "github.com/l3montree-dev/devguard/database/models" "github.com/l3montree-dev/devguard/dtos"
) @@ -48,7 +49,7 @@ func diffCVEsByIntegrityHash(ctx context.Context, tx pgx.Tx) error { FROM (SELECT cve, %s AS db_hash, id, cisa_required_action, cisa_vulnerability_name, epss, percentile FROM cves) db JOIN (SELECT cve, %s AS gob_hash, id, cisa_required_action, cisa_vulnerability_name, epss, percentile FROM cves_stage) gob ON db.cve = gob.cve - WHERE db_hash <> gob_hash + WHERE db_hash != gob_hash LIMIT 20 `, hashExpr, hashExpr)) if err != nil { @@ -120,6 +121,15 @@ func showImportDebug(ctx context.Context, tx pgx.Tx, workingDir string, failingT slog.Error("show-diff: could not insert cve_relationships into staging", "err", err) return } + euvdRelationships, err := readAllGobItems[models.CVERelationship](workingDir + "/euvd_relationships.gob") + if err != nil { + slog.Error("show-diff: could not read euvd_relationships.gob", "err", err) + return + } + if err := InsertCVERelationshipsBulk(ctx, tx, euvdRelationships, "cve_relationships_stage"); err != nil { + slog.Error("show-diff: could not insert euvd cve_relationships into staging", "err", err) + return + } if err := insertAffectedComponentsBulk(ctx, tx, vulnRows.AffectedComponents, "affected_components_stage"); err != nil { slog.Error("show-diff: could not insert affected_components into staging", "err", err) return @@ -154,12 +164,12 @@ func showImportDebug(ctx context.Context, tx pgx.Tx, workingDir string, failingT return } - var kevEntries []CISAKEVEntry + var kevEntries []KEVEntry if err := readGobFile(workingDir+"/cisakev.gob", &kevEntries); err != nil { slog.Error("show-diff: could not read cisakev.gob", "err", err) return } - if err := applyCISAKEVToStage(ctx, tx, kevEntries); err != nil { + if err := applyKEVToStage(ctx, tx, kevEntries); err != nil { slog.Error("show-diff: could not apply CISA KEV to staging", "err", err) return } diff --git a/vulndb/osv_service.go b/vulndb/osv_service.go index aaca1a609..85fd8f253 100644 --- a/vulndb/osv_service.go +++ b/vulndb/osv_service.go @@ -64,7 +64,7 @@ type 
syncSpec struct { // Both SyncAllTables (staging→live) and applyQuickDiff (QuickDiff struct→live) // use these specs so all apply logic lives in one place. var liveTableSpecs = func() []syncSpec { - cveAllCols := []string{"id", "content_hash", "cve", "date_published", "date_last_modified", "description", "cvss", `"references"`, "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "epss", "percentile", "vector"} + cveAllCols := []string{"id", "content_hash", "cve", "date_published", "date_last_modified", "description", "cvss", `"references"`, "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "epss", "percentile", "vector", "euvd_exploit_add"} relAllCols := []string{"target_cve", "source_cve", "relationship_type"} acInsertCols := []string{"id", "purl", "ecosystem", "version", "semver_introduced", "semver_fixed", "version_introduced", "version_fixed"} acInsertExprs := []string{"id", "purl", "ecosystem", "version", "semver_introduced::semver", "semver_fixed::semver", "version_introduced", "version_fixed"} @@ -403,7 +403,7 @@ func (s osvService) fetchAndImportOSV(ctx context.Context, tx pgx.Tx, importStar // check if we ran into any errors while fetching if n := fetchFailures.Load(); n > 0 { - return nil, nil, fmt.Errorf("aborting export: %d ids could not be fetched; will retry on next run", n) + return nil, nil, fmt.Errorf("aborting export: %d osv fetch failures; will retry on next run", n) } // double check if we could fetch any data at all @@ -418,9 +418,6 @@ func (s osvService) fetchAndImportOSV(ctx context.Context, tx pgx.Tx, importStar slog.Info("fetched OSV vulns and malware", "entries", len(allOSVVulns), "latest", allOSVVulns[0].ModifiedTimestamp.Format(time.DateTime)) - if err := PrepareBulkInsert(ctx, tx); err != nil { - return nil, nil, fmt.Errorf("could not prepare bulk insert: %w", err) - } if err := CreateStagingTables(ctx, tx); err != nil { return nil, nil, fmt.Errorf("could not 
create staging tables: %w", err) } @@ -449,13 +446,12 @@ func (s osvService) fetchAndImportOSV(ctx context.Context, tx pgx.Tx, importStar if err := FlushOSVStagingTables(ctx, tx); err != nil { return nil, nil, fmt.Errorf("could not flush osv staging tables: %w", err) } - if err := AddIndexesAndConstraints(ctx, tx); err != nil { - return nil, nil, fmt.Errorf("could not re-add indexes and constraints: %w", err) - } // Delete orphan CVEs and affected_components so the DB state matches what // importers will end up with, and so integrity checksums are valid. - runCleanUpJobs(ctx, tx) + if err := runCleanUpJobs(ctx, tx); err != nil { + return nil, nil, fmt.Errorf("could not run clean up jobs: %w", err) + } // Re-query surviving CVE IDs to filter the gob — no point serializing // entries that were just deleted. @@ -509,11 +505,13 @@ func (s osvService) fetchEcosystemEntriesViaZip(ctx context.Context, zipPushWait zipReader, err := s.getOSVZipContainingEcosystem(ctx, ecosystem) if err != nil { fetchFailures.Add(1) + slog.Error("could not fetch osv zip for ecosystem", "ecosystem", ecosystem, "err", err) return } if len(zipReader.File) == 0 { fetchFailures.Add(1) + slog.Error("osv zip for ecosystem contained no files", "ecosystem", ecosystem) return } @@ -556,7 +554,7 @@ func (s osvService) getOSVZipContainingEcosystem(ctx context.Context, ecosystem return nil, errors.Wrap(err, "could not create request") } - res, err := utils.EgressClient.Do(req) + res, err := utils.NewEgressClient(time.Second * 90).Do(req) if err != nil { return nil, errors.Wrap(err, "could not download zip") } @@ -613,10 +611,10 @@ func InsertCVEsBulk(ctx context.Context, tx pgx.Tx, cves []models.CVE, table str if len(cves) == 0 { return nil } - columnNames := []string{"id", "content_hash", "cve", "date_published", "date_last_modified", "description", "cvss", "references", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "epss", "percentile", "vector"} + 
columnNames := []string{"id", "content_hash", "cve", "date_published", "date_last_modified", "description", "cvss", "references", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "epss", "percentile", "vector", "euvd_exploit_add"} _, err := tx.CopyFrom(ctx, pgx.Identifier{table}, columnNames, pgx.CopyFromSlice(len(cves), func(i int) ([]any, error) { row := cves[i] - return []any{row.ID, row.ContentHash, row.CVE, row.DatePublished, row.DateLastModified, row.Description, row.CVSS, row.References, row.CISAExploitAdd, row.CISAActionDue, row.CISARequiredAction, row.CISAVulnerabilityName, row.EPSS, row.Percentile, row.Vector}, nil + return []any{row.ID, row.ContentHash, row.CVE, row.DatePublished, row.DateLastModified, row.Description, row.CVSS, row.References, row.CISAExploitAdd, row.CISAActionDue, row.CISARequiredAction, row.CISAVulnerabilityName, row.EPSS, row.Percentile, row.Vector, row.EUVDExploitAdd}, nil })) if err != nil { return fmt.Errorf("could not copy cve rows into staging table: %w", err) @@ -681,8 +679,8 @@ func FlushOSVStagingTables(ctx context.Context, tx pgx.Tx) error { start := time.Now() if _, err := tx.Exec(ctx, ` - INSERT INTO cves (id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, epss, percentile, vector) - SELECT id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, epss, percentile, vector + INSERT INTO cves (id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, epss, percentile, vector, euvd_exploit_add) + SELECT id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, 
cisa_required_action, cisa_vulnerability_name, epss, percentile, vector, euvd_exploit_add FROM cves_stage ON CONFLICT (id) DO UPDATE SET content_hash = EXCLUDED.content_hash, @@ -808,7 +806,8 @@ func CreateStagingTables(ctx context.Context, tx pgx.Tx) error { cisa_vulnerability_name text, epss numeric(6,5), percentile numeric(6,5), - vector text + vector text, + euvd_exploit_add date ) ON COMMIT DROP; CREATE TEMP TABLE IF NOT EXISTS cve_relationships_stage ( @@ -1015,8 +1014,8 @@ func AddIndexesAndConstraints(ctx context.Context, tx pgx.Tx) error { // runScopedCleanUpJobs removes orphaned affected_components and CVEs that resulted // from deleting the given pivot rows. Only checks the specific IDs involved rather // than scanning the full tables. -func runCleanUpJobs(ctx context.Context, tx pgx.Tx) { - slog.Info("start running sanity checks") +func runCleanUpJobs(ctx context.Context, tx pgx.Tx) error { + slog.Info("start running clean up jobs") // first delete all cves which have no affected components and also none of their relationships does start := time.Now() _, err := tx.Exec(ctx, ` @@ -1044,13 +1043,24 @@ func runCleanUpJobs(ctx context.Context, tx pgx.Tx) { slog.Info("successfully cleaned up orphan cves", "took", time.Since(start)) } + // after deleting orphan cves make sure to drop any orphaned relationships as well so the fk on source_cve holds start = time.Now() _, err = tx.Exec(ctx, ` - DELETE FROM + DELETE FROM cve_relationships cr + WHERE NOT EXISTS (SELECT 1 FROM cves c WHERE c.cve = cr.source_cve);`) + if err != nil { + slog.Error("could not clean up dangling cve_relationships, continuing...", "error", err) + } else { + slog.Info("successfully cleaned up dangling cve_relationships", "took", time.Since(start)) + } + + start = time.Now() + _, err = tx.Exec(ctx, ` + DELETE FROM affected_components - WHERE NOT EXISTS + WHERE NOT EXISTS ( - SELECT FROM cve_affected_component + SELECT FROM cve_affected_component WHERE affected_component_id = id ) ;`) @@ 
-1059,6 +1069,8 @@ func runCleanUpJobs(ctx context.Context, tx pgx.Tx) { } else { slog.Info("successfully cleaned up orphan affected components", "took", time.Since(start)) } + + return nil } func shouldIgnoreVulnerabilityID(id string) bool { diff --git a/vulndb/quick_diff.go b/vulndb/quick_diff.go index be9b8cfdf..052ec8373 100644 --- a/vulndb/quick_diff.go +++ b/vulndb/quick_diff.go @@ -68,6 +68,7 @@ type quickDiffCVE struct { References string CISAExploitAdd *time.Time CISAActionDue *time.Time + EUVDExploitAdd *time.Time CISARequiredAction *string CISAVulnerabilityName *string EPSS *float64 @@ -142,7 +143,8 @@ func ComputeQuickDiff(ctx context.Context, tx pgx.Tx, fromVersion time.Time) (*Q c.description, c.cvss, c."references", c.cisa_required_action, c.cisa_vulnerability_name, c.epss, c.percentile, c.vector, c.cisa_exploit_add, - c.cisa_action_due + c.cisa_action_due, + c.euvd_exploit_add FROM cves c WHERE NOT EXISTS (SELECT 1 FROM _snap_cves s WHERE s.id = c.id) `) @@ -159,7 +161,8 @@ func ComputeQuickDiff(ctx context.Context, tx pgx.Tx, fromVersion time.Time) (*Q c.description, c.cvss, c."references", c.cisa_required_action, c.cisa_vulnerability_name, c.epss, c.percentile, c.vector, c.cisa_exploit_add, - c.cisa_action_due + c.cisa_action_due, + c.euvd_exploit_add FROM cves c JOIN _snap_cves s ON s.id = c.id WHERE s.content_hash != c.content_hash @@ -390,25 +393,25 @@ func computeDiffFromQuickDiff(ctx context.Context, tx pgx.Tx, diff *QuickDiff) e return fmt.Errorf("computeDiffFromQuickDiff: copy _diff_del_cves: %w", err) } - cvePlain := []string{"id", "content_hash", "cve", "date_published", "date_last_modified", "description", "cvss", "references", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "epss", "percentile", "vector"} - if err := createLike("_diff_ins_cves", "cves", `id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, 
cisa_required_action, cisa_vulnerability_name, epss, percentile, vector`); err != nil { + cvePlain := []string{"id", "content_hash", "cve", "date_published", "date_last_modified", "description", "cvss", "references", "cisa_exploit_add", "cisa_action_due", "cisa_required_action", "cisa_vulnerability_name", "epss", "percentile", "vector", "euvd_exploit_add"} + if err := createLike("_diff_ins_cves", "cves", `id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, epss, percentile, vector, euvd_exploit_add`); err != nil { return fmt.Errorf("computeDiffFromQuickDiff: create _diff_ins_cves: %w", err) } if len(diff.CVEsInserted) > 0 { if _, err := tx.CopyFrom(ctx, pgx.Identifier{"_diff_ins_cves"}, cvePlain, pgx.CopyFromSlice(len(diff.CVEsInserted), func(i int) ([]any, error) { c := diff.CVEsInserted[i] - return []any{c.ID, c.ContentHash, c.CVE, c.DatePublished, c.DateLastModified, c.Description, c.CVSS, c.References, c.CISAExploitAdd, c.CISAActionDue, c.CISARequiredAction, c.CISAVulnerabilityName, c.EPSS, c.Percentile, c.Vector}, nil + return []any{c.ID, c.ContentHash, c.CVE, c.DatePublished, c.DateLastModified, c.Description, c.CVSS, c.References, c.CISAExploitAdd, c.CISAActionDue, c.CISARequiredAction, c.CISAVulnerabilityName, c.EPSS, c.Percentile, c.Vector, c.EUVDExploitAdd}, nil })); err != nil { return fmt.Errorf("computeDiffFromQuickDiff: copy _diff_ins_cves: %w", err) } } - if err := createLike("_diff_upd_cves", "cves", `id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, cisa_required_action, cisa_vulnerability_name, epss, percentile, vector`); err != nil { + if err := createLike("_diff_upd_cves", "cves", `id, content_hash, cve, date_published, date_last_modified, description, cvss, "references", cisa_exploit_add, cisa_action_due, cisa_required_action, 
cisa_vulnerability_name, epss, percentile, vector, euvd_exploit_add`); err != nil { return fmt.Errorf("computeDiffFromQuickDiff: create _diff_upd_cves: %w", err) } if len(diff.CVEsUpdated) > 0 { if _, err := tx.CopyFrom(ctx, pgx.Identifier{"_diff_upd_cves"}, cvePlain, pgx.CopyFromSlice(len(diff.CVEsUpdated), func(i int) ([]any, error) { c := diff.CVEsUpdated[i] - return []any{c.ID, c.ContentHash, c.CVE, c.DatePublished, c.DateLastModified, c.Description, c.CVSS, c.References, c.CISAExploitAdd, c.CISAActionDue, c.CISARequiredAction, c.CISAVulnerabilityName, c.EPSS, c.Percentile, c.Vector}, nil + return []any{c.ID, c.ContentHash, c.CVE, c.DatePublished, c.DateLastModified, c.Description, c.CVSS, c.References, c.CISAExploitAdd, c.CISAActionDue, c.CISARequiredAction, c.CISAVulnerabilityName, c.EPSS, c.Percentile, c.Vector, c.EUVDExploitAdd}, nil })); err != nil { return fmt.Errorf("computeDiffFromQuickDiff: copy _diff_upd_cves: %w", err) } @@ -629,7 +632,7 @@ func collectCVERows(rows pgx.Rows) ([]quickDiffCVE, error) { &c.ID, &c.ContentHash, &c.CVE, &c.DatePublished, &c.DateLastModified, &c.Description, &c.CVSS, &c.References, &c.CISARequiredAction, &c.CISAVulnerabilityName, &c.EPSS, &c.Percentile, &c.Vector, - &c.CISAExploitAdd, &c.CISAActionDue, + &c.CISAExploitAdd, &c.CISAActionDue, &c.EUVDExploitAdd, ); err != nil { return nil, err } diff --git a/vulndb/vulndb_service.go b/vulndb/vulndb_service.go index 36c9a47ca..7c02dbf33 100644 --- a/vulndb/vulndb_service.go +++ b/vulndb/vulndb_service.go @@ -27,6 +27,7 @@ import ( ) const vulnDBArchiveName = "vulndb.tar.zst" +const vulnDBURI = "ghcr.io/l3montree-dev/devguard/vulndb/v3" var _ shared.VulnDBService = (*VulnDBService)(nil) @@ -39,8 +40,10 @@ const debugImport = false // and malicious packages. 
type VulnDBService struct { osv osvService + euvdService euvdService epss epssService cisaKEV cisaKEVService + euvdKEV euvdKEVService githubExploits *githubExploitDBService exploitDB exploitDBService maliciousPackages *MaliciousPackageChecker @@ -59,8 +62,10 @@ func NewVulnDBService( ) *VulnDBService { return &VulnDBService{ osv: NewOSVService(affectedCmpRepository, cveRepository, cveRelationshipRepository, pool), + euvdService: NewEUVDService(cveRepository, cveRelationshipRepository, pool), epss: NewEPSSService(cveRepository, cveRelationshipRepository), cisaKEV: NewCISAKEVService(cveRepository, cveRelationshipRepository), + euvdKEV: NewEUVDKEVService(cveRepository, cveRelationshipRepository), githubExploits: NewGithubExploitDBService(exploitRepository), exploitDB: NewExploitDBService(exploitRepository), maliciousPackages: maliciousPackageChecker, @@ -133,21 +138,43 @@ func (s *VulnDBService) exportRC(ctx context.Context, computeDiff bool) error { return fmt.Errorf("could not truncate malicious package tables: %w", err) } + // prepare the tables for bulk insert before any loading begins + if err := PrepareBulkInsert(ctx, tx); err != nil { + return fmt.Errorf("could not prepare bulk insert: %w", err) + } + // OSV must run first: it populates the DB (including cleanup) so we know // which CVE IDs exist before fetching the other sources. 
osvEntries, survivingCVEs, err := s.osv.fetchAndImportOSV(ctx, tx, start) if err != nil { return fmt.Errorf("OSV fetch failed: %w", err) } + + // then we can add the additional data sources + // load the EUVD aliases into the cve_relationship table + euvdRelationships, err := s.euvdService.importEUVDAliases(ctx, tx) + if err != nil { + return fmt.Errorf("could not import CVE-ID aliases from EUVD: %w", err) + } + + if err := AddIndexesAndConstraints(ctx, tx); err != nil { + return fmt.Errorf("could not re-add indexes and constraints: %w", err) + } + if err := writeGobFileItems(osvEntries, "osv.gob"); err != nil { return fmt.Errorf("could not write OSV gob: %w", err) } slog.Info("wrote osv.gob", "entries", len(osvEntries)) + if err := writeGobFileItems(euvdRelationships, "euvd_relationships.gob"); err != nil { + return fmt.Errorf("could not write EUVD gob: %w", err) + } + slog.Info("wrote euvd relationships data", "entries", len(euvdRelationships)) + // Fetch the remaining sources in parallel (network only — no DB writes yet). 
var ( epssData map[string]dtos.EPSS - kevEntries []CISAKEVEntry + kevEntries []KEVEntry allExploits []models.Exploit ) group, groupCtx := errgroup.WithContext(ctx) @@ -168,20 +195,31 @@ func (s *VulnDBService) exportRC(ctx context.Context, computeDiff bool) error { }) group.Go(func() error { - slog.Info("start fetching CISA KEV data") + slog.Info("start fetching KEV data") kevFetchCtx, kevCancel := context.WithTimeout(groupCtx, 30*time.Second) defer kevCancel() - kevCVEs, err := s.cisaKEV.Fetch(kevFetchCtx) + cisaKEVCVEs, err := s.cisaKEV.Fetch(kevFetchCtx) if err != nil { return fmt.Errorf("could not fetch CISA KEV data: %w", err) } - filtered := kevCVEs[:0] - for _, c := range kevCVEs { + slog.Info("successfully fetched CISA KEV data") + + euvdKEVCVEs, err := s.euvdKEV.Fetch(ctx) + if err != nil { + return fmt.Errorf("could not fetch EUVD KEV data: %w", err) + } + slog.Info("successfully fetched EUVD KEV data") + + allKEVCVEs := mergeKEVInformation(cisaKEVCVEs, euvdKEVCVEs) + + filtered := make([]models.CVE, 0, len(allKEVCVEs)) + for _, c := range allKEVCVEs { if _, ok := survivingCVEs[c.CVE]; ok { filtered = append(filtered, c) } } - kevEntries = cisaKEVEntriesToGob(filtered) + + kevEntries = kevEntriesToGob(filtered) return nil }) @@ -224,9 +262,9 @@ func (s *VulnDBService) exportRC(ctx context.Context, computeDiff bool) error { if err := InsertEPSSBulk(ctx, tx, epssData); err != nil { return fmt.Errorf("could not write EPSS data: %w", err) } - slog.Info("writing CISA KEV data to database") - if err := InsertCISAKEVBulk(ctx, tx, kevEntries); err != nil { - return fmt.Errorf("could not write CISA KEV data: %w", err) + slog.Info("writing KEV data to database") + if err := InsertKEVBulk(ctx, tx, kevEntries); err != nil { + return fmt.Errorf("could not write KEV data: %w", err) } slog.Info("writing exploit data to database") if err := insertExploitsBulk(ctx, tx, allExploits, "exploits_stage"); err != nil { @@ -293,6 +331,7 @@ func (s *VulnDBService) exportRC(ctx 
context.Context, computeDiff bool) error { archiveFiles := []string{ "osv.gob", + "euvd_relationships.gob", "epss.gob", "cisakev.gob", "exploits.gob", @@ -457,9 +496,14 @@ func (s *VulnDBService) populateDBFromGobsStream(ctx context.Context, tx pgx.Tx, var ( epssData map[string]dtos.EPSS - kevEntries []CISAKEVEntry + kevEntries []KEVEntry ) + euvdRelationships, err := readAllGobItems[models.CVERelationship](workingDir + "/euvd_relationships.gob") + if err != nil { + return fmt.Errorf("could not read euvd relationships gob: %w", err) + } + vulndbChan := make(chan vulndbRows, 4) exploitChan := make(chan []models.Exploit, 4) malPkgChan := make(chan malRows, 4) @@ -513,7 +557,7 @@ func (s *VulnDBService) populateDBFromGobsStream(ctx context.Context, tx pgx.Tx, }) group.Go(func() error { - return streamToDatabase(groupCtx, tx, vulndbChan, exploitChan, malPkgChan) + return streamToDatabase(groupCtx, tx, vulndbChan, exploitChan, malPkgChan, euvdRelationships) }) if err := group.Wait(); err != nil { @@ -527,7 +571,7 @@ func (s *VulnDBService) populateDBFromGobsStream(ctx context.Context, tx pgx.Tx, slog.Info("applied epss data", "entries", len(epssData), "took", time.Since(t)) t = time.Now() - if err := InsertCISAKEVBulk(ctx, tx, kevEntries); err != nil { + if err := InsertKEVBulk(ctx, tx, kevEntries); err != nil { return fmt.Errorf("could not apply CISA KEV data: %w", err) } slog.Info("applied cisa kev data", "entries", len(kevEntries), "took", time.Since(t)) @@ -542,10 +586,11 @@ func (s *VulnDBService) populateDBFromGobsBulk(ctx context.Context, tx pgx.Tx, w group, _ := errgroup.WithContext(ctx) var ( - osvEntries []OSVEntry - epssData map[string]dtos.EPSS - kevEntries []CISAKEVEntry - gobExploit []GobExploit + osvEntries []OSVEntry + euvdRelationships []models.CVERelationship + epssData map[string]dtos.EPSS + kevEntries []KEVEntry + gobExploit []GobExploit ) group.Go(func() error { @@ -558,6 +603,16 @@ func (s *VulnDBService) populateDBFromGobsBulk(ctx context.Context, 
tx pgx.Tx, w slog.Info("decoded osv.gob", "entries", len(osvEntries), "took", time.Since(t)) return nil }) + group.Go(func() error { + t := time.Now() + var err error + euvdRelationships, err = readAllGobItems[models.CVERelationship](workingDir + "/euvd_relationships.gob") + if err != nil { + return fmt.Errorf("could not read EUVD gob: %w", err) + } + slog.Info("decoded euvd_relationships.gob", "entries", len(euvdRelationships), "took", time.Since(t)) + return nil + }) group.Go(func() error { t := time.Now() if err := readGobFile(workingDir+"/epss.gob", &epssData); err != nil { @@ -593,7 +648,7 @@ func (s *VulnDBService) populateDBFromGobsBulk(ctx context.Context, tx pgx.Tx, w malRows := gobOSVToMalTransformer(osvEntries) exploits := gobExploitFilterTransformer(gobExploit) - if err := writeToDatabase(ctx, tx, vulnRows, exploits, malRows, epssData, kevEntries); err != nil { + if err := writeToDatabase(ctx, tx, vulnRows, exploits, malRows, epssData, kevEntries, euvdRelationships); err != nil { return err } return nil @@ -608,7 +663,7 @@ func heapMB() uint64 { return m.HeapAlloc / 1024 / 1024 } -func writeToDatabase(ctx context.Context, tx pgx.Tx, rows vulndbRows, exploits []models.Exploit, mal malRows, epssData map[string]dtos.EPSS, kevEntries []CISAKEVEntry) error { +func writeToDatabase(ctx context.Context, tx pgx.Tx, rows vulndbRows, exploits []models.Exploit, mal malRows, epssData map[string]dtos.EPSS, kevEntries []KEVEntry, euvdRelationships []models.CVERelationship) error { slog.Info("start writing rows to database", "heap_alloc_mb", heapMB()) start := time.Now() @@ -636,6 +691,12 @@ func writeToDatabase(ctx context.Context, tx pgx.Tx, rows vulndbRows, exploits [ } slog.Info("copied cve_relationships to staging", "count", len(rows.CVERelationships), "took", time.Since(t), "heap_alloc_mb", heapMB()) + t = time.Now() + if err := InsertCVERelationshipsBulk(ctx, tx, euvdRelationships, "cve_relationships_stage"); err != nil { + return fmt.Errorf("could not copy euvd 
cve relationships to staging: %w", err) + } + slog.Info("copied euvd cve_relationships to staging", "count", len(euvdRelationships), "took", time.Since(t), "heap_alloc_mb", heapMB()) + t = time.Now() if err := insertAffectedComponentsBulk(ctx, tx, rows.AffectedComponents, "affected_components_stage"); err != nil { return fmt.Errorf("could not copy affected_components to staging: %w", err) @@ -667,7 +728,7 @@ func writeToDatabase(ctx context.Context, tx pgx.Tx, rows vulndbRows, exploits [ slog.Info("inserted epss", "count", len(epssData), "took", time.Since(t), "heap_alloc_mb", heapMB()) t = time.Now() - if err := InsertCISAKEVBulk(ctx, tx, kevEntries); err != nil { + if err := InsertKEVBulk(ctx, tx, kevEntries); err != nil { return fmt.Errorf("could not insert cisa kev: %w", err) } slog.Info("inserted cisa_kev", "count", len(kevEntries), "took", time.Since(t), "heap_alloc_mb", heapMB()) @@ -741,11 +802,11 @@ func (s *VulnDBService) tryApplyQuickDiff(ctx context.Context, tx pgx.Tx, workin return false, fmt.Errorf("quick-diff: could not apply epss: %w", err) } - var kevEntries []CISAKEVEntry + var kevEntries []KEVEntry if err := readGobFile(workingDir+"/cisakev.gob", &kevEntries); err != nil { return false, fmt.Errorf("quick-diff: could not read cisakev.gob: %w", err) } - if err := InsertCISAKEVBulk(ctx, tx, kevEntries); err != nil { + if err := InsertKEVBulk(ctx, tx, kevEntries); err != nil { return false, fmt.Errorf("quick-diff: could not apply cisa kev: %w", err) } @@ -857,8 +918,7 @@ func pullVulnDBDebug(ctx context.Context) (string, string, error) { } func pullVulnDBFromOCI(ctx context.Context) (string, string, error) { - reg := "ghcr.io/l3montree-dev/devguard/vulndb/v2" - repo, err := remote.NewRepository(reg) + repo, err := remote.NewRepository(vulnDBURI) if err != nil { return "", "", fmt.Errorf("could not connect to remote repository: %w", err) } @@ -907,7 +967,7 @@ func pullVulnDBFromOCI(ctx context.Context) (string, string, error) { // streamToDatabase 
drains all three input channels in a single goroutine, writes all rows // into staging tables, then syncs to live tables. When direct=true (empty DB) it uses // flushStagingTables (simple INSERT) instead of SyncAllTables (expensive EXCEPT diff). -func streamToDatabase(ctx context.Context, tx pgx.Tx, vulnRowsIn <-chan vulndbRows, exploitsIn <-chan []models.Exploit, malPkgIn <-chan malRows) error { +func streamToDatabase(ctx context.Context, tx pgx.Tx, vulnRowsIn <-chan vulndbRows, exploitsIn <-chan []models.Exploit, malPkgIn <-chan malRows, euvdRelationships []models.CVERelationship) error { slog.Info("start writing rows to database") start := time.Now() @@ -1013,6 +1073,11 @@ func streamToDatabase(ctx context.Context, tx pgx.Tx, vulnRowsIn <-chan vulndbRo } } + if err := InsertCVERelationshipsBulk(ctx, tx, euvdRelationships, "cve_relationships_stage"); err != nil { + return fmt.Errorf("could not insert euvd cve relationships: %w", err) + } + relationshipCount += len(euvdRelationships) + if err := SyncAllTables(ctx, tx); err != nil { return fmt.Errorf("could not sync staging tables to live: %w", err) } @@ -1029,3 +1094,33 @@ func streamToDatabase(ctx context.Context, tx pgx.Tx, vulnRowsIn <-chan vulndbRo ) return nil } + +// builds the union of 2 KEV slices - if we have duplicate entries from the EUVD we overwrite the CISA data with the EUVD data +func mergeKEVInformation(cisaKEV, euvdKEV []models.CVE) []models.CVE { + cveIDToKev := make(map[string]models.CVE, len(cisaKEV)+len(euvdKEV)) + + // first fill the map with all KEV data from the CISA + for i := range cisaKEV { + cveIDToKev[cisaKEV[i].CVE] = cisaKEV[i] + } + + // then for each KEV entry from the EUVD check if it already exists and if it does merge existing information + for _, kev := range euvdKEV { + if cve, ok := cveIDToKev[kev.CVE]; ok { + cve.EUVDExploitAdd = kev.EUVDExploitAdd + if kev.CISAExploitAdd != nil { + cve.CISAExploitAdd = kev.CISAExploitAdd + } + cveIDToKev[kev.CVE] = cve + } else { + 
cveIDToKev[kev.CVE] = kev + } + } + + // at the end build a slice from the map + unionSlice := make([]models.CVE, 0, len(cveIDToKev)) + for _, cve := range cveIDToKev { + unionSlice = append(unionSlice, cve) + } + return unionSlice +}