1616package vulndb
1717
1818import (
19- "archive/tar"
20- "compress/gzip"
2119 "context"
22- "encoding/json"
2320 "fmt"
24- "io"
2521 "log/slog"
2622 "net/http"
2723 "strings"
28- "sync"
2924 "time"
3025
3126 "github.com/jackc/pgx/v5"
@@ -50,12 +45,11 @@ type MaliciousPackageChecker struct {
5045 httpClient * http.Client
5146}
5247
53- type malRow struct {
48+ type malRows struct {
5449 pkgs []models.MaliciousPackage
5550 comps []models.MaliciousAffectedComponent
5651}
5752
58-
5953func NewMaliciousPackageChecker (
6054 repository * repositories.MaliciousPackageRepository ,
6155) (* MaliciousPackageChecker , error ) {
@@ -68,145 +62,6 @@ func NewMaliciousPackageChecker(
6862
6963// FetchAll downloads the malicious packages archive and returns all parsed packages
7064// and affected components without touching the database.
71- func (c * MaliciousPackageChecker ) FetchAll (ctx context.Context ) ([]models.MaliciousPackage , []models.MaliciousAffectedComponent , error ) {
72- slog .Info ("Downloading malicious packages archive" , "url" , c .repoURL )
73- req , err := http .NewRequestWithContext (ctx , http .MethodGet , c .repoURL , nil )
74- if err != nil {
75- return nil , nil , fmt .Errorf ("failed to create download request: %w" , err )
76- }
77- resp , err := c .httpClient .Do (req )
78- if err != nil {
79- return nil , nil , fmt .Errorf ("failed to download archive: %w" , err )
80- }
81- defer resp .Body .Close ()
82-
83- if resp .StatusCode != http .StatusOK {
84- return nil , nil , fmt .Errorf ("failed to download archive: HTTP %d" , resp .StatusCode )
85- }
86-
87- gzr , err := gzip .NewReader (resp .Body )
88- if err != nil {
89- return nil , nil , fmt .Errorf ("failed to create gzip reader: %w" , err )
90- }
91- defer gzr .Close ()
92-
93- tr := tar .NewReader (gzr )
94- ecosystems := []string {"npm" , "go" , "maven" , "pypi" , "crates.io" }
95-
96- processWG := & sync.WaitGroup {}
97- collectWG := & sync.WaitGroup {}
98-
99- fileJobs := make (chan []byte , malPkgNumOfGoRoutines * 20 )
100- resultJobs := make (chan processingResults , BatchSize * 2 )
101-
102- for range malPkgNumOfGoRoutines {
103- processWG .Add (1 )
104- go processMaliciousPackageFile (processWG , fileJobs , resultJobs )
105- }
106-
107- var (
108- packages []models.MaliciousPackage
109- components []models.MaliciousAffectedComponent
110- mu sync.Mutex
111- )
112- collectWG .Add (1 )
113- go func () {
114- defer collectWG .Done ()
115- for r := range resultJobs {
116- // pre-compute component IDs so they are stable in the gob file
117- for i := range r .AffectedComponents {
118- if r .AffectedComponents [i ].ID == "" {
119- r .AffectedComponents [i ].ID = r .AffectedComponents [i ].CalculateHash ()
120- }
121- }
122- mu .Lock ()
123- packages = append (packages , r .Package )
124- components = append (components , r .AffectedComponents ... )
125- mu .Unlock ()
126- }
127- }()
128-
129- for {
130- header , err := tr .Next ()
131- if err != nil {
132- if err == io .EOF {
133- break
134- }
135- return nil , nil , fmt .Errorf ("failed to read tar: %w" , err )
136- }
137- if ! strings .HasSuffix (header .Name , ".json" ) || header .Typeflag != tar .TypeReg {
138- continue
139- }
140- isTarget := false
141- for _ , eco := range ecosystems {
142- if strings .Contains (header .Name , "/osv/malicious/" + eco + "/" ) {
143- isTarget = true
144- break
145- }
146- }
147- if ! isTarget {
148- continue
149- }
150- data , err := io .ReadAll (tr )
151- if err != nil {
152- slog .Debug ("Failed to read file from tar" , "name" , header .Name , "error" , err )
153- continue
154- }
155- fileJobs <- data
156- }
157-
158- close (fileJobs )
159- processWG .Wait ()
160- close (resultJobs )
161- collectWG .Wait ()
162-
163- slog .Info ("Fetched malicious packages" , "packages" , len (packages ), "components" , len (components ))
164- return packages , components , nil
165- }
166-
167- type processingResults struct {
168- Package models.MaliciousPackage
169- AffectedComponents []models.MaliciousAffectedComponent
170- }
171-
172- // this function grabs json file contents from the jobs channel and builds the package as well as the affected components from it. These are then sent to the db worker function
173- func processMaliciousPackageFile (waitGroup * sync.WaitGroup , jobs chan []byte , results chan processingResults ) {
174- defer waitGroup .Done ()
175- for data := range jobs {
176- var entry dtos.OSV
177- if err := json .Unmarshal (data , & entry ); err != nil {
178- slog .Debug ("Failed to unmarshal JSON" , "error" , err )
179- continue
180- }
181-
182- if entry .ID == "" {
183- slog .Warn ("Skipping malicious package with empty ID" , "summary" , entry .Summary )
184- continue
185- }
186-
187- if len (entry .Affected ) == 0 {
188- continue
189- }
190-
191- // Create malicious package record
192- pkg := models.MaliciousPackage {
193- ID : entry .ID ,
194- Summary : entry .Summary ,
195- Details : entry .Details ,
196- Published : entry .Published ,
197- Modified : entry .Modified ,
198- }
199-
200- // Create affected components
201- components := transformer .MaliciousAffectedComponentFromOSV (entry , entry .ID )
202- // send both as a job to the db writer function
203- results <- processingResults {
204- Package : pkg ,
205- AffectedComponents : components ,
206- }
207- }
208- }
209-
21065func buildFakePackages () ([]models.MaliciousPackage , []models.MaliciousAffectedComponent ) {
21166 testPackages := map [string ][]string {
21267 "npm" : {"fake-malicious-npm-package" , "@fake-org/malicious-package" },
@@ -223,8 +78,8 @@ func buildFakePackages() ([]models.MaliciousPackage, []models.MaliciousAffectedC
22378 for ecosystem , pkgNames := range testPackages {
22479 for _ , pkgName := range pkgNames {
22580 normalizedPkgName := strings .NewReplacer ("/" , "-" , "@" , "-" , ":" , "-" , "." , "-" ).Replace (pkgName )
226- fakeID := fmt .Sprintf ("FAKE-TEST-%s-%s" , strings .ToUpper (ecosystem ), strings .ToUpper (normalizedPkgName ))
227- fakeEntry := dtos.OSV {
81+ fakeID := fmt .Sprintf ("MAL- FAKE-TEST-%s-%s" , strings .ToUpper (ecosystem ), strings .ToUpper (normalizedPkgName ))
82+ fakeEntry := & dtos.OSV {
22883 ID : fakeID ,
22984 Summary : fmt .Sprintf ("Fake malicious %s package for testing" , ecosystem ),
23085 Details : "This is a fake malicious package entry used for testing the dependency proxy" ,
0 commit comments