Skip to content

Commit ec60ee9

Browse files
committed
upload with gcs in go
1 parent 6c15d89 commit ec60ee9

2 files changed

Lines changed: 73 additions & 54 deletions

File tree

go/cmd/first_package_finder/main.go

Lines changed: 73 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,20 @@ package main
44
import (
55
"bufio"
66
"compress/gzip"
7+
"context"
78
"encoding/csv"
89
"encoding/json"
910
"errors"
1011
"flag"
1112
"fmt"
12-
"log"
13+
"log/slog"
1314
"net/http"
1415
"os"
1516
"path/filepath"
1617
"strings"
1718
"time"
19+
20+
"cloud.google.com/go/storage"
1821
)
1922

2023
const (
@@ -39,6 +42,16 @@ var (
3942
firstSnapshotDate = time.Date(2005, 3, 12, 0, 0, 0, 0, time.UTC)
4043
)
4144

// HTTPError represents an error from an HTTP request, including the status code.
// Callers can recover it from an error chain with errors.As to branch on
// StatusCode instead of matching on the error text.
type HTTPError struct {
	// StatusCode is the HTTP status code carried by the error.
	StatusCode int
	// URL is the URL the status code was reported for.
	URL string
}

// Error implements the error interface, reporting the status code and URL.
func (e *HTTPError) Error() string {
	return fmt.Sprintf("HTTP %d for URL: %s", e.StatusCode, e.URL)
}
54+
// convertDatetimeToStrDatetime converts t to UTC and formats it as a compact
// timestamp of the form YYYYMMDDTHHMMSSZ (for example 20050312T000000Z).
func convertDatetimeToStrDatetime(t time.Time) string {
	return t.UTC().Format("20060102T150405Z")
}
@@ -56,7 +69,8 @@ type DebianVersionInfo struct {
5669
}
5770

5871
func retrieveCodenameToVersion() (map[string]*DebianVersionInfo, error) {
59-
resp, err := http.Get(debianReleaseVersionsURL)
72+
client := &http.Client{Timeout: 30 * time.Second}
73+
resp, err := client.Get(debianReleaseVersionsURL)
6074
if err != nil {
6175
return nil, err
6276
}
@@ -116,7 +130,7 @@ func retrieveCodenameToVersion() (map[string]*DebianVersionInfo, error) {
116130

117131
releaseDate, err := time.Parse("2006-01-02", releaseStr)
118132
if err != nil {
119-
log.Printf("Warning: failed to parse date %s for series %s", releaseStr, series)
133+
slog.Warn("Failed to parse date", "date", releaseStr, "series", series)
120134
continue
121135
}
122136

@@ -141,15 +155,16 @@ func parseCreatedDatesAndSetTime(date time.Time) time.Time {
141155

142156
func loadSources(date time.Time, dist string) (map[string]string, error) {
143157
url := getDebianSourcesURL(date, dist)
144-
//nolint:gosec // URL is constructed from trusted source
145-
resp, err := http.Get(url)
158+
159+
client := &http.Client{Timeout: 30 * time.Second}
160+
resp, err := client.Get(url)
146161
if err != nil {
147162
return nil, err
148163
}
149164
defer resp.Body.Close()
150165

151166
if resp.StatusCode != http.StatusOK {
152-
return nil, fmt.Errorf("HTTP %d", resp.StatusCode)
167+
return nil, &HTTPError{StatusCode: resp.StatusCode, URL: url}
153168
}
154169

155170
gzReader, err := gzip.NewReader(resp.Body)
@@ -191,18 +206,19 @@ func loadFirstPackages() (map[string]*DebianVersionInfo, error) {
191206
date := parseCreatedDatesAndSetTime(info.Release)
192207
for i := 0; i <= firstReleaseLookahead; i++ {
193208
actualDate := date.Add(time.Duration(i) * 24 * time.Hour)
194-
log.Printf("attempting load of version %s at %s", series, actualDate)
209+
slog.Info("Attempting load of version", "series", series, "date", actualDate)
195210

196211
sources, err := loadSources(actualDate, series)
197212
if err == nil {
198213
info.Sources = sources
199-
log.Printf("loaded version %s at %s", series, actualDate)
214+
slog.Info("Loaded version", "series", series, "date", actualDate)
200215

201216
break
202217
}
203218

204-
if !strings.Contains(err.Error(), "HTTP 404") {
205-
log.Printf("Error loading sources for %s at %s: %v", series, actualDate, err)
219+
var httpErr *HTTPError
220+
if !errors.As(err, &httpErr) || httpErr.StatusCode != http.StatusNotFound {
221+
slog.Error("Error loading sources", "series", series, "date", actualDate, "err", err)
206222
}
207223

208224
if actualDate.After(time.Now()) {
@@ -216,38 +232,75 @@ func loadFirstPackages() (map[string]*DebianVersionInfo, error) {
216232

217233
func main() {
218234
var outputDir string
235+
var uploadToGCS bool
236+
var outputBucket string
237+
219238
flag.StringVar(&outputDir, "o", "first_package_output", "Output folder")
220239
flag.StringVar(&outputDir, "output-dir", "first_package_output", "Output folder")
240+
flag.BoolVar(&uploadToGCS, "upload-to-gcs", false, "Upload to GCS")
241+
flag.StringVar(&outputBucket, "output-bucket", "debian-osv", "Output bucket")
221242
flag.Parse()
222243

223-
log.SetFlags(log.LstdFlags | log.Lshortfile)
244+
flag.Parse()
224245

225246
codenameToVersion, err := loadFirstPackages()
226247
if err != nil {
227-
log.Fatalf("Failed to load first packages: %v", err)
248+
slog.Error("Failed to load first packages", "err", err)
249+
os.Exit(1)
228250
}
229251

230-
log.Println("first_package loaded, begin writing out data")
252+
slog.Info("first_package loaded, begin writing out data")
231253

232-
if err := os.MkdirAll(outputDir, 0755); err != nil {
233-
log.Fatalf("Failed to create output directory: %v", err)
254+
var outBkt *storage.BucketHandle
255+
var ctx context.Context
256+
if uploadToGCS {
257+
ctx = context.Background()
258+
storageClient, err := storage.NewClient(ctx)
259+
if err != nil {
260+
slog.Error("Failed to create storage client", "err", err)
261+
os.Exit(1)
262+
}
263+
outBkt = storageClient.Bucket(outputBucket)
264+
} else {
265+
if err := os.MkdirAll(outputDir, 0755); err != nil {
266+
slog.Error("Failed to create output directory", "err", err)
267+
os.Exit(1)
268+
}
234269
}
235270

236271
for _, info := range codenameToVersion {
237272
if info.Sources == nil {
238273
continue
239274
}
240275

241-
outPath := filepath.Join(outputDir, info.Version+".json")
242276
b, err := json.Marshal(info.Sources)
243277
if err != nil {
244-
log.Printf("Failed to marshal sources for %s: %v", info.Version, err)
278+
slog.Error("Failed to marshal sources", "version", info.Version, "err", err)
245279
continue
246280
}
247281

248-
//nolint:gosec // 0644 is fine for public vulnerability data
249-
if err := os.WriteFile(outPath, b, 0644); err != nil {
250-
log.Printf("Failed to write to %s: %v", outPath, err)
282+
if uploadToGCS {
283+
objName := filepath.Join(outputDir, info.Version+".json")
284+
obj := outBkt.Object(objName)
285+
wc := obj.NewWriter(ctx)
286+
wc.ContentType = "application/json"
287+
if _, err := wc.Write(b); err != nil {
288+
slog.Error("Failed to write to GCS object", "objName", objName, "err", err)
289+
wc.Close()
290+
291+
continue
292+
}
293+
if err := wc.Close(); err != nil {
294+
slog.Error("Failed to close GCS writer", "objName", objName, "err", err)
295+
}
296+
slog.Info("Uploaded to GCS", "objName", objName)
297+
} else {
298+
outPath := filepath.Join(outputDir, info.Version+".json")
299+
//nolint:gosec // 0644 is fine for public vulnerability data
300+
if err := os.WriteFile(outPath, b, 0644); err != nil {
301+
slog.Error("Failed to write to file", "outPath", outPath, "err", err)
302+
}
251303
}
252304
}
305+
slog.Info("Finished")
253306
}

go/cmd/first_package_finder/run_first_package_finder.sh

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)