From 0847d855bb82da64ad568d96c0516f1bac8423be Mon Sep 17 00:00:00 2001 From: "Calvin A. Allen" Date: Tue, 16 Dec 2025 13:07:50 -0500 Subject: [PATCH 1/5] feat(infra): add binary mirroring workflows and tool Add infrastructure to mirror all runtime binaries to builds.dtvem.io: - scripts/mirror-binaries: Go tool to download from upstream and upload to R2 - Supports parallel downloads with configurable workers - Verifies SHA256 checksums when available - Sync-only mode to skip files already in R2 - Dry-run mode for testing - mirror-all.yml: Bulk migration workflow - Matrix strategy to parallelize by runtime - Can run for specific runtime or all at once - Supports dry-run mode - mirror-sync.yml: Periodic sync workflow - Triggers after manifest deployments - Only uploads new files not already in R2 - Can also be triggered manually This enables dtvem to be the single source for all runtime binaries, improving reliability and consistency. Part of #159 --- .github/workflows/mirror-all.yml | 80 +++++++ .github/workflows/mirror-sync.yml | 70 ++++++ scripts/mirror-binaries/go.mod | 27 +++ scripts/mirror-binaries/go.sum | 36 +++ scripts/mirror-binaries/main.go | 372 ++++++++++++++++++++++++++++++ 5 files changed, 585 insertions(+) create mode 100644 .github/workflows/mirror-all.yml create mode 100644 .github/workflows/mirror-sync.yml create mode 100644 scripts/mirror-binaries/go.mod create mode 100644 scripts/mirror-binaries/go.sum create mode 100644 scripts/mirror-binaries/main.go diff --git a/.github/workflows/mirror-all.yml b/.github/workflows/mirror-all.yml new file mode 100644 index 0000000..bdb4ab7 --- /dev/null +++ b/.github/workflows/mirror-all.yml @@ -0,0 +1,80 @@ +name: Mirror All Binaries + +on: + workflow_dispatch: + inputs: + runtime: + description: 'Runtime to mirror (node, python, ruby, or all)' + required: true + default: 'all' + type: choice + options: + - all + - node + - python + - ruby + dry_run: + description: 'Dry run (report only, no uploads)' + 
required: false + default: false + type: boolean + +jobs: + mirror: + name: Mirror ${{ matrix.runtime }} + runs-on: ubuntu-latest + timeout-minutes: 360 # 6 hours max + strategy: + fail-fast: false + matrix: + runtime: ${{ inputs.runtime == 'all' && fromJson('["node", "python", "ruby"]') || fromJson(format('["{0}"]', inputs.runtime)) }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' + + - name: Build mirror tool + run: | + cd scripts/mirror-binaries + go build -o mirror-binaries . + + - name: Mirror binaries (dry run) + if: inputs.dry_run + run: | + ./scripts/mirror-binaries/mirror-binaries \ + --runtime=${{ matrix.runtime }} \ + --manifest-dir=src/internal/manifest/data \ + --dry-run + + - name: Mirror binaries + if: ${{ !inputs.dry_run }} + env: + R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com + R2_BUCKET: ${{ secrets.CLOUDFLARE_R2_BUILDS_BUCKET }} + R2_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }} + R2_SECRET_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }} + run: | + ./scripts/mirror-binaries/mirror-binaries \ + --runtime=${{ matrix.runtime }} \ + --manifest-dir=src/internal/manifest/data \ + --r2-endpoint="$R2_ENDPOINT" \ + --r2-bucket="$R2_BUCKET" \ + --r2-access-key="$R2_ACCESS_KEY" \ + --r2-secret-key="$R2_SECRET_KEY" \ + --workers=20 + + - name: Generate summary + if: always() + run: | + echo "## Mirror Results for ${{ matrix.runtime }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + if [ "${{ inputs.dry_run }}" = "true" ]; then + echo "**Mode:** Dry run (no uploads)" >> $GITHUB_STEP_SUMMARY + else + echo "**Mode:** Live upload to R2" >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/mirror-sync.yml b/.github/workflows/mirror-sync.yml new file mode 100644 index 0000000..7a28e81 --- /dev/null +++ b/.github/workflows/mirror-sync.yml @@ -0,0 +1,70 @@ +name: Mirror Sync + +on: + # Run after 
manifest updates are deployed + workflow_run: + workflows: ["Deploy Manifests"] + types: + - completed + # Manual trigger + workflow_dispatch: + inputs: + runtime: + description: 'Runtime to sync (node, python, ruby, or all)' + required: true + default: 'all' + type: choice + options: + - all + - node + - python + - ruby + +jobs: + sync: + name: Sync ${{ matrix.runtime }} + runs-on: ubuntu-latest + if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} + timeout-minutes: 120 # 2 hours max for sync + strategy: + fail-fast: false + matrix: + runtime: ${{ (github.event_name == 'workflow_dispatch' && inputs.runtime == 'all') && fromJson('["node", "python", "ruby"]') || (github.event_name == 'workflow_dispatch' && fromJson(format('["{0}"]', inputs.runtime))) || fromJson('["node", "python", "ruby"]') }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' + + - name: Build mirror tool + run: | + cd scripts/mirror-binaries + go build -o mirror-binaries . + + - name: Sync new binaries + env: + R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com + R2_BUCKET: ${{ secrets.CLOUDFLARE_R2_BUILDS_BUCKET }} + R2_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }} + R2_SECRET_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }} + run: | + ./scripts/mirror-binaries/mirror-binaries \ + --runtime=${{ matrix.runtime }} \ + --manifest-dir=src/internal/manifest/data \ + --r2-endpoint="$R2_ENDPOINT" \ + --r2-bucket="$R2_BUCKET" \ + --r2-access-key="$R2_ACCESS_KEY" \ + --r2-secret-key="$R2_SECRET_KEY" \ + --sync-only \ + --workers=10 + + - name: Generate summary + if: always() + run: | + echo "## Sync Results for ${{ matrix.runtime }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "Synced new binaries not already present in R2." 
>> $GITHUB_STEP_SUMMARY diff --git a/scripts/mirror-binaries/go.mod b/scripts/mirror-binaries/go.mod new file mode 100644 index 0000000..c856aae --- /dev/null +++ b/scripts/mirror-binaries/go.mod @@ -0,0 +1,27 @@ +module github.com/dtvem/dtvem/scripts/mirror-binaries + +go 1.23.0 + +require ( + github.com/aws/aws-sdk-go-v2 v1.32.6 + github.com/aws/aws-sdk-go-v2/config v1.28.6 + github.com/aws/aws-sdk-go-v2/credentials v1.17.47 + github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0 +) + +require ( + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.2 // indirect + github.com/aws/smithy-go v1.22.1 // indirect +) diff --git a/scripts/mirror-binaries/go.sum b/scripts/mirror-binaries/go.sum new file mode 100644 index 0000000..8c425bd --- /dev/null +++ b/scripts/mirror-binaries/go.sum @@ -0,0 +1,36 @@ +github.com/aws/aws-sdk-go-v2 v1.32.6 h1:7BokKRgRPuGmKkFMhEg/jSul+tB9VvXhcViILtfG8b4= +github.com/aws/aws-sdk-go-v2 v1.32.6/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 h1:lL7IfaFzngfx0ZwUGOZdsFFnQ5uLvR0hWqqhyE7Q9M8= 
+github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7/go.mod h1:QraP0UcVlQJsmHfioCrveWOC1nbiWUl3ej08h4mXWoc= +github.com/aws/aws-sdk-go-v2/config v1.28.6 h1:D89IKtGrs/I3QXOLNTH93NJYtDhm8SYa9Q5CsPShmyo= +github.com/aws/aws-sdk-go-v2/config v1.28.6/go.mod h1:GDzxJ5wyyFSCoLkS+UhGB0dArhb9mI+Co4dHtoTxbko= +github.com/aws/aws-sdk-go-v2/credentials v1.17.47 h1:48bA+3/fCdi2yAwVt+3COvmatZ6jUDNkDTIsqDiMUdw= +github.com/aws/aws-sdk-go-v2/credentials v1.17.47/go.mod h1:+KdckOejLW3Ks3b0E3b5rHsr2f9yuORBum0WPnE5o5w= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 h1:AmoU1pziydclFT/xRV+xXE/Vb8fttJCLRPv8oAkprc0= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21/go.mod h1:AjUdLYe4Tgs6kpH4Bv7uMZo7pottoyHMn4eTcIcneaY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 h1:s/fF4+yDQDoElYhfIVvSNyeCydfbuTKzhxSXDXCPasU= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25/go.mod h1:IgPfDv5jqFIzQSNbUEMoitNooSMXjRSDkhXv8jiROvU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 h1:ZntTCl5EsYnhN/IygQEUugpdwbhdkom9uHcbCftiGgA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25/go.mod h1:DBdPrgeocww+CSl1C8cEV8PN1mHMBhuCDLpXezyvWkE= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25 h1:r67ps7oHCYnflpgDy2LZU0MAQtQbYIOqNNnqGO6xQkE= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25/go.mod h1:GrGY+Q4fIokYLtjCVB/aFfCVL6hhGUFl8inD18fDalE= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 h1:iXtILhvDxB6kPvEXgsDhGaZCSC6LQET5ZHSdJozeI0Y= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1/go.mod h1:9nu0fVANtYiAePIBh2/pFUSwtJ402hLnp854CNoDOeE= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.6 h1:HCpPsWqmYQieU7SS6E9HXfdAMSud0pteVXieJmcpIRI= +github.com/aws/aws-sdk-go-v2/service/internal/checksum 
v1.4.6/go.mod h1:ngUiVRCco++u+soRRVBIvBZxSMMvOVMXA4PJ36JLfSw= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 h1:50+XsN70RS7dwJ2CkVNXzj7U2L1HKP8nqTd3XWEXBN4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6/go.mod h1:WqgLmwY7so32kG01zD8CPTJWVWM+TzJoOVHwTg4aPug= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6 h1:BbGDtTi0T1DYlmjBiCr/le3wzhA37O8QTC5/Ab8+EXk= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6/go.mod h1:hLMJt7Q8ePgViKupeymbqI0la+t9/iYFBjxQCFwuAwI= +github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0 h1:nyuzXooUNJexRT0Oy0UQY6AhOzxPxhtt4DcBIHyCnmw= +github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0/go.mod h1:sT/iQz8JK3u/5gZkT+Hmr7GzVZehUMkRZpOaAwYXeGY= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 h1:rLnYAfXQ3YAccocshIH5mzNNwZBkBo+bP6EhIxak6Hw= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.7/go.mod h1:ZHtuQJ6t9A/+YDuxOLnbryAmITtr8UysSny3qcyvJTc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 h1:JnhTZR3PiYDNKlXy50/pNeix9aGMo6lLpXwJ1mw8MD4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6/go.mod h1:URronUEGfXZN1VpdktPSD1EkAL9mfrV+2F4sjH38qOY= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.2 h1:s4074ZO1Hk8qv65GqNXqDjmkf4HSQqJukaLuuW0TpDA= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.2/go.mod h1:mVggCnIWoM09jP71Wh+ea7+5gAp53q+49wDFs1SW5z8= +github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro= +github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= diff --git a/scripts/mirror-binaries/main.go b/scripts/mirror-binaries/main.go new file mode 100644 index 0000000..527f0a9 --- /dev/null +++ b/scripts/mirror-binaries/main.go @@ -0,0 +1,372 @@ +package main + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "encoding/json" + "flag" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + 
"github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// Manifest represents the structure of a runtime manifest +type Manifest struct { + Versions map[string]map[string]*Download `json:"versions"` +} + +// Download represents a single download entry +type Download struct { + URL string `json:"url"` + SHA256 string `json:"sha256,omitempty"` +} + +// MirrorJob represents a single file to mirror +type MirrorJob struct { + Runtime string + Version string + Platform string + URL string + SHA256 string + R2Key string +} + +// Stats tracks mirroring statistics +type Stats struct { + Total int64 + Skipped int64 + Mirrored int64 + Failed int64 + BytesDown int64 +} + +var ( + runtime = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)") + dryRun = flag.Bool("dry-run", false, "Report what would be done without doing it") + syncOnly = flag.Bool("sync-only", false, "Only mirror files not already in R2") + manifestDir = flag.String("manifest-dir", "src/internal/manifest/data", "Directory containing manifest files") + r2Endpoint = flag.String("r2-endpoint", "", "R2 endpoint URL") + r2Bucket = flag.String("r2-bucket", "", "R2 bucket name") + r2AccessKey = flag.String("r2-access-key", "", "R2 access key ID") + r2SecretKey = flag.String("r2-secret-key", "", "R2 secret access key") + workers = flag.Int("workers", 10, "Number of parallel workers") + retries = flag.Int("retries", 3, "Number of retries for failed downloads") +) + +func main() { + flag.Parse() + + if *runtime == "" { + fmt.Fprintln(os.Stderr, "Error: --runtime is required (node, python, ruby, or all)") + os.Exit(1) + } + + if !*dryRun { + if *r2Endpoint == "" || *r2Bucket == "" || *r2AccessKey == "" || *r2SecretKey == "" { + fmt.Fprintln(os.Stderr, "Error: R2 credentials required (--r2-endpoint, --r2-bucket, --r2-access-key, --r2-secret-key)") + os.Exit(1) + } + } + + runtimes := []string{*runtime} + if *runtime == "all" { + runtimes = []string{"node", 
"python", "ruby"} + } + + // Initialize S3 client for R2 + var s3Client *s3.Client + var existingKeys map[string]bool + + if !*dryRun { + var err error + s3Client, err = createS3Client() + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating S3 client: %v\n", err) + os.Exit(1) + } + + if *syncOnly { + fmt.Println("Fetching existing files from R2...") + existingKeys, err = listExistingKeys(s3Client) + if err != nil { + fmt.Fprintf(os.Stderr, "Error listing R2 contents: %v\n", err) + os.Exit(1) + } + fmt.Printf("Found %d existing files in R2\n", len(existingKeys)) + } + } + + // Collect all jobs + var jobs []MirrorJob + for _, rt := range runtimes { + manifestPath := filepath.Join(*manifestDir, rt+".json") + rtJobs, err := loadJobs(rt, manifestPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Error loading manifest for %s: %v\n", rt, err) + os.Exit(1) + } + jobs = append(jobs, rtJobs...) + } + + fmt.Printf("Total jobs to process: %d\n", len(jobs)) + + if *dryRun { + fmt.Println("\n[DRY RUN] Would mirror the following files:") + for _, job := range jobs { + fmt.Printf(" %s -> %s\n", job.URL, job.R2Key) + } + fmt.Printf("\nTotal: %d files\n", len(jobs)) + return + } + + // Filter jobs if sync-only + if *syncOnly && existingKeys != nil { + var filtered []MirrorJob + for _, job := range jobs { + if !existingKeys[job.R2Key] { + filtered = append(filtered, job) + } + } + skipped := len(jobs) - len(filtered) + fmt.Printf("Skipping %d files already in R2, %d remaining\n", skipped, len(filtered)) + jobs = filtered + } + + if len(jobs) == 0 { + fmt.Println("No files to mirror") + return + } + + // Process jobs with worker pool + stats := processJobs(s3Client, jobs) + + // Print summary + fmt.Println("\n=== Mirror Summary ===") + fmt.Printf("Total: %d\n", stats.Total) + fmt.Printf("Mirrored: %d\n", stats.Mirrored) + fmt.Printf("Skipped: %d\n", stats.Skipped) + fmt.Printf("Failed: %d\n", stats.Failed) + fmt.Printf("Bytes: %d MB\n", stats.BytesDown/(1024*1024)) + + if 
stats.Failed > 0 { + os.Exit(1) + } +} + +func createS3Client() (*s3.Client, error) { + cfg, err := config.LoadDefaultConfig(context.Background(), + config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( + *r2AccessKey, + *r2SecretKey, + "", + )), + config.WithRegion("auto"), + ) + if err != nil { + return nil, err + } + + client := s3.NewFromConfig(cfg, func(o *s3.Options) { + o.BaseEndpoint = aws.String(*r2Endpoint) + }) + + return client, nil +} + +func listExistingKeys(client *s3.Client) (map[string]bool, error) { + keys := make(map[string]bool) + paginator := s3.NewListObjectsV2Paginator(client, &s3.ListObjectsV2Input{ + Bucket: r2Bucket, + }) + + for paginator.HasMorePages() { + page, err := paginator.NextPage(context.Background()) + if err != nil { + return nil, err + } + for _, obj := range page.Contents { + keys[*obj.Key] = true + } + } + + return keys, nil +} + +func loadJobs(runtime, manifestPath string) ([]MirrorJob, error) { + data, err := os.ReadFile(manifestPath) + if err != nil { + return nil, err + } + + var manifest Manifest + if err := json.Unmarshal(data, &manifest); err != nil { + return nil, err + } + + var jobs []MirrorJob + for version, platforms := range manifest.Versions { + for platform, dl := range platforms { + if dl == nil || dl.URL == "" { + continue + } + + // Determine file extension from URL + ext := getExtension(dl.URL) + r2Key := fmt.Sprintf("%s/%s/%s%s", runtime, version, platform, ext) + + jobs = append(jobs, MirrorJob{ + Runtime: runtime, + Version: version, + Platform: platform, + URL: dl.URL, + SHA256: dl.SHA256, + R2Key: r2Key, + }) + } + } + + return jobs, nil +} + +func getExtension(url string) string { + // Handle common archive extensions + if strings.HasSuffix(url, ".tar.gz") { + return ".tar.gz" + } + if strings.HasSuffix(url, ".tar.xz") { + return ".tar.xz" + } + if strings.HasSuffix(url, ".tar.bz2") { + return ".tar.bz2" + } + if strings.HasSuffix(url, ".zip") { + return ".zip" + } + if 
strings.HasSuffix(url, ".7z") { + return ".7z" + } + // Fallback: extract from URL + base := filepath.Base(url) + if idx := strings.Index(base, "."); idx != -1 { + return base[idx:] + } + return "" +} + +func processJobs(client *s3.Client, jobs []MirrorJob) *Stats { + stats := &Stats{Total: int64(len(jobs))} + jobChan := make(chan MirrorJob, len(jobs)) + var wg sync.WaitGroup + + // Start workers + for i := 0; i < *workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for job := range jobChan { + if err := mirrorFile(client, job, stats); err != nil { + fmt.Fprintf(os.Stderr, "Error mirroring %s: %v\n", job.R2Key, err) + atomic.AddInt64(&stats.Failed, 1) + } else { + atomic.AddInt64(&stats.Mirrored, 1) + } + } + }() + } + + // Queue jobs + for _, job := range jobs { + jobChan <- job + } + close(jobChan) + + wg.Wait() + return stats +} + +func mirrorFile(client *s3.Client, job MirrorJob, stats *Stats) error { + var lastErr error + + for attempt := 1; attempt <= *retries; attempt++ { + if attempt > 1 { + fmt.Printf("Retry %d/%d for %s\n", attempt, *retries, job.R2Key) + time.Sleep(time.Duration(attempt) * time.Second) + } + + err := doMirror(client, job, stats) + if err == nil { + return nil + } + lastErr = err + } + + return lastErr +} + +func doMirror(client *s3.Client, job MirrorJob, stats *Stats) error { + // Download file + httpClient := &http.Client{Timeout: 10 * time.Minute} + resp, err := httpClient.Get(job.URL) + if err != nil { + return fmt.Errorf("download failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download failed: HTTP %d", resp.StatusCode) + } + + // Read body and calculate checksum + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("read failed: %w", err) + } + + atomic.AddInt64(&stats.BytesDown, int64(len(body))) + + // Verify checksum if provided + if job.SHA256 != "" { + hash := sha256.Sum256(body) + actual := hex.EncodeToString(hash[:]) + if actual != 
job.SHA256 { + return fmt.Errorf("checksum mismatch: expected %s, got %s", job.SHA256, actual) + } + } + + // Determine content type + contentType := "application/octet-stream" + if strings.HasSuffix(job.R2Key, ".tar.gz") { + contentType = "application/gzip" + } else if strings.HasSuffix(job.R2Key, ".zip") { + contentType = "application/zip" + } else if strings.HasSuffix(job.R2Key, ".tar.xz") { + contentType = "application/x-xz" + } + + // Upload to R2 + _, err = client.PutObject(context.Background(), &s3.PutObjectInput{ + Bucket: r2Bucket, + Key: aws.String(job.R2Key), + Body: strings.NewReader(string(body)), + ContentType: aws.String(contentType), + CacheControl: aws.String("public, max-age=31536000, immutable"), + }) + if err != nil { + return fmt.Errorf("upload failed: %w", err) + } + + fmt.Printf("Mirrored: %s (%d bytes)\n", job.R2Key, len(body)) + return nil +} From 84a135f7454651a78e82831f527fa0188f87ed57 Mon Sep 17 00:00:00 2001 From: "Calvin A. Allen" Date: Tue, 16 Dec 2025 13:52:57 -0500 Subject: [PATCH 2/5] feat(infra): add manifest generator and checksum source tracking - Add generate-manifests-from-r2 tool to scan R2 bucket and generate manifests - Update mirror tool to write .meta.json files with checksum source - Add sha256_source field to manifest schema ("upstream" or "dtvem") - Add SHA256Source field to Go manifest.Download struct - Update mirror-sync workflow to weekly schedule (not manifest-triggered) - Add generate-manifests-from-r2.yml workflow --- .../workflows/generate-manifests-from-r2.yml | 125 ++++++++ .github/workflows/mirror-sync.yml | 19 +- schemas/manifest.schema.json | 17 +- scripts/generate-manifests-from-r2/go.mod | 27 ++ scripts/generate-manifests-from-r2/go.sum | 36 +++ scripts/generate-manifests-from-r2/main.go | 299 ++++++++++++++++++ scripts/mirror-binaries/main.go | 141 ++++++--- src/internal/manifest/manifest.go | 6 + 8 files changed, 615 insertions(+), 55 deletions(-) create mode 100644 
.github/workflows/generate-manifests-from-r2.yml create mode 100644 scripts/generate-manifests-from-r2/go.mod create mode 100644 scripts/generate-manifests-from-r2/go.sum create mode 100644 scripts/generate-manifests-from-r2/main.go diff --git a/.github/workflows/generate-manifests-from-r2.yml b/.github/workflows/generate-manifests-from-r2.yml new file mode 100644 index 0000000..61f8288 --- /dev/null +++ b/.github/workflows/generate-manifests-from-r2.yml @@ -0,0 +1,125 @@ +name: Generate Manifests from R2 + +on: + # Trigger after mirror sync completes + workflow_run: + workflows: ["Mirror Sync"] + types: + - completed + # Manual trigger + workflow_dispatch: + inputs: + runtime: + description: 'Runtime to generate (node, python, ruby, or all)' + required: true + default: 'all' + type: choice + options: + - all + - node + - python + - ruby + dry_run: + description: 'Dry run (report only, no file changes)' + required: false + default: false + type: boolean + +jobs: + generate: + name: Generate Manifests + runs-on: ubuntu-latest + if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' + + - name: Build manifest generator + run: | + cd scripts/generate-manifests-from-r2 + go build -o generate-manifests . 
+ + - name: Generate manifests (dry run) + if: ${{ github.event_name == 'workflow_dispatch' && inputs.dry_run }} + env: + R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com + R2_BUCKET: ${{ secrets.CLOUDFLARE_R2_BUILDS_BUCKET }} + R2_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }} + R2_SECRET_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }} + run: | + RUNTIME="${{ inputs.runtime || 'all' }}" + ./scripts/generate-manifests-from-r2/generate-manifests \ + --runtime="$RUNTIME" \ + --output-dir=src/internal/manifest/data \ + --base-url="https://builds.dtvem.io" \ + --r2-endpoint="$R2_ENDPOINT" \ + --r2-bucket="$R2_BUCKET" \ + --r2-access-key="$R2_ACCESS_KEY" \ + --r2-secret-key="$R2_SECRET_KEY" \ + --dry-run + + - name: Generate manifests + if: ${{ github.event_name != 'workflow_dispatch' || !inputs.dry_run }} + env: + R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com + R2_BUCKET: ${{ secrets.CLOUDFLARE_R2_BUILDS_BUCKET }} + R2_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }} + R2_SECRET_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }} + run: | + RUNTIME="${{ inputs.runtime || 'all' }}" + ./scripts/generate-manifests-from-r2/generate-manifests \ + --runtime="$RUNTIME" \ + --output-dir=src/internal/manifest/data \ + --base-url="https://builds.dtvem.io" \ + --r2-endpoint="$R2_ENDPOINT" \ + --r2-bucket="$R2_BUCKET" \ + --r2-access-key="$R2_ACCESS_KEY" \ + --r2-secret-key="$R2_SECRET_KEY" + + - name: Check for changes + id: check-changes + if: ${{ github.event_name != 'workflow_dispatch' || !inputs.dry_run }} + run: | + git diff --quiet src/internal/manifest/data/ || echo "changed=true" >> $GITHUB_OUTPUT + + - name: Create Pull Request + if: ${{ steps.check-changes.outputs.changed == 'true' }} + uses: peter-evans/create-pull-request@v7 + with: + token: ${{ secrets.GITHUB_TOKEN }} + commit-message: 'chore(manifest): regenerate manifests from R2' + title: 'chore(manifest): regenerate manifests 
from R2' + body: | + Regenerated manifests from binaries hosted on `builds.dtvem.io`. + + This PR was created by the [Generate Manifests from R2 workflow](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}). + + ## Changes + - URLs now point to `builds.dtvem.io` (our hosted binaries) + - Includes `sha256_source` field indicating checksum origin ("upstream" or "dtvem") + + Please review the changes before merging. + branch: chore/regenerate-manifests-from-r2 + delete-branch: true + labels: | + automated + manifest + + - name: Generate summary + if: always() + run: | + echo "## Manifest Generation" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + if [ "${{ inputs.dry_run }}" = "true" ]; then + echo "**Mode:** Dry run (no changes made)" >> $GITHUB_STEP_SUMMARY + elif [ "${{ steps.check-changes.outputs.changed }}" = "true" ]; then + echo "**Result:** Changes detected, PR created" >> $GITHUB_STEP_SUMMARY + else + echo "**Result:** No changes detected" >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/mirror-sync.yml b/.github/workflows/mirror-sync.yml index 7a28e81..cd79456 100644 --- a/.github/workflows/mirror-sync.yml +++ b/.github/workflows/mirror-sync.yml @@ -1,11 +1,9 @@ name: Mirror Sync on: - # Run after manifest updates are deployed - workflow_run: - workflows: ["Deploy Manifests"] - types: - - completed + # Run weekly to catch new upstream versions + schedule: + - cron: '0 4 * * 0' # Every Sunday at 4 AM UTC # Manual trigger workflow_dispatch: inputs: @@ -24,12 +22,11 @@ jobs: sync: name: Sync ${{ matrix.runtime }} runs-on: ubuntu-latest - if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} - timeout-minutes: 120 # 2 hours max for sync + timeout-minutes: 180 # 3 hours max for sync strategy: fail-fast: false matrix: - runtime: ${{ (github.event_name == 'workflow_dispatch' && inputs.runtime == 'all') && fromJson('["node", "python", "ruby"]') || (github.event_name 
== 'workflow_dispatch' && fromJson(format('["{0}"]', inputs.runtime))) || fromJson('["node", "python", "ruby"]') }} + runtime: ${{ (github.event_name == 'workflow_dispatch' && inputs.runtime != 'all') && fromJson(format('["{0}"]', inputs.runtime)) || fromJson('["node", "python", "ruby"]') }} steps: - name: Checkout @@ -45,7 +42,7 @@ jobs: cd scripts/mirror-binaries go build -o mirror-binaries . - - name: Sync new binaries + - name: Sync new binaries to R2 env: R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com R2_BUCKET: ${{ secrets.CLOUDFLARE_R2_BUILDS_BUCKET }} @@ -67,4 +64,6 @@ jobs: run: | echo "## Sync Results for ${{ matrix.runtime }}" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "Synced new binaries not already present in R2." >> $GITHUB_STEP_SUMMARY + echo "Synced new binaries from upstream that were not already in R2." >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "To generate updated manifests, run the 'Generate Manifests from R2' workflow." 
>> $GITHUB_STEP_SUMMARY diff --git a/schemas/manifest.schema.json b/schemas/manifest.schema.json index 497af4a..c7ce412 100644 --- a/schemas/manifest.schema.json +++ b/schemas/manifest.schema.json @@ -47,7 +47,7 @@ "download": { "type": "object", "description": "Download information for a pre-built binary", - "required": ["url", "sha256"], + "required": ["url"], "additionalProperties": false, "properties": { "url": { @@ -59,6 +59,11 @@ "type": "string", "pattern": "^[a-fA-F0-9]{64}$", "description": "SHA256 checksum (64 hex characters)" + }, + "sha256_source": { + "type": "string", + "enum": ["upstream", "dtvem"], + "description": "Origin of the SHA256 checksum: 'upstream' if from the original provider, 'dtvem' if generated by us during mirroring" } } } @@ -69,13 +74,15 @@ "versions": { "3.13.1": { "windows-amd64": { - "url": "https://www.python.org/ftp/python/3.13.1/python-3.13.1-embed-amd64.zip", - "sha256": "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" + "url": "https://builds.dtvem.io/python/3.13.1/windows-amd64.zip", + "sha256": "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2", + "sha256_source": "upstream" }, "darwin-arm64": null, "linux-amd64": { - "url": "https://github.com/astral-sh/python-build-standalone/releases/download/20251209/cpython-3.13.1.tar.gz", - "sha256": "b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3" + "url": "https://builds.dtvem.io/python/3.13.1/linux-amd64.tar.gz", + "sha256": "b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3", + "sha256_source": "dtvem" } } } diff --git a/scripts/generate-manifests-from-r2/go.mod b/scripts/generate-manifests-from-r2/go.mod new file mode 100644 index 0000000..5e385d4 --- /dev/null +++ b/scripts/generate-manifests-from-r2/go.mod @@ -0,0 +1,27 @@ +module github.com/dtvem/dtvem/scripts/generate-manifests-from-r2 + +go 1.23.0 + +require ( + github.com/aws/aws-sdk-go-v2 v1.32.6 + github.com/aws/aws-sdk-go-v2/config v1.28.6 + 
github.com/aws/aws-sdk-go-v2/credentials v1.17.47 + github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0 +) + +require ( + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.33.2 // indirect + github.com/aws/smithy-go v1.22.1 // indirect +) diff --git a/scripts/generate-manifests-from-r2/go.sum b/scripts/generate-manifests-from-r2/go.sum new file mode 100644 index 0000000..8c425bd --- /dev/null +++ b/scripts/generate-manifests-from-r2/go.sum @@ -0,0 +1,36 @@ +github.com/aws/aws-sdk-go-v2 v1.32.6 h1:7BokKRgRPuGmKkFMhEg/jSul+tB9VvXhcViILtfG8b4= +github.com/aws/aws-sdk-go-v2 v1.32.6/go.mod h1:P5WJBrYqqbWVaOxgH0X/FYYD47/nooaPOZPlQdmiN2U= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7 h1:lL7IfaFzngfx0ZwUGOZdsFFnQ5uLvR0hWqqhyE7Q9M8= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.7/go.mod h1:QraP0UcVlQJsmHfioCrveWOC1nbiWUl3ej08h4mXWoc= +github.com/aws/aws-sdk-go-v2/config v1.28.6 h1:D89IKtGrs/I3QXOLNTH93NJYtDhm8SYa9Q5CsPShmyo= +github.com/aws/aws-sdk-go-v2/config v1.28.6/go.mod h1:GDzxJ5wyyFSCoLkS+UhGB0dArhb9mI+Co4dHtoTxbko= +github.com/aws/aws-sdk-go-v2/credentials v1.17.47 
h1:48bA+3/fCdi2yAwVt+3COvmatZ6jUDNkDTIsqDiMUdw= +github.com/aws/aws-sdk-go-v2/credentials v1.17.47/go.mod h1:+KdckOejLW3Ks3b0E3b5rHsr2f9yuORBum0WPnE5o5w= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21 h1:AmoU1pziydclFT/xRV+xXE/Vb8fttJCLRPv8oAkprc0= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.21/go.mod h1:AjUdLYe4Tgs6kpH4Bv7uMZo7pottoyHMn4eTcIcneaY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25 h1:s/fF4+yDQDoElYhfIVvSNyeCydfbuTKzhxSXDXCPasU= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.25/go.mod h1:IgPfDv5jqFIzQSNbUEMoitNooSMXjRSDkhXv8jiROvU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25 h1:ZntTCl5EsYnhN/IygQEUugpdwbhdkom9uHcbCftiGgA= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.25/go.mod h1:DBdPrgeocww+CSl1C8cEV8PN1mHMBhuCDLpXezyvWkE= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25 h1:r67ps7oHCYnflpgDy2LZU0MAQtQbYIOqNNnqGO6xQkE= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.25/go.mod h1:GrGY+Q4fIokYLtjCVB/aFfCVL6hhGUFl8inD18fDalE= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 h1:iXtILhvDxB6kPvEXgsDhGaZCSC6LQET5ZHSdJozeI0Y= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1/go.mod h1:9nu0fVANtYiAePIBh2/pFUSwtJ402hLnp854CNoDOeE= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.6 h1:HCpPsWqmYQieU7SS6E9HXfdAMSud0pteVXieJmcpIRI= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.4.6/go.mod h1:ngUiVRCco++u+soRRVBIvBZxSMMvOVMXA4PJ36JLfSw= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6 h1:50+XsN70RS7dwJ2CkVNXzj7U2L1HKP8nqTd3XWEXBN4= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.6/go.mod h1:WqgLmwY7so32kG01zD8CPTJWVWM+TzJoOVHwTg4aPug= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared 
v1.18.6 h1:BbGDtTi0T1DYlmjBiCr/le3wzhA37O8QTC5/Ab8+EXk= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.6/go.mod h1:hLMJt7Q8ePgViKupeymbqI0la+t9/iYFBjxQCFwuAwI= +github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0 h1:nyuzXooUNJexRT0Oy0UQY6AhOzxPxhtt4DcBIHyCnmw= +github.com/aws/aws-sdk-go-v2/service/s3 v1.71.0/go.mod h1:sT/iQz8JK3u/5gZkT+Hmr7GzVZehUMkRZpOaAwYXeGY= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.7 h1:rLnYAfXQ3YAccocshIH5mzNNwZBkBo+bP6EhIxak6Hw= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.7/go.mod h1:ZHtuQJ6t9A/+YDuxOLnbryAmITtr8UysSny3qcyvJTc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6 h1:JnhTZR3PiYDNKlXy50/pNeix9aGMo6lLpXwJ1mw8MD4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.6/go.mod h1:URronUEGfXZN1VpdktPSD1EkAL9mfrV+2F4sjH38qOY= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.2 h1:s4074ZO1Hk8qv65GqNXqDjmkf4HSQqJukaLuuW0TpDA= +github.com/aws/aws-sdk-go-v2/service/sts v1.33.2/go.mod h1:mVggCnIWoM09jP71Wh+ea7+5gAp53q+49wDFs1SW5z8= +github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro= +github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= diff --git a/scripts/generate-manifests-from-r2/main.go b/scripts/generate-manifests-from-r2/main.go new file mode 100644 index 0000000..f6b6402 --- /dev/null +++ b/scripts/generate-manifests-from-r2/main.go @@ -0,0 +1,299 @@ +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +// BinaryMeta represents metadata stored alongside each binary +type BinaryMeta struct { + SHA256 string `json:"sha256"` + SHA256Source string `json:"sha256_source"` // "upstream" or "dtvem" + SourceURL string `json:"source_url"` + MirroredAt string `json:"mirrored_at"` + Size int64 `json:"size"` 
+} + +// ManifestDownload represents a download entry in the manifest +type ManifestDownload struct { + URL string `json:"url"` + SHA256 string `json:"sha256,omitempty"` + SHA256Source string `json:"sha256_source,omitempty"` +} + +// Manifest represents the output manifest structure +type Manifest struct { + Versions map[string]map[string]*ManifestDownload `json:"versions"` +} + +var ( + runtimeFlag = flag.String("runtime", "", "Runtime to generate (node, python, ruby, or all)") + outputDir = flag.String("output-dir", "src/internal/manifest/data", "Output directory for manifests") + baseURL = flag.String("base-url", "https://builds.dtvem.io", "Base URL for binary downloads") + r2Endpoint = flag.String("r2-endpoint", "", "R2 endpoint URL") + r2Bucket = flag.String("r2-bucket", "", "R2 bucket name") + r2AccessKey = flag.String("r2-access-key", "", "R2 access key ID") + r2SecretKey = flag.String("r2-secret-key", "", "R2 secret access key") + dryRun = flag.Bool("dry-run", false, "Report what would be generated without writing files") +) + +// metaKeyPattern matches paths like "node/20.18.0/linux-amd64.meta.json" +var metaKeyPattern = regexp.MustCompile(`^([^/]+)/([^/]+)/([^/]+)\.meta\.json$`) + +func main() { + flag.Parse() + + if *runtimeFlag == "" { + fmt.Fprintln(os.Stderr, "Error: --runtime is required (node, python, ruby, or all)") + os.Exit(1) + } + + if *r2Endpoint == "" || *r2Bucket == "" || *r2AccessKey == "" || *r2SecretKey == "" { + fmt.Fprintln(os.Stderr, "Error: R2 credentials required (--r2-endpoint, --r2-bucket, --r2-access-key, --r2-secret-key)") + os.Exit(1) + } + + runtimes := []string{*runtimeFlag} + if *runtimeFlag == "all" { + runtimes = []string{"node", "python", "ruby"} + } + + // Create S3 client + s3Client, err := createS3Client() + if err != nil { + fmt.Fprintf(os.Stderr, "Error creating S3 client: %v\n", err) + os.Exit(1) + } + + // Generate manifest for each runtime + for _, runtime := range runtimes { + fmt.Printf("Generating manifest for 
%s...\n", runtime) + + manifest, err := generateManifest(s3Client, runtime) + if err != nil { + fmt.Fprintf(os.Stderr, "Error generating manifest for %s: %v\n", runtime, err) + os.Exit(1) + } + + if *dryRun { + fmt.Printf("\n[DRY RUN] Would generate %s.json with %d versions\n", runtime, len(manifest.Versions)) + // Print sample of versions + count := 0 + for version, platforms := range manifest.Versions { + if count >= 5 { + fmt.Printf(" ... and %d more versions\n", len(manifest.Versions)-5) + break + } + fmt.Printf(" %s: %d platforms\n", version, len(platforms)) + count++ + } + continue + } + + // Write manifest file + outputPath := filepath.Join(*outputDir, runtime+".json") + if err := writeManifest(manifest, outputPath); err != nil { + fmt.Fprintf(os.Stderr, "Error writing manifest for %s: %v\n", runtime, err) + os.Exit(1) + } + + fmt.Printf(" Written %s with %d versions\n", outputPath, len(manifest.Versions)) + } + + fmt.Println("\nManifest generation complete!") +} + +func createS3Client() (*s3.Client, error) { + cfg, err := config.LoadDefaultConfig(context.Background(), + config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider( + *r2AccessKey, + *r2SecretKey, + "", + )), + config.WithRegion("auto"), + ) + if err != nil { + return nil, err + } + + client := s3.NewFromConfig(cfg, func(o *s3.Options) { + o.BaseEndpoint = aws.String(*r2Endpoint) + }) + + return client, nil +} + +func generateManifest(client *s3.Client, runtime string) (*Manifest, error) { + manifest := &Manifest{ + Versions: make(map[string]map[string]*ManifestDownload), + } + + // List all .meta.json files for this runtime + prefix := runtime + "/" + paginator := s3.NewListObjectsV2Paginator(client, &s3.ListObjectsV2Input{ + Bucket: r2Bucket, + Prefix: aws.String(prefix), + }) + + metaFiles := []string{} + for paginator.HasMorePages() { + page, err := paginator.NextPage(context.Background()) + if err != nil { + return nil, fmt.Errorf("listing objects: %w", err) + } + + for _, obj 
:= range page.Contents { + key := aws.ToString(obj.Key) // nil-safe dereference: a missing key becomes "" and is filtered out below + if strings.HasSuffix(key, ".meta.json") { + metaFiles = append(metaFiles, key) + } + } + } + + fmt.Printf(" Found %d metadata files\n", len(metaFiles)) + + // Process each metadata file; keys that do not parse or cannot be read are reported and skipped + for _, metaKey := range metaFiles { + // Parse the key to extract runtime, version, platform + matches := metaKeyPattern.FindStringSubmatch(metaKey) + if matches == nil { + fmt.Printf(" Warning: skipping invalid meta key: %s\n", metaKey) + continue + } + + rt := matches[1] + version := matches[2] + platform := matches[3] + + if rt != runtime { + continue + } + + // Download and parse metadata + meta, err := downloadMeta(client, metaKey) + if err != nil { + fmt.Printf(" Warning: failed to read metadata for %s: %v\n", metaKey, err) + continue + } + + // Determine the binary file extension from source URL (NOTE(review): presumes the mirror tool built the R2 key with the same extension logic - confirm); trailing slashes on --base-url are trimmed so the URL never contains "//" + ext := getExtension(meta.SourceURL) + binaryURL := fmt.Sprintf("%s/%s/%s/%s%s", strings.TrimRight(*baseURL, "/"), runtime, version, platform, ext) + + // Add to manifest + if manifest.Versions[version] == nil { + manifest.Versions[version] = make(map[string]*ManifestDownload) + } + + manifest.Versions[version][platform] = &ManifestDownload{ + URL: binaryURL, + SHA256: meta.SHA256, + SHA256Source: meta.SHA256Source, + } + } + + return manifest, nil +} + +func downloadMeta(client *s3.Client, key string) (*BinaryMeta, error) { + resp, err := client.GetObject(context.Background(), &s3.GetObjectInput{ + Bucket: r2Bucket, + Key: aws.String(key), + }) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + var meta BinaryMeta + if err := json.Unmarshal(data, &meta); err != nil { + return nil, err + } + + return &meta, nil +} + +func getExtension(url string) string { + // Handle common archive extensions + if strings.HasSuffix(url, ".tar.gz") { + return ".tar.gz" + } + if strings.HasSuffix(url, ".tar.xz") { + return ".tar.xz" + } + if strings.HasSuffix(url, ".tar.bz2") { +
return ".tar.bz2" + } + if strings.HasSuffix(url, ".zip") { + return ".zip" + } + if strings.HasSuffix(url, ".7z") { + return ".7z" + } + // Fallback: extract from URL + base := filepath.Base(url) + if idx := strings.Index(base, "."); idx != -1 { + return base[idx:] + } + return "" +} + +func writeManifest(manifest *Manifest, path string) error { + // Sort versions for consistent output + sortedManifest := &Manifest{ + Versions: make(map[string]map[string]*ManifestDownload), + } + + // Get sorted version keys + versions := make([]string, 0, len(manifest.Versions)) + for v := range manifest.Versions { + versions = append(versions, v) + } + sort.Strings(versions) + + for _, v := range versions { + platforms := manifest.Versions[v] + sortedManifest.Versions[v] = make(map[string]*ManifestDownload) + + // Get sorted platform keys + platformKeys := make([]string, 0, len(platforms)) + for p := range platforms { + platformKeys = append(platformKeys, p) + } + sort.Strings(platformKeys) + + for _, p := range platformKeys { + sortedManifest.Versions[v][p] = platforms[p] + } + } + + data, err := json.MarshalIndent(sortedManifest, "", " ") + if err != nil { + return err + } + + // Ensure output directory exists + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return err + } + + return os.WriteFile(path, data, 0644) +} diff --git a/scripts/mirror-binaries/main.go b/scripts/mirror-binaries/main.go index 527f0a9..d43b5bc 100644 --- a/scripts/mirror-binaries/main.go +++ b/scripts/mirror-binaries/main.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "context" "crypto/sha256" "encoding/hex" @@ -33,27 +34,39 @@ type Download struct { SHA256 string `json:"sha256,omitempty"` } +// BinaryMeta represents metadata stored alongside each binary +type BinaryMeta struct { + SHA256 string `json:"sha256"` + SHA256Source string `json:"sha256_source"` // "upstream" or "dtvem" + SourceURL string `json:"source_url"` + MirroredAt string `json:"mirrored_at"` + Size int64 
`json:"size"` +} + // MirrorJob represents a single file to mirror type MirrorJob struct { - Runtime string - Version string - Platform string - URL string - SHA256 string - R2Key string + Runtime string + Version string + Platform string + URL string + UpstreamSHA256 string // Checksum from upstream manifest (may be empty) + R2Key string + MetaKey string } // Stats tracks mirroring statistics type Stats struct { - Total int64 - Skipped int64 - Mirrored int64 - Failed int64 - BytesDown int64 + Total int64 + Skipped int64 + Mirrored int64 + Failed int64 + BytesDown int64 + UpstreamChecksum int64 + GeneratedChecksum int64 } var ( - runtime = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)") + runtimeFlag = flag.String("runtime", "", "Runtime to mirror (node, python, ruby, or all)") dryRun = flag.Bool("dry-run", false, "Report what would be done without doing it") syncOnly = flag.Bool("sync-only", false, "Only mirror files not already in R2") manifestDir = flag.String("manifest-dir", "src/internal/manifest/data", "Directory containing manifest files") @@ -68,7 +81,7 @@ var ( func main() { flag.Parse() - if *runtime == "" { + if *runtimeFlag == "" { fmt.Fprintln(os.Stderr, "Error: --runtime is required (node, python, ruby, or all)") os.Exit(1) } @@ -80,8 +93,8 @@ func main() { } } - runtimes := []string{*runtime} - if *runtime == "all" { + runtimes := []string{*runtimeFlag} + if *runtimeFlag == "all" { runtimes = []string{"node", "python", "ruby"} } @@ -124,18 +137,29 @@ func main() { if *dryRun { fmt.Println("\n[DRY RUN] Would mirror the following files:") + withChecksum := 0 + withoutChecksum := 0 for _, job := range jobs { - fmt.Printf(" %s -> %s\n", job.URL, job.R2Key) + checksumStatus := "(will generate checksum)" + if job.UpstreamSHA256 != "" { + checksumStatus = "(has upstream checksum)" + withChecksum++ + } else { + withoutChecksum++ + } + fmt.Printf(" %s -> %s %s\n", job.URL, job.R2Key, checksumStatus) } - fmt.Printf("\nTotal: %d 
files\n", len(jobs)) + fmt.Printf("\nTotal: %d files (%d with upstream checksum, %d will generate)\n", + len(jobs), withChecksum, withoutChecksum) return } - // Filter jobs if sync-only + // Filter jobs if sync-only (check for .meta.json to determine if mirrored) if *syncOnly && existingKeys != nil { var filtered []MirrorJob for _, job := range jobs { - if !existingKeys[job.R2Key] { + // Check for metadata file existence (indicates successful mirror) + if !existingKeys[job.MetaKey] { filtered = append(filtered, job) } } @@ -154,11 +178,13 @@ func main() { // Print summary fmt.Println("\n=== Mirror Summary ===") - fmt.Printf("Total: %d\n", stats.Total) - fmt.Printf("Mirrored: %d\n", stats.Mirrored) - fmt.Printf("Skipped: %d\n", stats.Skipped) - fmt.Printf("Failed: %d\n", stats.Failed) - fmt.Printf("Bytes: %d MB\n", stats.BytesDown/(1024*1024)) + fmt.Printf("Total: %d\n", stats.Total) + fmt.Printf("Mirrored: %d\n", stats.Mirrored) + fmt.Printf("Skipped: %d\n", stats.Skipped) + fmt.Printf("Failed: %d\n", stats.Failed) + fmt.Printf("Bytes downloaded: %d MB\n", stats.BytesDown/(1024*1024)) + fmt.Printf("Upstream checksums: %d\n", stats.UpstreamChecksum) + fmt.Printf("Generated checksums: %d\n", stats.GeneratedChecksum) if stats.Failed > 0 { os.Exit(1) @@ -225,14 +251,16 @@ func loadJobs(runtime, manifestPath string) ([]MirrorJob, error) { // Determine file extension from URL ext := getExtension(dl.URL) r2Key := fmt.Sprintf("%s/%s/%s%s", runtime, version, platform, ext) + metaKey := fmt.Sprintf("%s/%s/%s.meta.json", runtime, version, platform) jobs = append(jobs, MirrorJob{ - Runtime: runtime, - Version: version, - Platform: platform, - URL: dl.URL, - SHA256: dl.SHA256, - R2Key: r2Key, + Runtime: runtime, + Version: version, + Platform: platform, + URL: dl.URL, + UpstreamSHA256: dl.SHA256, + R2Key: r2Key, + MetaKey: metaKey, }) } } @@ -336,13 +364,21 @@ func doMirror(client *s3.Client, job MirrorJob, stats *Stats) error { atomic.AddInt64(&stats.BytesDown, 
int64(len(body))) - // Verify checksum if provided - if job.SHA256 != "" { - hash := sha256.Sum256(body) - actual := hex.EncodeToString(hash[:]) - if actual != job.SHA256 { - return fmt.Errorf("checksum mismatch: expected %s, got %s", job.SHA256, actual) + // Calculate actual checksum + hash := sha256.Sum256(body) + actualChecksum := hex.EncodeToString(hash[:]) + + // Determine checksum source and verify if upstream provided one + var checksumSource string + if job.UpstreamSHA256 != "" { + if actualChecksum != job.UpstreamSHA256 { + return fmt.Errorf("checksum mismatch: expected %s, got %s", job.UpstreamSHA256, actualChecksum) } + checksumSource = "upstream" + atomic.AddInt64(&stats.UpstreamChecksum, 1) + } else { + checksumSource = "dtvem" + atomic.AddInt64(&stats.GeneratedChecksum, 1) } // Determine content type @@ -355,18 +391,43 @@ func doMirror(client *s3.Client, job MirrorJob, stats *Stats) error { contentType = "application/x-xz" } - // Upload to R2 + // Upload binary to R2 _, err = client.PutObject(context.Background(), &s3.PutObjectInput{ Bucket: r2Bucket, Key: aws.String(job.R2Key), - Body: strings.NewReader(string(body)), + Body: bytes.NewReader(body), ContentType: aws.String(contentType), CacheControl: aws.String("public, max-age=31536000, immutable"), }) if err != nil { - return fmt.Errorf("upload failed: %w", err) + return fmt.Errorf("upload binary failed: %w", err) + } + + // Create and upload metadata + meta := BinaryMeta{ + SHA256: actualChecksum, + SHA256Source: checksumSource, + SourceURL: job.URL, + MirroredAt: time.Now().UTC().Format(time.RFC3339), + Size: int64(len(body)), + } + + metaJSON, err := json.MarshalIndent(meta, "", " ") + if err != nil { + return fmt.Errorf("marshal metadata failed: %w", err) + } + + _, err = client.PutObject(context.Background(), &s3.PutObjectInput{ + Bucket: r2Bucket, + Key: aws.String(job.MetaKey), + Body: bytes.NewReader(metaJSON), + ContentType: aws.String("application/json"), + CacheControl: 
aws.String("public, max-age=300"), // Short cache for metadata + }) + if err != nil { + return fmt.Errorf("upload metadata failed: %w", err) } - fmt.Printf("Mirrored: %s (%d bytes)\n", job.R2Key, len(body)) + fmt.Printf("Mirrored: %s (%d bytes, checksum: %s)\n", job.R2Key, len(body), checksumSource) return nil } diff --git a/src/internal/manifest/manifest.go b/src/internal/manifest/manifest.go index 14f909d..ee22741 100644 --- a/src/internal/manifest/manifest.go +++ b/src/internal/manifest/manifest.go @@ -27,6 +27,12 @@ type Download struct { // SHA256 is the hex-encoded SHA256 checksum for verification SHA256 string `json:"sha256"` + + // SHA256Source indicates the origin of the checksum: + // "upstream" - checksum provided by the original runtime provider + // "dtvem" - checksum generated by dtvem during mirroring + // Empty string for legacy manifests without this field + SHA256Source string `json:"sha256_source,omitempty"` } // Availability represents whether a version is available for a platform. From 649a9893ac6d7b61f9926167ffe4f32f1616b2a9 Mon Sep 17 00:00:00 2001 From: "Calvin A. Allen" Date: Tue, 16 Dec 2025 13:59:08 -0500 Subject: [PATCH 3/5] chore(ci): change mirror-sync schedule to daily --- .github/workflows/mirror-sync.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mirror-sync.yml b/.github/workflows/mirror-sync.yml index cd79456..66dc6c0 100644 --- a/.github/workflows/mirror-sync.yml +++ b/.github/workflows/mirror-sync.yml @@ -1,9 +1,9 @@ name: Mirror Sync on: - # Run weekly to catch new upstream versions + # Run daily to catch new upstream versions schedule: - - cron: '0 4 * * 0' # Every Sunday at 4 AM UTC + - cron: '0 4 * * *' # Every day at 4 AM UTC # Manual trigger workflow_dispatch: inputs: From 28be034c5cf1ae1599cd5c5d46fdc932e455f71f Mon Sep 17 00:00:00 2001 From: "Calvin A. 
Allen" Date: Tue, 16 Dec 2025 14:02:36 -0500 Subject: [PATCH 4/5] chore(ci): trigger manifest generation after mirror-all workflow --- .github/workflows/generate-manifests-from-r2.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/generate-manifests-from-r2.yml b/.github/workflows/generate-manifests-from-r2.yml index 61f8288..b2abf7d 100644 --- a/.github/workflows/generate-manifests-from-r2.yml +++ b/.github/workflows/generate-manifests-from-r2.yml @@ -1,9 +1,9 @@ name: Generate Manifests from R2 on: - # Trigger after mirror sync completes + # Trigger after mirror workflows complete workflow_run: - workflows: ["Mirror Sync"] + workflows: ["Mirror Sync", "Mirror All Binaries"] types: - completed # Manual trigger From 76a95c760571c47b8c144f8bc5cbc8bc5268e6eb Mon Sep 17 00:00:00 2001 From: "Calvin A. Allen" Date: Tue, 16 Dec 2025 14:05:06 -0500 Subject: [PATCH 5/5] chore(ci): deploy manifests to R2 immediately after generation --- .../workflows/generate-manifests-from-r2.yml | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/.github/workflows/generate-manifests-from-r2.yml b/.github/workflows/generate-manifests-from-r2.yml index b2abf7d..8c534eb 100644 --- a/.github/workflows/generate-manifests-from-r2.yml +++ b/.github/workflows/generate-manifests-from-r2.yml @@ -82,6 +82,27 @@ jobs: --r2-access-key="$R2_ACCESS_KEY" \ --r2-secret-key="$R2_SECRET_KEY" + - name: Deploy manifests to R2 + if: ${{ github.event_name != 'workflow_dispatch' || !inputs.dry_run }} + env: + R2_ENDPOINT: https://${{ secrets.CLOUDFLARE_ACCOUNT_ID }}.r2.cloudflarestorage.com + R2_BUCKET: ${{ secrets.CLOUDFLARE_R2_MANIFESTS_BUCKET }} + # Credentials are passed through the environment (the AWS CLI reads these variables natively) so secret values are never interpolated into the script text or written to the runner's AWS config + AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_R2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_R2_SECRET_ACCESS_KEY }} + AWS_DEFAULT_REGION: auto + run: | + echo "Deploying manifests to R2..."
+ for file in src/internal/manifest/data/*.json; do + filename=$(basename "$file") + echo "Uploading $filename..." + aws s3 cp "$file" "s3://${R2_BUCKET}/${filename}" \ + --endpoint-url "${R2_ENDPOINT}" \ + --content-type "application/json" \ + --cache-control "public, max-age=300" + done + echo "Manifests deployed to R2!" + - name: Check for changes id: check-changes if: ${{ github.event_name != 'workflow_dispatch' || !inputs.dry_run }} @@ -119,7 +140,9 @@ jobs: if [ "${{ inputs.dry_run }}" = "true" ]; then echo "**Mode:** Dry run (no changes made)" >> $GITHUB_STEP_SUMMARY elif [ "${{ steps.check-changes.outputs.changed }}" = "true" ]; then - echo "**Result:** Changes detected, PR created" >> $GITHUB_STEP_SUMMARY + echo "**Result:** Changes detected" >> $GITHUB_STEP_SUMMARY + echo "- Manifests deployed to R2 (live immediately)" >> $GITHUB_STEP_SUMMARY + echo "- PR created for embedded manifests" >> $GITHUB_STEP_SUMMARY else echo "**Result:** No changes detected" >> $GITHUB_STEP_SUMMARY fi