Skip to content

Commit 4718563

Browse files
czar0 and Copilot
authored
feat(cre-local): enable secrets management via vault don (#22075)
* feat(cre-local): enable secrets management via vault don
* fix: version matching and error handling
  - fix typo in secretsNamesConfig comment
  - add check for workflowRegistryVersion and capabilitiesRegistryVersion
  - allow more than 100 don via pagination
  - fetch vault capability id from helper
  - increase timeout for vault requests
  - return error value from sendToVaultGateway method
* fix: go lint
* refactor: add retries to req gateway and skip if cap cfg is set
* fix: go lint
* refactor: retry-go for vault calls and split update logic for vault config
* refactor: dynamic check for vault config propagation
* refactor: workflow
* fix: apply suggestions from code review
  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* refactor: remove capability address empty check
* refactor: merge fields in update config
* refactor: move business logic to lib cre dir
* fix: make gomodtidy
* refactor: revert config changes
* refactor: revert config changes
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 8c138b6 commit 4718563

14 files changed

Lines changed: 903 additions & 278 deletions

File tree

core/scripts/cre/environment/environment/examples.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func deployAndVerifyExampleWorkflow(cmdContext context.Context, rpcURL string, w
153153
_ = os.Remove(configFilePath)
154154
}()
155155

156-
deployErr := compileCopyAndRegisterWorkflow(cmdContext, workflowFilePath, workflowName, "", workflowRegistryAddress, "", creworkflow.DefaultWorkflowNodePattern, creworkflow.DefaultWorkflowTargetDir, configFilePath, "", "", rpcURL, contractsVersion, contractsVersion, workflowDonID)
156+
deployErr := compileCopyAndRegisterWorkflow(cmdContext, workflowFilePath, workflowName, "", workflowRegistryAddress, "", creworkflow.DefaultWorkflowNodePattern, creworkflow.DefaultWorkflowTargetDir, configFilePath, "", "", rpcURL, "", contractsVersion, nil, workflowDonID)
157157
if deployErr != nil {
158158
return errors.Wrap(deployErr, "failed to deploy example workflow")
159159
}

core/scripts/cre/environment/environment/setup.go

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ func init() {
5151
SetupCmd.Flags().StringVarP(&config.ConfigPath, "config", "c", DefaultSetupConfigPath, "Path to the TOML configuration file")
5252
SetupCmd.Flags().BoolVarP(&noPrompt, "no-prompt", "y", false, "Automatically accept defaults and do not prompt for user input")
5353
SetupCmd.Flags().BoolVarP(&purge, "purge", "p", false, "Purge all existing images and re-download/re-build them")
54+
SetupCmd.Flags().BoolVarP(&config.Build, "build", "b", false, "Build images locally instead of pulling from ECR (useful on Apple Silicon)")
5455
SetupCmd.Flags().BoolVar(&withBilling, "with-billing", false, "Include billing service in the setup")
5556

5657
EnvironmentCmd.AddCommand(SetupCmd)
@@ -135,6 +136,7 @@ const (
135136
// SetupConfig represents the configuration for the setup command
136137
type SetupConfig struct {
137138
ConfigPath string
139+
Build bool // when true, images are built locally instead of pulled from ECR
138140
}
139141

140142
type BuildConfig struct {
@@ -267,6 +269,13 @@ func (c BuildConfig) Build(ctx context.Context) (localImage string, err error) {
267269
}()
268270
}
269271

272+
// When building on a non-amd64 host, override the TARGETOS/TARGETARCH build
273+
// args so the Go binary is compiled for the correct architecture. Many
274+
// Dockerfiles in this project declare `ARG TARGETARCH=amd64` which defaults
275+
// to amd64 regardless of --platform. Passing --build-arg makes the cache key
276+
// differ from the amd64 entry, forcing a fresh compilation.
277+
overrideArch := runtime.GOARCH != "amd64"
278+
270279
// Save current directory and change to working directory
271280
currentDir, err := os.Getwd()
272281
if err != nil {
@@ -289,10 +298,20 @@ func (c BuildConfig) Build(ctx context.Context) (localImage string, err error) {
289298
}
290299

291300
// Build Docker image
292-
args := []string{"build", "-t", c.LocalImage, "-f", c.Dockerfile, c.DockerCtx}
301+
args := []string{"build", "--platform", "linux/" + runtime.GOARCH}
302+
if overrideArch {
303+
// Override TARGETOS/TARGETARCH build args so Dockerfiles with
304+
// `ARG TARGETARCH=amd64` compile the correct binary. This also changes
305+
// the cache key, causing Docker to recompile instead of reusing an
306+
// amd64-cached layer.
307+
args = append(args, "--build-arg", "TARGETOS=linux", "--build-arg", "TARGETARCH="+runtime.GOARCH)
308+
}
309+
args = append(args, "-t", c.LocalImage, "-f", c.Dockerfile)
293310
if c.RequireGithubToken {
294311
args = append(args, "--build-arg", "GITHUB_TOKEN="+os.Getenv("GITHUB_TOKEN"))
295312
}
313+
// Context must be the final positional argument.
314+
args = append(args, c.DockerCtx)
296315

297316
cmd := exec.CommandContext(ctx, "docker", args...)
298317
cmd.Stdout = os.Stdout
@@ -393,7 +412,7 @@ func (c ImageConfig) Ensure(ctx context.Context, dockerClient *client.Client, aw
393412
logger.Info().Msgf("🔍 %s image not found.", name)
394413
logger.Info().Msgf("Would you like to Pull (requires AWS SSO) or build the %s image? (P/b) [B]", name)
395414

396-
var input = PullOption // Default to Pull
415+
var input = defaultOption // default controlled by the caller (PullOption or BuildOption)
397416
if !noPrompt {
398417
_, err := fmt.Scanln(&input)
399418
if err != nil {
@@ -515,12 +534,17 @@ func RunSetup(ctx context.Context, config SetupConfig, noPrompt, purge, withBill
515534
}
516535
}
517536

537+
defaultOption := PullOption
538+
if config.Build {
539+
defaultOption = BuildOption
540+
}
541+
518542
jdConfig := ImageConfig{
519543
BuildConfig: cfg.JobDistributor.BuildConfig,
520544
PullConfig: cfg.JobDistributor.PullConfig,
521545
}
522546

523-
jdLocalImage, jdErr := jdConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, PullOption, purge)
547+
jdLocalImage, jdErr := jdConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, defaultOption, purge)
524548
if jdErr != nil {
525549
setupErr = errors.Wrap(jdErr, "failed to ensure Job Distributor image")
526550
return
@@ -534,7 +558,7 @@ func RunSetup(ctx context.Context, config SetupConfig, noPrompt, purge, withBill
534558
}
535559

536560
var err error
537-
chipRouterLocalImage, err = chipRouterConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, PullOption, purge)
561+
chipRouterLocalImage, err = chipRouterConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, defaultOption, purge)
538562
if err != nil {
539563
setupErr = errors.Wrap(err, "failed to ensure Chip Router image")
540564
return
@@ -551,7 +575,7 @@ func RunSetup(ctx context.Context, config SetupConfig, noPrompt, purge, withBill
551575
}
552576

553577
var err error
554-
chipIngressLocalImage, err = chipConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, PullOption, purge)
578+
chipIngressLocalImage, err = chipConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, defaultOption, purge)
555579
if err != nil {
556580
setupErr = errors.Wrap(err, "failed to ensure Atlas Chip Ingress image")
557581
return
@@ -568,7 +592,7 @@ func RunSetup(ctx context.Context, config SetupConfig, noPrompt, purge, withBill
568592
}
569593

570594
var err error
571-
chipConfigLocalImage, err = chipConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, PullOption, purge)
595+
chipConfigLocalImage, err = chipConfig.Ensure(ctx, dockerClient, cfg.General.AWSProfile, noPrompt, defaultOption, purge)
572596
if err != nil {
573597
setupErr = errors.Wrap(err, "failed to ensure Atlas Chip Config image")
574598
return

core/scripts/cre/environment/environment/state_resolver.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,45 @@ func (r *LocalCREStateResolver) WorkflowRegistryOutput() (*cre.WorkflowRegistryO
160160
return &out, nil
161161
}
162162

163+
// WorkflowDONNodeInfo returns the shared PostgreSQL port (DbInput.Port of the nodeset whose name
164+
// matches the workflow DON) and the number of worker nodes in that DON, as recorded in the local CRE
165+
// state file. It returns an error when the infra section is missing, the provider is Kubernetes, the workflow DON metadata or its workers cannot be resolved, no nodeset matches the DON name, or the matching nodeset has no DbInput. These values are used by waitForVaultConfigPropagation to poll each node's registry_syncer_states table.
166+
func (r *LocalCREStateResolver) WorkflowDONNodeInfo() (dbPort int, nodeCount int, err error) {
167+
if r.cfg.Infra == nil {
168+
return 0, 0, errors.New("infra section is missing from local CRE state file")
169+
}
170+
if r.cfg.Infra.IsKubernetes() {
171+
return 0, 0, errors.New("direct DB polling is not supported for Kubernetes provider; vault config propagation requires a static wait on Kubernetes")
172+
}
173+
174+
donMeta, err := r.WorkflowDONMetadata()
175+
if err != nil {
176+
return 0, 0, errors.Wrap(err, "failed to get workflow DON metadata")
177+
}
178+
179+
// Find the NodeSet whose name matches the workflow DON name; the first match wins and nodeSet stays nil when none matches.
180+
var nodeSet *cre.NodeSet
181+
for _, ns := range r.cfg.NodeSets {
182+
if ns.Name == donMeta.Name {
183+
nodeSet = ns
184+
break
185+
}
186+
}
187+
if nodeSet == nil {
188+
return 0, 0, fmt.Errorf("no nodeset found for workflow DON %q in local CRE state", donMeta.Name)
189+
}
190+
if nodeSet.DbInput == nil {
191+
return 0, 0, fmt.Errorf("nodeset %q has no DbInput in local CRE state", donMeta.Name)
192+
}
193+
194+
workers, err := donMeta.Workers()
195+
if err != nil {
196+
return 0, 0, errors.Wrap(err, "failed to get workflow DON workers")
197+
}
198+
199+
return nodeSet.DbInput.Port, len(workers), nil
200+
}
201+
163202
func semverFromFlag(version string) (*semver.Version, error) {
164203
parsed, err := semver.NewVersion(version)
165204
if err != nil {

0 commit comments

Comments
 (0)