feat: Update GCP Batch support

lbeckman314 · lbeckman314 · commit 00ab0c48b2b3 · 2026-05-18T18:35:29.000-07:00
diff --git a/compute/gcp_batch/backend.go b/compute/gcp_batch/backend.go
@@ -198,7 +198,7 @@ func (b *Backend) Submit(task *tes.Task) error {
 		}
 	}
 
-	// Mount all buckets to `/mnt/share/<BUCKET>` as volumes in the GCP Job Request
+	// Mount all buckets to `/mnt/disks/<BUCKET>` as volumes in the GCP Job Request
 	var volumes []*batchpb.Volume
 	for bucketName := range buckets {
 		volumes = append(volumes, &batchpb.Volume{
@@ -211,23 +211,83 @@ func (b *Backend) Submit(task *tes.Task) error {
 		})
 	}
 
+	// Build a path map: user-specified path → /mnt/disks/<bucket>/<object> so
+	// that executor commands referencing those paths are rewritten before
+	// submission. This avoids symlinks, which are unreliable across containers
+	// on COS (Container-Optimized OS) VMs where each container has an isolated
+	// filesystem.
+	if err := detectPathCollisions(task.Inputs, task.Outputs); err != nil {
+		return fmt.Errorf("GCP Batch path collision: %w", err)
+	}
+
+	pathMap := make(map[string]string) // userPath → mountedPath
+	for _, input := range task.Inputs {
+		if input.Path == "" || input.Url == "" {
+			continue
+		}
+		if err := validatePath(input.Path); err != nil {
+			return fmt.Errorf("invalid input path: %w", err)
+		}
+		bucket, objectPath := extractGCSPath(input.Url)
+		if bucket == "" {
+			continue
+		}
+		pathMap[input.Path] = fmt.Sprintf("/mnt/disks/%s/%s", bucket, objectPath)
+	}
+	for _, output := range task.Outputs {
+		if output.Path == "" || output.Url == "" {
+			continue
+		}
+		if err := validatePath(output.Path); err != nil {
+			return fmt.Errorf("invalid output path: %w", err)
+		}
+		bucket, objectPath := extractGCSPath(output.Url)
+		if bucket == "" {
+			continue
+		}
+		pathMap[output.Path] = fmt.Sprintf("/mnt/disks/%s/%s", bucket, objectPath)
+	}
+
+	// rewriteArg replaces every occurrence of a known user path in s with its
+	// /mnt/disks/... equivalent, covering standalone args and paths embedded in shell strings.
+	// NOTE(review): map order is random; if one user path is a prefix of another, output may vary — confirm.
+	rewriteArg := func(s string) string {
+		for userPath, mountedPath := range pathMap {
+			s = strings.ReplaceAll(s, userPath, mountedPath)
+		}
+		return s
+	}
+
 	// Runnables
 	var runnables []*batchpb.Runnable
 
 	for _, executor := range task.Executors {
-		cmd := strings.Join(executor.Command, " ")
+		var commands []string
+		for _, arg := range executor.Command {
+			commands = append(commands, rewriteArg(arg))
+		}
 
-		if executor.Stdout != "" {
-			// Redirect command output to the specified file path
-			cmd = fmt.Sprintf("%s | tee %s", cmd, executor.Stdout)
+		// Wrap in a shell when Stdout, Stdin, or Stderr is set; in this block only Stdout is redirected (via tee).
+		if executor.Stdout != "" || executor.Stdin != "" || executor.Stderr != "" {
+			cmd := strings.Join(commands, " ")
+			if executor.Stdout != "" {
+				cmd = fmt.Sprintf("%s | tee %s", cmd, rewriteArg(executor.Stdout))
+			}
+			commands = []string{"sh", "-c", cmd}
+		}
+
+		container := &batchpb.Runnable_Container{
+			ImageUri: executor.Image,
+			Commands: commands,
+		}
+
+		if executor.Workdir != "" {
+			container.Options = fmt.Sprintf("--workdir %s", executor.Workdir)
 		}
 
 		runnable := &batchpb.Runnable{
 			Executable: &batchpb.Runnable_Container_{
-				Container: &batchpb.Runnable_Container{
-					ImageUri: executor.Image,
-					Commands: []string{"sh", "-c", cmd},
-				},
+				Container: container,
 			},
 		}
 
diff --git a/compute/gcp_batch/backend_test.go b/compute/gcp_batch/backend_test.go
@@ -256,10 +256,15 @@ func TestSubmit_MultipleInputsOutputs(t *testing.T) {
 		t.Errorf("Expected 3 volumes, got %d", len(volumes))
 	}
 
-	// Verify symlink commands are present in the generated command
+	// Paths are rewritten directly in executor commands — no separate setup runnable.
 	runnables := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables
 	if len(runnables) != 1 {
-		t.Fatalf("Expected 1 runnable, got %d", len(runnables))
+		t.Fatalf("Expected 1 runnable (executor only), got %d", len(runnables))
+	}
+	// The executor command ["echo", "test"] doesn't reference any I/O paths, so
+	// verify correctness via the volumes instead — all 3 buckets must be mounted.
+	if len(volumes) != 3 {
+		t.Errorf("Expected 3 volumes for 3 unique buckets, got %d", len(volumes))
 	}
 }
 
@@ -311,10 +316,24 @@ func TestSubmit_MultipleExecutors(t *testing.T) {
 		t.Fatalf("Submit() error = %v", err)
 	}
 
-	// Should create 2 runnables, one per executor
+	// Should create 1 runnable per executor; paths are rewritten inline, no setup runnable.
 	runnables := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables
 	if len(runnables) != 2 {
-		t.Fatalf("Expected 2 runnables, got %d", len(runnables))
+		t.Fatalf("Expected 2 runnables (1 per executor), got %d", len(runnables))
+	}
+	// Both executors reference /data/input.txt which maps to gs://bucket/input.txt.
+	for i, r := range runnables {
+		cmds := r.GetContainer().Commands
+		found := false
+		for _, c := range cmds {
+			if strings.Contains(c, "/mnt/disks/bucket/input.txt") {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Errorf("Runnable %d: expected rewritten path /mnt/disks/bucket/input.txt in commands %v", i, cmds)
+		}
 	}
 }
 
@@ -407,10 +426,63 @@ func TestSubmit_NoInputsOutputs(t *testing.T) {
 		t.Errorf("Expected 0 volumes, got %d", len(volumes))
 	}
 
-	// Command should still work, just no symlinks
-	cmd := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables[0].GetContainer().Commands[2]
-	if !strings.Contains(cmd, "echo hello") {
-		t.Error("Executor command not present")
+	// Command should be passed directly (no sh -c wrapping when no redirection needed)
+	cmds := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables[0].GetContainer().Commands
+	if len(cmds) != 2 || cmds[0] != "echo" || cmds[1] != "hello" {
+		t.Errorf("Expected direct command [echo hello], got %v", cmds)
+	}
+}
+
+// TestSubmit_ExecutorWorkdir checks that an executor's Workdir is emitted as a --workdir container option and that Options stays empty when Workdir is unset.
+func TestSubmit_ExecutorWorkdir(t *testing.T) {
+	log := logger.NewLogger("test", logger.DefaultConfig())
+	conf := &config.GCPBatch{
+		Project:  "test-project",
+		Location: "us-west1",
+	}
+
+	var capturedReq *batchpb.CreateJobRequest
+	mockClient := &mockClient{
+		CreateJobFunc: func(req *batchpb.CreateJobRequest) (*batchpb.Job, error) {
+			capturedReq = req
+			return &batchpb.Job{Name: "test-job", Uid: "test-uid"}, nil
+		},
+	}
+
+	backend := &Backend{
+		client: mockClient,
+		conf:   conf,
+		log:    log,
+		event:  &noopEventWriter{},
+	}
+
+	task := &tes.Task{
+		Id: "task1",
+		Executors: []*tes.Executor{
+			{Image: "alpine", Command: []string{"echo", "test"}, Workdir: "/work"},
+			{Image: "alpine", Command: []string{"echo", "no-workdir"}},
+		},
+	}
+
+	err := backend.Submit(task)
+	if err != nil {
+		t.Fatalf("Submit() error = %v", err)
+	}
+
+	runnables := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables
+	// The task has no inputs/outputs, so no setup runnable is expected; only the 2 executor runnables.
+	if len(runnables) != 2 {
+		t.Fatalf("Expected 2 runnables, got %d", len(runnables))
+	}
+
+	opts0 := runnables[0].GetContainer().Options
+	if !strings.Contains(opts0, "--workdir") || !strings.Contains(opts0, "/work") {
+		t.Errorf("Expected --workdir /work in Options, got %q", opts0)
+	}
+
+	opts1 := runnables[1].GetContainer().Options
+	if opts1 != "" {
+		t.Errorf("Expected empty Options for executor without Workdir, got %q", opts1)
+	}
+}
 
@@ -482,11 +554,12 @@ func TestSubmit_CommandConstruction(t *testing.T) {
 		t.Fatalf("Submit failed: %v", err)
 	}
 
-	// Verify the command was properly quoted
-	cmd := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables[0].GetContainer().Commands[2]
-
-	// Should contain properly escaped quotes, not broken by spaces
-	if !strings.Contains(cmd, "python -c") {
-		t.Errorf("Command should contain 'python -c', got: %s", cmd)
+	// Commands are passed directly without shell wrapping, so the original args are preserved.
+	cmds := capturedReq.Job.TaskGroups[0].TaskSpec.Runnables[0].GetContainer().Commands
+	if len(cmds) != 3 || cmds[0] != "python" || cmds[1] != "-c" {
+		t.Errorf("Expected direct command [python -c <script>], got %v", cmds)
+	}
+	if !strings.Contains(cmds[2], "Hello World") {
+		t.Errorf("Expected script body in Commands[2], got: %s", cmds[2])
 	}
 }