Skip to content

Commit ff1156a

Browse files
authored
control-service: fix data job image building (#2832)
Why Changing Python versions without updating gitCommitSha doesn't trigger the build, causing data job issues due to different VDK images. What We improved the logic to check whether the Python version has changed, and if it has, the image building is triggered. Testing done: Unit tests. Signed-off-by: Miroslav Ivanov miroslavi@vmware.com --------- Signed-off-by: Miroslav Ivanov miroslavi@vmware.com Co-authored-by: github-actions <>
1 parent 24f1336 commit ff1156a

6 files changed

Lines changed: 129 additions & 21 deletions

File tree

projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/DeploymentService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ public void updateDeployment(
183183
jobDeployment.getDataJobName(), jobDeployment.getGitCommitSha());
184184

185185
if (jobImageBuilder.buildImage(
186-
imageName, dataJob, toDesiredDataJobDeployment(jobDeployment), sendNotification)) {
186+
imageName, dataJob, toDesiredDataJobDeployment(jobDeployment), null, sendNotification)) {
187187
log.info(
188188
"Image {} has been built. Will now schedule job {} for execution",
189189
imageName,

projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/DeploymentServiceV2.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ public void updateDeployment(
160160
dockerRegistryService.dataJobImage(
161161
desiredJobDeployment.getDataJobName(), desiredJobDeployment.getGitCommitSha());
162162

163-
if (jobImageBuilder.buildImage(imageName, dataJob, desiredJobDeployment, sendNotification)) {
163+
if (jobImageBuilder.buildImage(
164+
imageName, dataJob, desiredJobDeployment, actualJobDeployment, sendNotification)) {
164165
ActualDataJobDeployment actualJobDeploymentResult =
165166
jobImageDeployer.scheduleJob(
166167
dataJob,

projects/control-service/projects/pipelines_control_service/src/main/java/com/vmware/taurus/service/deploy/JobImageBuilder.java

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import com.vmware.taurus.exception.KubernetesException;
1212
import com.vmware.taurus.service.credentials.AWSCredentialsService;
1313
import com.vmware.taurus.service.kubernetes.ControlKubernetesService;
14+
import com.vmware.taurus.service.model.ActualDataJobDeployment;
1415
import com.vmware.taurus.service.model.DataJob;
1516
import com.vmware.taurus.service.model.DesiredDataJobDeployment;
1617
import io.kubernetes.client.openapi.ApiException;
@@ -113,7 +114,8 @@ public JobImageBuilder(
113114
*
114115
* @param imageName Full name of the image to build.
115116
* @param dataJob Information about the data job.
116-
* @param jobDeployment Information about the data job deployment.
117+
* @param desiredDataJobDeployment Information about the desired data job deployment.
118+
* @param actualDataJobDeployment Information about the actual data job deployment.
117119
* @param sendNotification
118120
* @return True if build and push was successful. False otherwise.
119121
* @throws ApiException
@@ -123,7 +125,8 @@ public JobImageBuilder(
123125
public boolean buildImage(
124126
String imageName,
125127
DataJob dataJob,
126-
DesiredDataJobDeployment jobDeployment,
128+
DesiredDataJobDeployment desiredDataJobDeployment,
129+
ActualDataJobDeployment actualDataJobDeployment,
127130
Boolean sendNotification)
128131
throws ApiException, IOException, InterruptedException {
129132
// TODO: refactor and hide AWS details behind DockerRegistryService?
@@ -145,17 +148,22 @@ public boolean buildImage(
145148
}
146149
}
147150

148-
if (jobDeployment.getPythonVersion() == null) {
151+
if (desiredDataJobDeployment.getPythonVersion() == null) {
149152
log.warn("Missing pythonVersion. Data Job cannot be deployed.");
150153
return false;
151154
}
152155

153-
if (dockerRegistryService.dataJobImageExists(imageName, credentials)) {
156+
// Rebuild the image if the Python version changes but the gitCommitSha remains the same.
157+
if ((actualDataJobDeployment == null
158+
|| desiredDataJobDeployment
159+
.getPythonVersion()
160+
.equals(actualDataJobDeployment.getPythonVersion()))
161+
&& dockerRegistryService.dataJobImageExists(imageName, credentials)) {
154162
log.trace("Data Job image {} already exists and nothing else to do.", imageName);
155163
return true;
156164
}
157165

158-
String builderJobName = getBuilderJobName(jobDeployment.getDataJobName());
166+
String builderJobName = getBuilderJobName(desiredDataJobDeployment.getDataJobName());
159167

160168
log.debug("Check if old builder job {} exists", builderJobName);
161169
if (controlKubernetesService.listJobs().contains(builderJobName)) {
@@ -187,13 +195,14 @@ public boolean buildImage(
187195
registryUsername,
188196
registryPassword,
189197
builderAwsSessionToken);
190-
var envs = getBuildParameters(dataJob, jobDeployment);
191-
String builderImage = supportedPythonVersions.getBuilderImage(jobDeployment.getPythonVersion());
198+
var envs = getBuildParameters(dataJob, desiredDataJobDeployment);
199+
String builderImage =
200+
supportedPythonVersions.getBuilderImage(desiredDataJobDeployment.getPythonVersion());
192201

193202
log.info(
194203
"Creating builder job {} for data job version {}",
195204
builderJobName,
196-
jobDeployment.getGitCommitSha());
205+
desiredDataJobDeployment.getGitCommitSha());
197206
controlKubernetesService.createJob(
198207
builderJobName,
199208
builderImage,
@@ -215,7 +224,7 @@ public boolean buildImage(
215224
log.debug(
216225
"Waiting for builder job {} for data job version {}",
217226
builderJobName,
218-
jobDeployment.getGitCommitSha());
227+
desiredDataJobDeployment.getGitCommitSha());
219228

220229
var condition =
221230
controlKubernetesService.watchJob(
@@ -234,7 +243,7 @@ public boolean buildImage(
234243
}
235244
if (!condition.isSuccess()) {
236245
notificationHelper.verifyBuilderResult(
237-
builderJobName, dataJob, jobDeployment, condition, logs, sendNotification);
246+
builderJobName, dataJob, desiredDataJobDeployment, condition, logs, sendNotification);
238247
} else {
239248
log.info("Builder job {} finished successfully. Will delete it now", builderJobName);
240249
log.info(

projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/deploy/DeploymentServiceTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ public void updateDeployment_newDeploymentCreated()
158158
TEST_JOB_IMAGE_NAME,
159159
testDataJob,
160160
DeploymentModelConverter.toDesiredDataJobDeployment(jobDeployment),
161+
null,
161162
true))
162163
.thenReturn(true);
163164

@@ -170,6 +171,7 @@ public void updateDeployment_newDeploymentCreated()
170171
TEST_JOB_IMAGE_NAME,
171172
testDataJob,
172173
DeploymentModelConverter.toDesiredDataJobDeployment(jobDeployment),
174+
null,
173175
true);
174176
verify(kubernetesService)
175177
.createCronJob(
@@ -207,6 +209,7 @@ public void updateDeployment_existingDeploymentUpdated()
207209
TEST_JOB_IMAGE_NAME,
208210
testDataJob,
209211
DeploymentModelConverter.toDesiredDataJobDeployment(jobDeployment),
212+
null,
210213
true))
211214
.thenReturn(true);
212215
when(kubernetesService.listCronJobs()).thenReturn(Set.of(TEST_CRONJOB_NAME));
@@ -220,6 +223,7 @@ public void updateDeployment_existingDeploymentUpdated()
220223
TEST_JOB_IMAGE_NAME,
221224
testDataJob,
222225
DeploymentModelConverter.toDesiredDataJobDeployment(jobDeployment),
226+
null,
223227
true);
224228
verify(kubernetesService)
225229
.updateCronJob(
@@ -254,6 +258,7 @@ public void updateDeployment_failedToBuildImage_deploymentSkipped()
254258
TEST_JOB_IMAGE_NAME,
255259
testDataJob,
256260
DeploymentModelConverter.toDesiredDataJobDeployment(jobDeployment),
261+
null,
257262
true))
258263
.thenReturn(false);
259264

@@ -266,6 +271,7 @@ public void updateDeployment_failedToBuildImage_deploymentSkipped()
266271
TEST_JOB_IMAGE_NAME,
267272
testDataJob,
268273
DeploymentModelConverter.toDesiredDataJobDeployment(jobDeployment),
274+
null,
269275
true);
270276
verify(kubernetesService, never())
271277
.updateCronJob(

projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/deploy/DeploymentServiceV2TestIT.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public void updateDeployment_withDesiredDeploymentStatusNone_shouldStartDeployme
5656
updateDeployment(DeploymentStatus.NONE, 1, true);
5757

5858
Mockito.verify(jobImageBuilder, Mockito.times(1))
59-
.buildImage(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(true));
59+
.buildImage(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(true));
6060
}
6161

6262
@Test
@@ -66,7 +66,7 @@ public void updateDeployment_withDesiredDeploymentStatusNone_shouldStartDeployme
6666
updateDeployment(DeploymentStatus.NONE, 1, false);
6767

6868
Mockito.verify(jobImageBuilder, Mockito.times(1))
69-
.buildImage(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(false));
69+
.buildImage(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.eq(false));
7070
}
7171

7272
private void updateDeployment(
@@ -86,7 +86,8 @@ private void updateDeployment(
8686
DataJob dataJob = new DataJob();
8787
dataJob.setJobConfig(new JobConfig());
8888
Mockito.when(
89-
jobImageBuilder.buildImage(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any()))
89+
jobImageBuilder.buildImage(
90+
Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any()))
9091
.thenReturn(false);
9192

9293
deploymentService.updateDeployment(

projects/control-service/projects/pipelines_control_service/src/test/java/com/vmware/taurus/service/deploy/JobImageBuilderTest.java

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import com.vmware.taurus.service.credentials.AWSCredentialsService;
2121
import com.vmware.taurus.service.credentials.AWSCredentialsService.AWSCredentialsDTO;
2222
import com.vmware.taurus.service.kubernetes.ControlKubernetesService;
23+
import com.vmware.taurus.service.model.ActualDataJobDeployment;
2324
import com.vmware.taurus.service.model.DataJob;
2425
import com.vmware.taurus.service.model.DesiredDataJobDeployment;
2526
import com.vmware.taurus.service.model.JobConfig;
@@ -99,7 +100,7 @@ public void buildImage_notExist_success() throws InterruptedException, ApiExcept
99100
jobDeployment.setEnabled(true);
100101
jobDeployment.setPythonVersion("3.7");
101102

102-
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);
103+
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, null, true);
103104

104105
verify(kubernetesService)
105106
.createJob(
@@ -143,7 +144,8 @@ public void buildImage_builderRunning_oldBuilderDeleted()
143144
jobDeployment.setEnabled(true);
144145
jobDeployment.setPythonVersion("3.7");
145146

146-
var result = jobImageBuilder.buildImage(TEST_IMAGE_NAME, testDataJob, jobDeployment, true);
147+
var result =
148+
jobImageBuilder.buildImage(TEST_IMAGE_NAME, testDataJob, jobDeployment, null, true);
147149

148150
verify(kubernetesService, times(2)).deleteJob(TEST_BUILDER_IMAGE_NAME);
149151
verify(kubernetesService)
@@ -179,7 +181,8 @@ public void buildImage_imageExists_buildSkipped()
179181
jobDeployment.setEnabled(true);
180182
jobDeployment.setPythonVersion("3.7");
181183

182-
var result = jobImageBuilder.buildImage(TEST_IMAGE_NAME, testDataJob, jobDeployment, true);
184+
var result =
185+
jobImageBuilder.buildImage(TEST_IMAGE_NAME, testDataJob, jobDeployment, null, true);
183186

184187
verify(kubernetesService, never())
185188
.createJob(
@@ -221,7 +224,7 @@ public void buildImage_jobFailed_failure()
221224
jobDeployment.setEnabled(true);
222225
jobDeployment.setPythonVersion("3.7");
223226

224-
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);
227+
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, null, true);
225228

226229
verify(kubernetesService)
227230
.createJob(
@@ -273,7 +276,7 @@ public void buildImage_jobFailed_failure()
273276

274277
ArgumentCaptor<Map<String, String>> captor = ArgumentCaptor.forClass(Map.class);
275278

276-
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);
279+
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, null, true);
277280

278281
verify(kubernetesService)
279282
.createJob(
@@ -310,7 +313,7 @@ public void buildImage_PythonVersionNull_shouldNotCreateCronjob()
310313
jobDeployment.setGitCommitSha("test-commit");
311314
jobDeployment.setEnabled(true);
312315

313-
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, true);
316+
var result = jobImageBuilder.buildImage("test-image", testDataJob, jobDeployment, null, true);
314317

315318
verify(supportedPythonVersions, never()).isPythonVersionSupported("3.11");
316319
verify(supportedPythonVersions, never()).getJobBaseImage("3.11");
@@ -337,6 +340,94 @@ public void buildImage_PythonVersionNull_shouldNotCreateCronjob()
337340
Assertions.assertFalse(result);
338341
}
339342

343+
@Test
344+
public void buildImage_imageExistsAndEqualPythonVersions_shouldSkipBuild()
345+
throws InterruptedException, ApiException, IOException {
346+
when(dockerRegistryService.dataJobImageExists(eq(TEST_IMAGE_NAME), Mockito.any()))
347+
.thenReturn(true);
348+
349+
DesiredDataJobDeployment jobDeployment = new DesiredDataJobDeployment();
350+
jobDeployment.setDataJobName(TEST_JOB_NAME);
351+
jobDeployment.setGitCommitSha("test-commit");
352+
jobDeployment.setEnabled(true);
353+
jobDeployment.setPythonVersion("3.7");
354+
355+
ActualDataJobDeployment actualDataJobDeployment = new ActualDataJobDeployment();
356+
actualDataJobDeployment.setPythonVersion("3.7");
357+
358+
var result =
359+
jobImageBuilder.buildImage(
360+
TEST_IMAGE_NAME, testDataJob, jobDeployment, actualDataJobDeployment, true);
361+
362+
verify(kubernetesService, never())
363+
.createJob(
364+
anyString(),
365+
anyString(),
366+
anyBoolean(),
367+
anyBoolean(),
368+
any(),
369+
any(),
370+
any(),
371+
any(),
372+
any(),
373+
any(),
374+
any(),
375+
anyLong(),
376+
anyLong(),
377+
anyLong(),
378+
anyString(),
379+
anyString());
380+
verify(notificationHelper, never())
381+
.verifyBuilderResult(anyString(), any(), any(), any(), anyString(), anyBoolean());
382+
Assertions.assertTrue(result);
383+
}
384+
385+
@Test
386+
public void buildImage_imageExistsAndDifferentPythonVersions_shouldSucceed()
387+
throws InterruptedException, ApiException, IOException {
388+
when(kubernetesService.listJobs()).thenReturn(Collections.emptySet());
389+
var builderJobResult =
390+
new KubernetesService.JobStatusCondition(true, "type", "test-reason", "test-message", 0);
391+
when(kubernetesService.watchJob(any(), anyInt(), any())).thenReturn(builderJobResult);
392+
when(supportedPythonVersions.getJobBaseImage(any())).thenReturn("python:3.7-slim");
393+
when(supportedPythonVersions.getBuilderImage(any())).thenReturn(TEST_BUILDER_IMAGE_NAME);
394+
395+
DesiredDataJobDeployment jobDeployment = new DesiredDataJobDeployment();
396+
jobDeployment.setDataJobName(TEST_JOB_NAME);
397+
jobDeployment.setGitCommitSha("test-commit");
398+
jobDeployment.setEnabled(true);
399+
jobDeployment.setPythonVersion("3.8");
400+
401+
ActualDataJobDeployment actualDataJobDeployment = new ActualDataJobDeployment();
402+
actualDataJobDeployment.setPythonVersion("3.7");
403+
404+
var result =
405+
jobImageBuilder.buildImage(
406+
TEST_IMAGE_NAME, testDataJob, jobDeployment, actualDataJobDeployment, true);
407+
408+
verify(kubernetesService)
409+
.createJob(
410+
eq(TEST_BUILDER_JOB_NAME),
411+
eq(TEST_BUILDER_IMAGE_NAME),
412+
eq(false),
413+
eq(false),
414+
any(),
415+
any(),
416+
any(),
417+
any(),
418+
any(),
419+
any(),
420+
any(),
421+
anyLong(),
422+
anyLong(),
423+
anyLong(),
424+
any(),
425+
any());
426+
427+
verify(kubernetesService).deleteJob(TEST_BUILDER_JOB_NAME);
428+
Assertions.assertTrue(result);
429+
}
430+
340431
private static Map<String, Map<String, String>> generateSupportedPythonVersionsConf() {
341432
return Map.of(
342433
"3.10", Map.of("baseImage", "python:3.10-slim", "vdkImage", "test_vdk_image_3.10"),

0 commit comments

Comments
 (0)