Skip to content

Commit 061cf28

Browse files
Add stopped state support for pipeline status and stop/cancel AI agents (#27098)
* Add stopped state support for pipeline status and stop/cancel AI agents * Update generated TypeScript types * Stop specific AI agent run by workflow UID instead of killing all runs * Fix HTTP 415 on stop by appending runId as URL param instead of POST body * Address gitar review: validate UUID format, document limit, fix HTTP 415 * Fix killIngestionRun delegation and code review issues * fix lint-src * Improvements around stop/cancel run error handling and robustness * Fix per-run log fetching * gitarbot fixes * add unit test and playwright * address PR review: remove Argo refs, deduplicate stop logic --------- Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent f625881 commit 061cf28

17 files changed

Lines changed: 650 additions & 49 deletions

File tree

openmetadata-service/src/main/java/org/openmetadata/service/clients/pipeline/MeteredPipelineServiceClient.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,25 @@ public Map<String, String> getLastIngestionLogs(
136136
GET_LOGS, () -> this.decoratedClient.getLastIngestionLogs(ingestionPipeline, after));
137137
}
138138

139+
@Override
140+
public Map<String, String> getIngestionLogs(
141+
IngestionPipeline ingestionPipeline, String after, String runId) {
142+
return executeWithMetering(
143+
GET_LOGS, () -> this.decoratedClient.getIngestionLogs(ingestionPipeline, after, runId));
144+
}
145+
139146
public PipelineServiceClientResponse killIngestion(IngestionPipeline ingestionPipeline) {
140147
return this.respondWithMetering(
141148
KILL, () -> this.decoratedClient.killIngestion(ingestionPipeline));
142149
}
143150

151+
@Override
152+
public PipelineServiceClientResponse killIngestionRun(
153+
IngestionPipeline ingestionPipeline, String runId) {
154+
return this.respondWithMetering(
155+
KILL, () -> this.decoratedClient.killIngestionRun(ingestionPipeline, runId));
156+
}
157+
144158
@Override
145159
public String getPlatform() {
146160
return this.decoratedClient.getPlatform();

openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DataInsightSystemChartRepository.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,13 @@ private AppRunRecord convertPipelineStatusToAppRun(App app, PipelineStatus pipel
549549
case SUCCESS -> AppRunRecord.Status.SUCCESS;
550550
case FAILED, PARTIAL_SUCCESS -> AppRunRecord.Status.FAILED;
551551
case RUNNING -> AppRunRecord.Status.RUNNING;
552+
case STOPPED -> AppRunRecord.Status.STOPPED;
552553
})
553-
.withConfig(pipelineStatus.getConfig());
554+
.withConfig(pipelineStatus.getConfig())
555+
.withProperties(
556+
pipelineStatus.getRunId() != null
557+
? Map.of("pipelineRunId", pipelineStatus.getRunId())
558+
: null);
554559
}
555560

556561
/**

openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/IngestionPipelineRepository.java

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,18 +702,58 @@ public PipelineStatus getLatestPipelineStatus(IngestionPipeline ingestionPipelin
702702
}
703703

704704
public PipelineStatus getPipelineStatus(String ingestionPipelineFQN, UUID pipelineStatusRunId) {
705+
return getPipelineStatus(ingestionPipelineFQN, pipelineStatusRunId.toString());
706+
}
707+
708+
public PipelineStatus getPipelineStatus(String ingestionPipelineFQN, String runId) {
705709
IngestionPipeline ingestionPipeline = findByName(ingestionPipelineFQN, Include.NON_DELETED);
706710
return JsonUtils.readValue(
707711
daoCollection
708712
.entityExtensionTimeSeriesDao()
709713
.getExtensionByKey(
710714
RUN_ID_EXTENSION_KEY,
711-
pipelineStatusRunId.toString(),
715+
runId,
712716
ingestionPipeline.getFullyQualifiedName(),
713717
PIPELINE_STATUS_EXTENSION),
714718
PipelineStatus.class);
715719
}
716720

721+
/**
722+
* Upsert only the time-series record for a specific run without overwriting the pipeline-level
723+
* current status. Use this when stopping a specific run while other runs may still be active.
724+
* Inserts a new record if none exists for the runId, otherwise updates the existing one.
725+
*/
726+
@Transaction
727+
public void updatePipelineStatusByRunId(String fqn, PipelineStatus pipelineStatus) {
728+
IngestionPipeline ingestionPipeline = findByName(fqn, Include.NON_DELETED);
729+
String pipelineFqn = ingestionPipeline.getFullyQualifiedName();
730+
String json = JsonUtils.pojoToJson(pipelineStatus);
731+
PipelineStatus storedPipelineStatus =
732+
JsonUtils.readValue(
733+
daoCollection
734+
.entityExtensionTimeSeriesDao()
735+
.getLatestExtensionByKey(
736+
RUN_ID_EXTENSION_KEY,
737+
pipelineStatus.getRunId(),
738+
pipelineFqn,
739+
PIPELINE_STATUS_EXTENSION),
740+
PipelineStatus.class);
741+
if (storedPipelineStatus != null) {
742+
daoCollection
743+
.entityExtensionTimeSeriesDao()
744+
.updateExtensionByKey(
745+
RUN_ID_EXTENSION_KEY,
746+
pipelineStatus.getRunId(),
747+
pipelineFqn,
748+
PIPELINE_STATUS_EXTENSION,
749+
json);
750+
} else {
751+
daoCollection
752+
.entityExtensionTimeSeriesDao()
753+
.insert(pipelineFqn, PIPELINE_STATUS_EXTENSION, PIPELINE_STATUS_JSON_SCHEMA, json);
754+
}
755+
}
756+
717757
@Transaction
718758
public IngestionPipeline deletePipelineStatusByRunId(UUID ingestionPipelineId, UUID runId) {
719759
IngestionPipeline ingestionPipeline = find(ingestionPipelineId, Include.NON_DELETED);

openmetadata-service/src/main/java/org/openmetadata/service/resources/apps/AppResource.java

Lines changed: 160 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import java.util.Map;
4848
import java.util.UUID;
4949
import java.util.concurrent.ExecutorService;
50+
import java.util.concurrent.TimeUnit;
5051
import lombok.extern.slf4j.Slf4j;
5152
import org.openmetadata.schema.ServiceEntityInterface;
5253
import org.openmetadata.schema.api.data.RestoreEntity;
@@ -59,6 +60,7 @@
5960
import org.openmetadata.schema.entity.services.ingestionPipelines.IngestionPipeline;
6061
import org.openmetadata.schema.entity.services.ingestionPipelines.PipelineServiceClientResponse;
6162
import org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatus;
63+
import org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatusType;
6264
import org.openmetadata.schema.services.connections.metadata.OpenMetadataConnection;
6365
import org.openmetadata.schema.type.EntityHistory;
6466
import org.openmetadata.schema.type.EntityReference;
@@ -94,6 +96,7 @@
9496
import org.openmetadata.service.util.DeleteEntityResponse;
9597
import org.openmetadata.service.util.EntityUtil;
9698
import org.openmetadata.service.util.OpenMetadataConnectionBuilder;
99+
import org.openmetadata.service.util.PipelineStatusUtils;
97100
import org.openmetadata.service.util.RestUtil;
98101
import org.openmetadata.service.util.WebsocketNotificationHandler;
99102
import org.quartz.SchedulerException;
@@ -374,8 +377,13 @@ protected static AppRunRecord convertPipelineStatus(App app, PipelineStatus pipe
374377
case SUCCESS -> AppRunRecord.Status.SUCCESS;
375378
case FAILED, PARTIAL_SUCCESS -> AppRunRecord.Status.FAILED;
376379
case RUNNING -> AppRunRecord.Status.RUNNING;
380+
case STOPPED -> AppRunRecord.Status.STOPPED;
377381
})
378-
.withConfig(pipelineStatus.getConfig());
382+
.withConfig(pipelineStatus.getConfig())
383+
.withProperties(
384+
pipelineStatus.getRunId() != null
385+
? Map.of("pipelineRunId", pipelineStatus.getRunId())
386+
: null);
379387
}
380388

381389
private ResultList<AppRunRecord> sortRunsByStartTime(ResultList<AppRunRecord> runs) {
@@ -527,7 +535,14 @@ public Response getLastLogs(
527535
schema = @Schema(type = "string"))
528536
@QueryParam("after")
529537
@DefaultValue("")
530-
String after) {
538+
String after,
539+
@Parameter(
540+
description =
541+
"Pipeline run ID to fetch logs for a specific run. "
542+
+ "If not provided, returns logs for the latest run.",
543+
schema = @Schema(type = "string"))
544+
@QueryParam("runId")
545+
String runId) {
531546
App installation = repository.getByName(uriInfo, name, repository.getFields("id,pipelines"));
532547
if (installation.getAppType().equals(AppType.Internal)) {
533548
AppRunRecord latestRun = repository.getLatestAppRunsOptional(installation).orElse(null);
@@ -544,7 +559,7 @@ public Response getLastLogs(
544559
ingestionPipelineRepository.get(
545560
uriInfo, pipelineRef.getId(), ingestionPipelineRepository.getFields(FIELD_OWNERS));
546561
return Response.ok(
547-
pipelineServiceClient.getLastIngestionLogs(ingestionPipeline, after),
562+
pipelineServiceClient.getIngestionLogs(ingestionPipeline, after, runId),
548563
MediaType.APPLICATION_JSON_TYPE)
549564
.build();
550565
}
@@ -1227,7 +1242,12 @@ public Response stopApplicationRun(
12271242
@Context SecurityContext securityContext,
12281243
@Parameter(description = "Name of the App", schema = @Schema(type = "string"))
12291244
@PathParam("name")
1230-
String name) {
1245+
String name,
1246+
@Parameter(
1247+
description = "Pipeline run ID to stop a specific run",
1248+
schema = @Schema(type = "string"))
1249+
@QueryParam("runId")
1250+
String runId) {
12311251
EntityUtil.Fields fields = getFields(String.format("%s,bot,pipelines", FIELD_OWNERS));
12321252
App app = repository.getByName(uriInfo, name, fields);
12331253
OperationContext operationContext = new OperationContext(entityType, MetadataOperation.TRIGGER);
@@ -1241,17 +1261,148 @@ public Response stopApplicationRun(
12411261
.entity("Application stop in progress. Please check status via.")
12421262
.build();
12431263
} else {
1244-
if (!app.getPipelines().isEmpty()) {
1245-
IngestionPipeline ingestionPipeline = getIngestionPipeline(uriInfo, securityContext, app);
1246-
PipelineServiceClientResponse response =
1247-
pipelineServiceClient.killIngestion(ingestionPipeline);
1248-
return Response.status(response.getCode()).entity(response).build();
1264+
if (nullOrEmpty(app.getPipelines())) {
1265+
throw new BadRequestException(
1266+
String.format(
1267+
"Application [%s] supports interrupts but has no associated pipeline configured.",
1268+
name));
1269+
}
1270+
IngestionPipeline ingestionPipeline = getIngestionPipeline(uriInfo, securityContext, app);
1271+
if (runId != null && !runId.isBlank()) {
1272+
return stopSpecificRun(uriInfo, ingestionPipeline, runId);
1273+
} else {
1274+
return stopAllRuns(app, ingestionPipeline);
12491275
}
12501276
}
12511277
}
12521278
throw new BadRequestException("Application does not support Interrupts.");
12531279
}
12541280

1281+
private Response stopSpecificRun(
1282+
UriInfo uriInfo, IngestionPipeline ingestionPipeline, String runId) {
1283+
markPipelineStatusAsStopped(ingestionPipeline, runId);
1284+
PipelineServiceClientResponse killResponse;
1285+
try {
1286+
killResponse = pipelineServiceClient.killIngestionRun(ingestionPipeline, runId);
1287+
} catch (Exception e) {
1288+
LOG.error(
1289+
"Kill request for run [{}] on pipeline [{}] failed after DB update. Workflow may still be running.",
1290+
runId,
1291+
ingestionPipeline.getFullyQualifiedName(),
1292+
e);
1293+
return Response.status(Response.Status.BAD_GATEWAY)
1294+
.entity(
1295+
new PipelineServiceClientResponse()
1296+
.withCode(Response.Status.BAD_GATEWAY.getStatusCode())
1297+
.withReason(e.getMessage())
1298+
.withPlatform(pipelineServiceClient.getPlatform()))
1299+
.build();
1300+
}
1301+
return toStopResponse(killResponse);
1302+
}
1303+
1304+
private Response stopAllRuns(App app, IngestionPipeline ingestionPipeline) {
1305+
Long runStartTime =
1306+
repository
1307+
.getLatestAppRunsOptional(app, ingestionPipeline.getService().getId())
1308+
.map(AppRunRecord::getStartTime)
1309+
.orElse(null);
1310+
markLatestPipelineStatusAsStopped(ingestionPipeline, runStartTime);
1311+
PipelineServiceClientResponse killResponse;
1312+
try {
1313+
killResponse = pipelineServiceClient.killIngestion(ingestionPipeline);
1314+
} catch (Exception e) {
1315+
LOG.error(
1316+
"Kill request for pipeline [{}] failed after DB update. Workflows may still be running.",
1317+
ingestionPipeline.getFullyQualifiedName(),
1318+
e);
1319+
return Response.status(Response.Status.BAD_GATEWAY)
1320+
.entity(
1321+
new PipelineServiceClientResponse()
1322+
.withCode(Response.Status.BAD_GATEWAY.getStatusCode())
1323+
.withReason(e.getMessage())
1324+
.withPlatform(pipelineServiceClient.getPlatform()))
1325+
.build();
1326+
}
1327+
return toStopResponse(killResponse);
1328+
}
1329+
1330+
private void markPipelineStatusAsStopped(IngestionPipeline ingestionPipeline, String runId) {
1331+
IngestionPipelineRepository ingestionPipelineRepository =
1332+
(IngestionPipelineRepository) Entity.getEntityRepository(Entity.INGESTION_PIPELINE);
1333+
try {
1334+
PipelineStatus status =
1335+
ingestionPipelineRepository.getPipelineStatus(
1336+
ingestionPipeline.getFullyQualifiedName(), runId);
1337+
if (status == null) {
1338+
LOG.warn(
1339+
"Pipeline status not found in DB for run [{}] on pipeline [{}]. Proceeding with kill but DB state will remain inconsistent.",
1340+
runId,
1341+
ingestionPipeline.getFullyQualifiedName());
1342+
return;
1343+
}
1344+
if (!PipelineStatusUtils.isTerminalState(status.getPipelineState())) {
1345+
status.setPipelineState(PipelineStatusType.STOPPED);
1346+
status.setEndDate(System.currentTimeMillis());
1347+
// Use updatePipelineStatusByRunId instead of addPipelineStatus to avoid overwriting
1348+
// the pipeline-level current status. When stopping a specific run, other runs may still
1349+
// be active and their status should not be affected.
1350+
ingestionPipelineRepository.updatePipelineStatusByRunId(
1351+
ingestionPipeline.getFullyQualifiedName(), status);
1352+
}
1353+
} catch (Exception e) {
1354+
LOG.error(
1355+
"Failed to mark run [{}] as STOPPED in DB for pipeline [{}]. Kill will proceed but DB status may remain inconsistent.",
1356+
runId,
1357+
ingestionPipeline.getFullyQualifiedName(),
1358+
e);
1359+
}
1360+
}
1361+
1362+
private void markLatestPipelineStatusAsStopped(
1363+
IngestionPipeline ingestionPipeline, Long runStartTime) {
1364+
IngestionPipelineRepository ingestionPipelineRepository =
1365+
(IngestionPipelineRepository) Entity.getEntityRepository(Entity.INGESTION_PIPELINE);
1366+
long now = System.currentTimeMillis();
1367+
long startTs = runStartTime != null ? runStartTime : now - TimeUnit.HOURS.toMillis(1);
1368+
ResultList<PipelineStatus> statuses;
1369+
try {
1370+
statuses =
1371+
ingestionPipelineRepository.listPipelineStatus(
1372+
ingestionPipeline.getFullyQualifiedName(), startTs, now);
1373+
} catch (Exception e) {
1374+
LOG.error(
1375+
"Failed to list pipeline statuses for [{}]. Kill will proceed but DB statuses may remain inconsistent.",
1376+
ingestionPipeline.getFullyQualifiedName(),
1377+
e);
1378+
return;
1379+
}
1380+
for (PipelineStatus status : statuses.getData()) {
1381+
if (status.getRunId() == null || status.getRunId().isBlank()) {
1382+
continue;
1383+
}
1384+
if (!PipelineStatusUtils.isTerminalState(status.getPipelineState())) {
1385+
markPipelineStatusAsStopped(ingestionPipeline, status.getRunId());
1386+
}
1387+
}
1388+
}
1389+
1390+
private Response toStopResponse(PipelineServiceClientResponse killResponse) {
1391+
int code = killResponse.getCode();
1392+
if (code >= 200 && code < 300) {
1393+
return Response.status(code).entity(killResponse).build();
1394+
}
1395+
if (code == 404) {
1396+
LOG.warn(
1397+
"Kill request returned 404 — workflow already completed. DB status already marked STOPPED.");
1398+
return Response.ok(killResponse).build();
1399+
}
1400+
LOG.error(
1401+
"Kill request returned unexpected code [{}]. DB status already marked STOPPED but workflow may still be running.",
1402+
code);
1403+
return Response.status(Response.Status.BAD_GATEWAY).entity(killResponse).build();
1404+
}
1405+
12551406
@POST
12561407
@Path("/deploy/{name}")
12571408
@Operation(
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright 2021 Collate
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
* Unless required by applicable law or agreed to in writing, software
8+
* distributed under the License is distributed on an "AS IS" BASIS,
9+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
* See the License for the specific language governing permissions and
11+
* limitations under the License.
12+
*/
13+
14+
package org.openmetadata.service.util;
15+
16+
import java.util.Set;
17+
import org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatusType;
18+
19+
public final class PipelineStatusUtils {
20+
21+
private static final Set<PipelineStatusType> TERMINAL_STATES =
22+
Set.of(
23+
PipelineStatusType.SUCCESS,
24+
PipelineStatusType.FAILED,
25+
PipelineStatusType.STOPPED,
26+
PipelineStatusType.PARTIAL_SUCCESS);
27+
28+
private PipelineStatusUtils() {}
29+
30+
public static boolean isTerminalState(PipelineStatusType state) {
31+
return state != null && TERMINAL_STATES.contains(state);
32+
}
33+
}

openmetadata-spec/src/main/java/org/openmetadata/sdk/PipelineServiceClientInterface.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,23 @@ default PipelineServiceClientResponse runPipeline(
127127
/* Get the all last run logs of a deployed pipeline */
128128
Map<String, String> getLastIngestionLogs(IngestionPipeline ingestionPipeline, String after);
129129

130+
/* Get logs for a specific pipeline run identified by runId.
131+
* When runId is null or blank, falls back to getLastIngestionLogs (latest run). */
132+
default Map<String, String> getIngestionLogs(
133+
IngestionPipeline ingestionPipeline, String after, String runId) {
134+
return getLastIngestionLogs(ingestionPipeline, after);
135+
}
136+
130137
/* Get the all last run logs of a deployed pipeline */
131138
PipelineServiceClientResponse killIngestion(IngestionPipeline ingestionPipeline);
132139

140+
/* Stop a specific run of a deployed pipeline identified by its run ID.
141+
* Default is a no-op: clients that do not support per-run stopping return success without
142+
* taking any action. The DB status is already marked STOPPED before this is called. */
143+
default PipelineServiceClientResponse killIngestionRun(
144+
IngestionPipeline ingestionPipeline, String runId) {
145+
return new PipelineServiceClientResponse().withCode(200).withPlatform(getPlatform());
146+
}
147+
133148
String getPlatform();
134149
}

openmetadata-spec/src/main/resources/json/schema/entity/services/ingestionPipelines/ingestionPipeline.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
"description": "Pipeline status denotes if its failed or succeeded.",
2727
"type": "string",
2828
"javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatusType",
29-
"enum": ["queued","success","failed","running","partialSuccess"]
29+
"enum": ["queued","success","failed","running","partialSuccess","stopped"]
3030
},
3131
"startDate": {
3232
"description": "startDate of the pipeline run for this particular execution.",

0 commit comments

Comments
 (0)