Skip to content

Commit a78823e

Browse files
authored
Fix mesh worker service status retrieval when a pod is faulty (#239)
* fix status with fault pod
* update
* fix error message
1 parent ed76782 commit a78823e

7 files changed

Lines changed: 239 additions & 48 deletions

File tree

charts/function-mesh-operator/crds/compute.functionmesh.io_functionmeshes.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,8 @@ spec:
10671067
type: string
10681068
type: object
10691069
type: object
1070+
serviceAccountName:
1071+
type: string
10701072
sidecars:
10711073
items:
10721074
properties:
@@ -3240,6 +3242,8 @@ spec:
32403242
type: string
32413243
type: object
32423244
type: object
3245+
serviceAccountName:
3246+
type: string
32433247
sidecars:
32443248
items:
32453249
properties:
@@ -5392,6 +5396,8 @@ spec:
53925396
type: string
53935397
type: object
53945398
type: object
5399+
serviceAccountName:
5400+
type: string
53955401
sidecars:
53965402
items:
53975403
properties:

charts/function-mesh-operator/crds/compute.functionmesh.io_functions.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,8 @@ spec:
10681068
type: string
10691069
type: object
10701070
type: object
1071+
serviceAccountName:
1072+
type: string
10711073
sidecars:
10721074
items:
10731075
properties:

charts/function-mesh-operator/crds/compute.functionmesh.io_sinks.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,8 @@ spec:
10031003
type: string
10041004
type: object
10051005
type: object
1006+
serviceAccountName:
1007+
type: string
10061008
sidecars:
10071009
items:
10081010
properties:

charts/function-mesh-operator/crds/compute.functionmesh.io_sources.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,8 @@ spec:
978978
type: string
979979
type: object
980980
type: object
981+
serviceAccountName:
982+
type: string
981983
sidecars:
982984
items:
983985
properties:

mesh-worker-service/src/main/java/io/functionmesh/compute/rest/api/FunctionsImpl.java

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import io.functionmesh.compute.util.KubernetesUtils;
3232
import io.grpc.ManagedChannel;
3333
import io.grpc.ManagedChannelBuilder;
34+
import io.kubernetes.client.openapi.models.V1ContainerState;
3435
import io.kubernetes.client.openapi.models.V1ContainerStatus;
3536
import io.kubernetes.client.openapi.models.V1Pod;
3637
import io.kubernetes.client.openapi.models.V1PodList;
@@ -368,23 +369,28 @@ public FunctionStatus getFunctionStatus(final String tenant,
368369
componentName);
369370
throw new RestException(Response.Status.NOT_FOUND, "no ServiceName exists");
370371
}
371-
if (v1StatefulSet.getStatus() != null && v1StatefulSet.getStatus().getReplicas() != null
372-
&& v1StatefulSet.getStatus().getReadyReplicas() != null) {
373-
functionStatus.setNumInstances(v1StatefulSet.getStatus().getReplicas());
374-
functionStatus.setNumRunning(v1StatefulSet.getStatus().getReadyReplicas());
375-
for (int i = 0; i < v1StatefulSet.getStatus().getReplicas(); i++) {
376-
FunctionStatus.FunctionInstanceStatus functionInstanceStatus = new FunctionStatus.FunctionInstanceStatus();
377-
FunctionStatus.FunctionInstanceStatus.FunctionInstanceStatusData functionInstanceStatusData = new FunctionStatus.FunctionInstanceStatus.FunctionInstanceStatusData();
378-
functionInstanceStatus.setInstanceId(i);
379-
functionInstanceStatus.setStatus(functionInstanceStatusData);
380-
functionStatus.addInstance(functionInstanceStatus);
372+
if (v1StatefulSet.getStatus() != null) {
373+
Integer replicas = v1StatefulSet.getStatus().getReplicas();
374+
if (replicas != null) {
375+
functionStatus.setNumInstances(replicas);
376+
for (int i = 0; i < replicas; i++) {
377+
FunctionStatus.FunctionInstanceStatus functionInstanceStatus = new FunctionStatus.FunctionInstanceStatus();
378+
FunctionStatus.FunctionInstanceStatus.FunctionInstanceStatusData functionInstanceStatusData = new FunctionStatus.FunctionInstanceStatus.FunctionInstanceStatusData();
379+
functionInstanceStatus.setInstanceId(i);
380+
functionInstanceStatus.setStatus(functionInstanceStatusData);
381+
functionStatus.addInstance(functionInstanceStatus);
382+
}
383+
if (v1StatefulSet.getStatus().getReadyReplicas() != null) {
384+
functionStatus.setNumRunning(v1StatefulSet.getStatus().getReadyReplicas());
385+
}
381386
}
382387
} else {
383-
log.warn(
388+
log.error(
384389
"no StatefulSet status exists when get status of function {}/{}/{}",
385390
tenant,
386391
namespace,
387392
componentName);
393+
throw new RestException(Response.Status.NOT_FOUND, "no StatefulSet status exists");
388394
}
389395
V1PodList podList = worker().getCoreV1Api().listNamespacedPod(
390396
nameSpaceName, null, null, null, null,
@@ -395,13 +401,13 @@ public FunctionStatus getFunctionStatus(final String tenant,
395401
filter(KubernetesUtils::isPodRunning).collect(Collectors.toList());
396402
List<V1Pod> pendingPods = podList.getItems().stream().
397403
filter(pod -> !KubernetesUtils.isPodRunning(pod)).collect(Collectors.toList());
404+
final String finalStatefulSetName = statefulSetName;
398405
if (!runningPods.isEmpty()) {
399406
int podsCount = runningPods.size();
400407
ManagedChannel[] channel = new ManagedChannel[podsCount];
401408
InstanceControlGrpc.InstanceControlFutureStub[] stub =
402409
new InstanceControlGrpc.InstanceControlFutureStub[podsCount];
403410
final String finalSubdomain = subdomain;
404-
final String finalStatefulSetName = statefulSetName;
405411
Set<CompletableFuture<InstanceCommunication.FunctionStatus>> completableFutureSet = new HashSet<>();
406412
runningPods.forEach(pod -> {
407413
String podName = KubernetesUtils.getPodName(pod);
@@ -412,7 +418,13 @@ public FunctionStatus getFunctionStatus(final String tenant,
412418
log.warn("shardId invalid {}", podName);
413419
return;
414420
}
415-
FunctionStatus.FunctionInstanceStatus functionInstanceStatus = functionStatus.getInstances().get(shardId);
421+
FunctionStatus.FunctionInstanceStatus functionInstanceStatus = null;
422+
for (FunctionStatus.FunctionInstanceStatus ins : functionStatus.getInstances()) {
423+
if (ins.getInstanceId() == shardId) {
424+
functionInstanceStatus = ins;
425+
break;
426+
}
427+
}
416428
if (functionInstanceStatus != null) {
417429
FunctionStatus.FunctionInstanceStatus.FunctionInstanceStatusData functionInstanceStatusData = functionInstanceStatus.getStatus();
418430
V1PodStatus podStatus = pod.getStatus();
@@ -451,6 +463,12 @@ public FunctionStatus getFunctionStatus(final String tenant,
451463
}
452464
});
453465
completableFutureSet.add(future);
466+
} else {
467+
log.error("Get function {}-{} status failed from namespace {}, cannot find status for shardId {}",
468+
finalStatefulSetName,
469+
shardId,
470+
nameSpaceName,
471+
shardId);
454472
}
455473
});
456474
completableFutureSet.forEach(CompletableFuture::join);
@@ -463,13 +481,54 @@ public FunctionStatus getFunctionStatus(final String tenant,
463481
log.warn("shardId invalid {}", podName);
464482
return;
465483
}
466-
FunctionStatus.FunctionInstanceStatus functionInstanceStatus = functionStatus.getInstances().get(shardId);
484+
FunctionStatus.FunctionInstanceStatus functionInstanceStatus = null;
485+
for (FunctionStatus.FunctionInstanceStatus ins : functionStatus.getInstances()) {
486+
if (ins.getInstanceId() == shardId) {
487+
functionInstanceStatus = ins;
488+
break;
489+
}
490+
}
467491
if (functionInstanceStatus != null) {
468492
FunctionStatus.FunctionInstanceStatus.FunctionInstanceStatusData functionInstanceStatusData = functionInstanceStatus.getStatus();
469493
V1PodStatus podStatus = pod.getStatus();
470-
if (podStatus != null && StringUtils.isNotEmpty(podStatus.getPhase())) {
471-
functionInstanceStatusData.setError(podStatus.getPhase());
494+
if (podStatus != null) {
495+
List<V1ContainerStatus> containerStatuses = podStatus.getContainerStatuses();
496+
if (containerStatuses != null && !containerStatuses.isEmpty()) {
497+
V1ContainerStatus containerStatus = null;
498+
for (V1ContainerStatus s : containerStatuses){
499+
if (s.getImage().contains(v1alpha1Function.getSpec().getImage())) {
500+
containerStatus = s;
501+
break;
502+
}
503+
}
504+
if (containerStatus != null) {
505+
V1ContainerState state = containerStatus.getState();
506+
if (state != null && state.getTerminated() != null) {
507+
functionInstanceStatusData.setError(state.getTerminated().getMessage());
508+
} else if (state != null && state.getWaiting() != null) {
509+
functionInstanceStatusData.setError(state.getWaiting().getMessage());
510+
} else {
511+
V1ContainerState lastState = containerStatus.getLastState();
512+
if (lastState != null && lastState.getTerminated() != null) {
513+
functionInstanceStatusData.setError(lastState.getTerminated().getMessage());
514+
} else if (lastState != null && lastState.getWaiting() != null) {
515+
functionInstanceStatusData.setError(lastState.getWaiting().getMessage());
516+
}
517+
}
518+
if (containerStatus.getRestartCount() != null) {
519+
functionInstanceStatusData.setNumRestarts(containerStatus.getRestartCount());
520+
}
521+
} else {
522+
functionInstanceStatusData.setError(podStatus.getPhase());
523+
}
524+
}
472525
}
526+
} else {
527+
log.error("Get function {}-{} status failed from namespace {}, cannot find status for shardId {}",
528+
finalStatefulSetName,
529+
shardId,
530+
nameSpaceName,
531+
shardId);
473532
}
474533
});
475534
}

0 commit comments

Comments
 (0)