Skip to content

Commit bb9cbf1

Browse files
HIVE-29679: Update Tez AM K8s Operator Auto-Scaling to scale down idle AMs (#6561)
1 parent bdde46e commit bb9cbf1

24 files changed

Lines changed: 579 additions & 65 deletions

llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,9 @@ public void dagComplete() {
10801080
writeLock.lock();
10811081
try {
10821082
dagRunning = false;
1083+
if (metrics != null) {
1084+
metrics.setDagRunning(false);
1085+
}
10831086
dagStats = new StatsPerDag();
10841087
int pendingCount = 0;
10851088
for (Entry<Priority, List<TaskInfo>> entry : pendingTasks.entrySet()) {
@@ -1173,6 +1176,9 @@ public void allocateTask(Object task, Resource capability, String[] hosts, Strin
11731176
metrics.setDagId(id.getDAGID().toString());
11741177
}
11751178
dagRunning = true;
1179+
if (metrics != null) {
1180+
metrics.setDagRunning(true);
1181+
}
11761182
}
11771183
dagStats.registerTaskRequest(hosts, racks);
11781184
addPendingTask(taskInfo);
@@ -1194,6 +1200,9 @@ public void allocateTask(Object task, Resource capability, ContainerId container
11941200
metrics.setDagId(id.getDAGID().toString());
11951201
}
11961202
dagRunning = true;
1203+
if (metrics != null) {
1204+
metrics.setDagRunning(true);
1205+
}
11971206
}
11981207
dagStats.registerTaskRequest(null, null);
11991208
addPendingTask(taskInfo);

llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/metrics/LlapTaskSchedulerInfo.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ public enum LlapTaskSchedulerInfo implements MetricsInfo {
3838
SchedulerRunningTaskCount("Total number of running tasks"),
3939
SchedulerPendingPreemptionTaskCount("Total number of tasks pending for pre-emption"),
4040
SchedulerPreemptedTaskCount("Total number of tasks pre-empted"),
41-
SchedulerCompletedDagCount("Number of DAGs completed");
41+
SchedulerCompletedDagCount("Number of DAGs completed"),
42+
SchedulerDagStatus("Current AM operational DAG status: 0 for IDLE, 1 for RUNNING");
4243

4344
private final String desc;
4445

llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/metrics/LlapTaskSchedulerMetrics.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerPendingPreemptionTaskCount;
2626
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerPendingTaskCount;
2727
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerPreemptedTaskCount;
28+
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerDagStatus;
2829
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerRunningTaskCount;
2930
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerSchedulableTaskCount;
3031
import static org.apache.hadoop.hive.llap.tezplugins.metrics.LlapTaskSchedulerInfo.SchedulerSuccessfulTaskCount;
@@ -84,6 +85,8 @@ public class LlapTaskSchedulerMetrics implements MetricsSource {
8485
@Metric
8586
MutableCounterInt completedDagcount;
8687
@Metric
88+
MutableGaugeInt dagRunning;
89+
@Metric
8790
MutableCounterInt pendingPreemptionTasksCount;
8891
@Metric
8992
MutableGaugeInt wmUnusedGuaranteedCount;
@@ -276,6 +279,7 @@ private void getTaskSchedulerStats(MetricsRecordBuilder rb) {
276279
.addGauge(SchedulerMemoryPerInstance, memoryPerInstance.value())
277280
.addGauge(SchedulerCpuCoresPerInstance, cpuCoresPerInstance.value())
278281
.addGauge(SchedulerDisabledNodeCount, disabledNodeCount.value())
282+
.addGauge(SchedulerDagStatus, dagRunning.value())
279283
.addCounter(SchedulerPendingTaskCount, pendingTasksCount.value())
280284
.addCounter(SchedulerSchedulableTaskCount, schedulableTasksCount.value())
281285
.addCounter(SchedulerRunningTaskCount, runningTasksCount.value())
@@ -285,6 +289,10 @@ private void getTaskSchedulerStats(MetricsRecordBuilder rb) {
285289
.addCounter(SchedulerCompletedDagCount, completedDagcount.value());
286290
}
287291

292+
/**
 * Records whether a DAG is currently running by updating the {@code dagRunning} gauge,
 * which is published under {@code SchedulerDagStatus} (0 for IDLE, 1 for RUNNING).
 *
 * @param running {@code true} stores 1 in the gauge, {@code false} stores 0
 */
public void setDagRunning(boolean running) {
293+
dagRunning.set(running ? 1 : 0);
294+
}
295+
288296
/**
 * Returns the {@code jvmMetrics} field held by this metrics source.
 *
 * @return the current value of {@code jvmMetrics}
 */
public JvmMetrics getJvmMetrics() {
289297
return jvmMetrics;
290298
}

packaging/src/kubernetes/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ HS2 routes sessions to clusters server-side based on admin-defined user/group ru
520520

521521
Each LLAP cluster is fully isolated:
522522
- **Separate LLAP daemon StatefulSet** with independent executor count, memory, and replicas
523-
- **Separate TezAM StatefulSet** (one per LLAP cluster) with its own ZooKeeper registration
523+
- **Separate TezAM Deployment** (one per LLAP cluster) with its own ZooKeeper registration
524524
- **Separate autoscaling** — each cluster scales independently based on its own metrics
525525
- **Shared scratch PVC** (ReadWriteMany) for HS2 ↔ TezAM coordination files
526526

@@ -618,11 +618,11 @@ For the above configuration, the operator creates:
618618
| Resource | Name | Purpose |
619619
|----------|------|---------|
620620
| StatefulSet | `hive-production` | LLAP daemons for production cluster |
621-
| StatefulSet | `hive-tezam-production` | TezAM for production cluster |
621+
| Deployment | `hive-tezam-production` | TezAM for production cluster |
622622
| StatefulSet | `hive-analytics` | LLAP daemons for analytics cluster |
623-
| StatefulSet | `hive-tezam-analytics` | TezAM for analytics cluster |
623+
| Deployment | `hive-tezam-analytics` | TezAM for analytics cluster |
624624
| StatefulSet | `hive-dev` | LLAP daemons for dev cluster |
625-
| StatefulSet | `hive-tezam-dev` | TezAM for dev cluster |
625+
| Deployment | `hive-tezam-dev` | TezAM for dev cluster |
626626
| Service (headless) | `hive-production`, `hive-analytics`, `hive-dev` | LLAP daemon discovery |
627627
| Service (headless) | `hive-tezam-production`, `hive-tezam-analytics`, `hive-tezam-dev` | TezAM discovery |
628628
| ConfigMap | `hive-production-config`, etc. | `llap-daemon-site.xml` per cluster |
@@ -1391,7 +1391,7 @@ setup is needed — simply connect to HS2 and the operator wakes LLAP/TezAM as n
13911391

13921392
LLAP is configured as an array (`llapClusters`) to support multi-tenant deployments with
13931393
independent scaling. Each entry creates a separate LLAP StatefulSet, Service, ConfigMap,
1394-
and a paired TezAM StatefulSet (when `tezAm.enabled: true`).
1394+
and a paired TezAM Deployment (when `tezAm.enabled: true`).
13951395

13961396
| Value | Default | Description |
13971397
|-------|---------|-------------|
@@ -1419,7 +1419,7 @@ Clients connect with just their identity — no cluster-specific JDBC URL params
14191419

14201420
### Tez AM
14211421

1422-
TezAM is deployed as one StatefulSet per LLAP cluster. The global `tezAm` section
1422+
TezAM is deployed as one Deployment per LLAP cluster. The global `tezAm` section
14231423
controls shared settings (enabled flag, scratch PVC). Per-LLAP TezAM settings
14241424
(replicas, autoscaling) can be overridden in each `llapClusters[].tezAm` entry.
14251425

@@ -1587,14 +1587,14 @@ HiveClusterReconciler
15871587
|
15881588
+-- [Imperative] Per-LLAP-Cluster Resources (for each llapClusters[] entry):
15891589
+-- LLAP StatefulSet + headless Service + ConfigMap + PDB
1590-
+-- TezAM StatefulSet + headless Service + ConfigMap (one TezAM per LLAP cluster)
1590+
+-- TezAM Deployment + headless Service + ConfigMap (one TezAM per LLAP cluster)
15911591
```
15921592
15931593
LLAP clusters and their paired TezAM instances are managed imperatively by the reconciler
15941594
(not via JOSDK workflow dependents) because the number of clusters is dynamic — determined
15951595
at runtime from the CR spec. Each `llapClusters[]` entry produces:
15961596
- **LLAP**: StatefulSet (`{cluster}-{name}`), headless Service, ConfigMap (`llap-daemon-site.xml`), PDB
1597-
- **TezAM**: StatefulSet (`{cluster}-tezam-{name}`), headless Service, ConfigMap (`tez-site.xml`)
1597+
- **TezAM**: Deployment (`{cluster}-tezam-{name}`), headless Service, ConfigMap (`tez-site.xml`)
15981598
15991599
All imperative resources are applied via `serverSideApply()`. Removed LLAP clusters (and
16001600
their TezAMs) are garbage-collected automatically using label-based discovery.

packaging/src/kubernetes/helm/hive-operator/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ description: Apache Hive Kubernetes Operator - deploys and manages Hive clusters
1919
type: application
2020
version: "4.3.0-SNAPSHOT"
2121
appVersion: "4.3.0-SNAPSHOT"
22-
kubeVersion: ">=1.25.0"
22+
kubeVersion: ">=1.25.0-0"
2323
keywords:
2424
- hive
2525
- hadoop

packaging/src/kubernetes/helm/hive-operator/crds/hiveclusters.hive.apache.org-v1.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,10 @@ spec:
692692
type: string
693693
type: object
694694
x-kubernetes-preserve-unknown-fields: true
695+
serviceAccountName:
696+
description: "Kubernetes ServiceAccount name for all component pods.\
697+
\ If not specified, pods use the namespace default service account."
698+
type: string
695699
suspend:
696700
description: "When true, the cluster is immediately suspended (all\
697701
\ components scaled to 0). Set to false to wake a suspended cluster."

packaging/src/kubernetes/helm/hive-operator/templates/clusterrole.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,7 @@ rules:
5858
- apiGroups: ["policy"]
5959
resources: ["poddisruptionbudgets"]
6060
verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
61+
# EndpointSlices: operator manages a custom per-pod-hostname slice for TezAM DNS
62+
- apiGroups: ["discovery.k8s.io"]
63+
resources: ["endpointslices"]
64+
verbs: ["get", "list", "create", "patch", "delete"]

packaging/src/kubernetes/helm/hive-operator/templates/hivecluster.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ metadata:
2626
spec:
2727
image: {{ .Values.cluster.image }}
2828
imagePullPolicy: {{ .Values.cluster.imagePullPolicy }}
29+
{{- if .Values.cluster.serviceAccountName }}
30+
serviceAccountName: {{ .Values.cluster.serviceAccountName | quote }}{{/* quoted so numeric- or boolean-looking names (e.g. "123", "true") stay YAML strings, as the CRD requires type: string */}}
31+
{{- end }}
2932

3033
metastore:
3134
enabled: {{ .Values.cluster.metastore.enabled }}
@@ -178,7 +181,6 @@ spec:
178181
tezAm:
179182
enabled: {{ .Values.cluster.tezAm.enabled }}
180183
{{- if .Values.cluster.tezAm.enabled }}
181-
replicas: {{ .Values.cluster.tezAm.replicas }}
182184
scratchStorageSize: {{ .Values.cluster.tezAm.scratchStorageSize | quote }}
183185
{{- if .Values.cluster.tezAm.scratchStorageClassName }}
184186
scratchStorageClassName: {{ .Values.cluster.tezAm.scratchStorageClassName | quote }}

packaging/src/kubernetes/helm/hive-operator/values.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ cluster:
4747
image: "apache/hive:4.3.0-SNAPSHOT"
4848
imagePullPolicy: IfNotPresent
4949

50+
# ServiceAccount name for all component pods (HS2, Metastore, LLAP, TezAM, schema-init).
51+
# If empty, pods use the namespace default service account.
52+
serviceAccountName: ""
53+
5054
# ---------------------------------------------------------------------------
5155
# DATABASE (Required) — RDBMS for the Hive Metastore backend
5256
# ---------------------------------------------------------------------------
@@ -207,7 +211,6 @@ cluster:
207211
# ---------------------------------------------------------------------------
208212
tezAm:
209213
enabled: true
210-
replicas: 2
211214
scratchStorageSize: "1Gi"
212215
scratchStorageClassName: ""
213216
resources: {}

packaging/src/kubernetes/pom.xml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,6 @@
5252
<artifactId>kubernetes-httpclient-vertx</artifactId>
5353
<version>${fabric8.version}</version>
5454
</dependency>
55-
<dependency>
56-
<groupId>org.apache.commons</groupId>
57-
<artifactId>commons-lang3</artifactId>
58-
</dependency>
5955
<dependency>
6056
<groupId>io.github.java-diff-utils</groupId>
6157
<artifactId>java-diff-utils</artifactId>
@@ -73,6 +69,11 @@
7369
<version>${fabric8.version}</version>
7470
<scope>provided</scope>
7571
</dependency>
72+
<dependency>
73+
<groupId>org.apache.curator</groupId>
74+
<artifactId>curator-framework</artifactId>
75+
<version>${curator.version}</version>
76+
</dependency>
7677
<dependency>
7778
<groupId>org.slf4j</groupId>
7879
<artifactId>slf4j-api</artifactId>

0 commit comments

Comments
 (0)