1- from typing import Optional , List
1+ from typing import Optional , Union
22
3- from clipped .compact .pydantic import Field
4- from clipped .types .ref_or_obj import IntOrRef
3+ from clipped .compact .pydantic import (
4+ Field ,
5+ field_validator ,
6+ validation_always ,
7+ validation_before ,
8+ )
9+ from clipped .types .ref_or_obj import IntOrRef , RefField
10+ from polyaxon ._k8s import k8s_schemas , k8s_validation
511
612from polyaxon ._schemas .base import BaseSchemaModel
713
814
9- class V1ActivityProbeHttp (BaseSchemaModel ):
10- """HTTP-based activity probe configuration for detecting service activity.
11-
12- Used with service culling to check for activity by polling an HTTP endpoint.
13- Commonly used with Jupyter notebooks to poll the `/api/status` endpoint.
14-
15- Args:
16- path: str, optional - The HTTP path to poll for activity status
17- port: int, optional - The port number where the service is listening
18-
19- ## YAML usage
20-
21- ```yaml
22- >>> probe:
23- >>> http:
24- >>> path: "/api/status"
25- >>> port: 8888
26- ```
27-
28- ## Python usage
29-
30- ```python
31- >>> from polyaxon.schemas import V1ActivityProbeHttp
32- >>> probe = V1ActivityProbeHttp(
33- >>> path="/api/status",
34- >>> port=8888
35- >>> )
36- ```
37-
38- ## Fields
39-
40- ### path
41-
42- The HTTP path to the activity status endpoint. For Jupyter notebooks,
43- this is typically `/api/status` which returns information about
44- last activity, active kernels, and connections.
45-
46- ```yaml
47- >>> probe:
48- >>> http:
49- >>> path: "/api/status"
50- ```
51-
52- ### port
53-
54- The port number where the service is listening. For Jupyter notebooks,
55- this is typically 8888.
56-
57- ```yaml
58- >>> probe:
59- >>> http:
60- >>> port: 8888
61- ```
62- """
63-
64- path : Optional [str ] = None
65- port : Optional [int ] = None
66-
67-
68- class V1ActivityProbeExec (BaseSchemaModel ):
69- """Command-based activity probe configuration for detecting service activity.
70-
71- Used with service culling to check for activity by executing a custom command.
72- The command should return exit code 0 if there was activity, or exit code 1 if idle.
73-
74- Args:
75- command: List[str], optional - The command to execute for checking activity
76-
77- ## YAML usage
78-
79- ```yaml
80- >>> probe:
81- >>> exec:
82- >>> command: ["bash", "-c", "check-activity.sh"]
83- ```
84-
85- ## Python usage
86-
87- ```python
88- >>> from polyaxon.schemas import V1ActivityProbeExec
89- >>> probe = V1ActivityProbeExec(
90- >>> command=["bash", "-c", "check-activity.sh"]
91- >>> )
92- ```
93-
94- ## Fields
95-
96- ### command
97-
98- The command to execute inside the container to check for activity.
99- The command should return:
100- - Exit code 0: Activity detected (service is active)
101- - Exit code 1: No activity detected (service is idle)
102-
103- The command is executed directly (not in a shell) unless you explicitly
104- invoke a shell as shown in the example.
105-
106- ```yaml
107- >>> probe:
108- >>> exec:
109- >>> command: ["bash", "-c", "test -f /tmp/activity && exit 0 || exit 1"]
110- ```
111- """
112-
113- command : Optional [List [str ]] = None
114-
115-
11615class V1ActivityProbe (BaseSchemaModel ):
11716 """Activity probe configuration for detecting service activity during culling checks.
11817
@@ -178,8 +77,21 @@ class V1ActivityProbe(BaseSchemaModel):
17877 ```
17978 """
18079
181- var_exec : Optional [V1ActivityProbeExec ] = Field (None , alias = "exec" )
182- http : Optional [V1ActivityProbeHttp ] = None
80+ _IDENTIFIER = "probe"
81+ _SWAGGER_FIELDS = [
82+ "exec" ,
83+ "http" ,
84+ ]
85+ var_exec : Optional [k8s_schemas .V1ExecAction ] = Field (None , alias = "exec" )
86+ http : Optional [k8s_schemas .V1HTTPGetAction ] = None
87+
88+ @field_validator ("var_exec" , ** validation_always , ** validation_before )
89+ def validate_var_exec (cls , v ):
90+ return k8s_validation .validate_k8s_exec_action (v )
91+
92+ @field_validator ("http" , ** validation_always , ** validation_before )
93+ def validate_http (cls , v ):
94+ return k8s_validation .validate_k8s_http_get_action (v )
18395
18496
18597class V1Culling (BaseSchemaModel ):
@@ -244,6 +156,7 @@ class V1Termination(BaseSchemaModel):
244156 timeout: int, optional
245157 culling: V1Culling, optional
246158 probe: V1ActivityProbe, optional
159+ pod_failure_policy: V1PodFailurePolicy, optional
247160
248161 ## YAML usage
249162
@@ -258,6 +171,11 @@ class V1Termination(BaseSchemaModel):
258171 >>> http:
259172 >>> path: "/api/status"
260173 >>> port: 8888
174+ >>> podFailurePolicy:
175+ >>> rules:
176+ >>> - action: Ignore
177+ >>> onPodConditions:
178+ >>> - type: DisruptionTarget
261179 ```
262180
263181 ## Python usage
@@ -399,12 +317,59 @@ class V1Termination(BaseSchemaModel):
399317
400318 See [services timeout preset documentation](/docs/core/scheduling-presets/services-timeout/)
401319 for detailed examples and use cases.
320+
321+ ### podFailurePolicy
322+
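323+ > **Note**: Available from v2.13. Requires Kubernetes v1.25+ (on v1.25 the feature is alpha and the `JobPodFailurePolicy` feature gate must be enabled; from v1.26 it is enabled by default).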
324+
325+ Pod failure policy configuration that defines fine-grained rules for how pod failures
326+ should be handled. This feature allows you to:
327+ - Fail jobs immediately on certain exit codes (non-retriable errors)
328+ - Ignore failures due to involuntary disruptions (preemption, eviction)
329+ - Control which failures count towards the backoff limit
330+
331+ ```yaml
332+ >>> termination:
333+ >>> maxRetries: 3
334+ >>> podFailurePolicy:
335+ >>> rules:
336+ >>> # Fail immediately on exit code 42 (non-retriable error)
337+ >>> - action: FailJob
338+ >>> onExitCodes:
339+ >>> containerName: main
340+ >>> operator: In
341+ >>> values: [42]
342+ >>> # Ignore pod disruptions (preemption, eviction)
343+ >>> - action: Ignore
344+ >>> onPodConditions:
345+ >>> - type: DisruptionTarget
346+ ```
347+
348+ Available actions:
349+ - `FailJob`: Mark the job as failed immediately without further retries
350+ - `Ignore`: Don't count this failure towards the backoff limit
351+ - `Count`: Count towards backoff limit (default behavior)
352+ - `FailIndex`: Fail only the affected index without further retries (applies to Indexed jobs that set `backoffLimitPerIndex`)
353+
354+ See [Kubernetes Pod Failure Policy](https://kubernetes.io/docs/tasks/job/pod-failure-policy/)
355+ for more details.
402356 """
403357
404358 _IDENTIFIER = "termination"
359+ _SWAGGER_FIELDS = [
360+ "podFailurePolicy" ,
361+ ]
362+ _CUSTOM_DUMP_FIELDS = {"probe" }
405363
406364 max_retries : Optional [IntOrRef ] = Field (alias = "maxRetries" , default = None )
407365 ttl : Optional [IntOrRef ] = None
408366 timeout : Optional [IntOrRef ] = None
409367 culling : Optional [V1Culling ] = None
410368 probe : Optional [V1ActivityProbe ] = None
369+ pod_failure_policy : Optional [Union [k8s_schemas .V1PodFailurePolicy , RefField ]] = (
370+ Field (None , alias = "podFailurePolicy" )
371+ )
372+
373+ @field_validator ("pod_failure_policy" , ** validation_always , ** validation_before )
374+ def validate_pod_failure_policy (cls , v ):
375+ return k8s_validation .validate_k8s_pod_failure_policy (v )
0 commit comments