Skip to content

Commit 39abd44

Browse files
author
jiuyu
committed
Feat: support Runtime Fuse UpdateStrategy - OnIdle
Signed-off-by: 玖宇 <guotongyu.gty@alibaba-inc.com>
1 parent 13ec658 commit 39abd44

309 files changed

Lines changed: 77294 additions & 328 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

api/v1alpha1/constant.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,16 @@ const (
5353
// OnRuntimeDeletedCleanPolicy cleans fuse pod only when the cache runtime is deleted
5454
OnRuntimeDeletedCleanPolicy FuseCleanPolicy = "OnRuntimeDeleted"
5555
)
56+
57+
type FuseUpdateStrategy string
58+
59+
const (
60+
// NoneFuseUpdateStrategy is the default (unset) update strategy. It will be transformed to OnDeleteFuseUpdateStrategy automatically.
61+
NoneFuseUpdateStrategy FuseUpdateStrategy = ""
62+
63+
// OnDeleteFuseUpdateStrategy updates the fuse pod on a node once the fuse pod on that node is deleted
64+
OnDeleteFuseUpdateStrategy FuseUpdateStrategy = "OnDelete"
65+
66+
// OnIdleFuseUpdateStrategy updates the fuse pod on a node once the fuse pod on that node is idle
67+
OnIdleFuseUpdateStrategy FuseUpdateStrategy = "OnIdle"
68+
)

api/v1alpha1/juicefsruntime_types.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ type JuiceFSFuseSpec struct {
164164
// +optional
165165
CleanPolicy FuseCleanPolicy `json:"cleanPolicy,omitempty"`
166166

167+
// UpdateStrategy decides when to update Fuse pods.
168+
// Currently Fluid supports two update strategies: OnDelete and OnIdle.
169+
// OnDelete updates the fuse pod via the native daemonset once the fuse pod on some node is deleted.
170+
// OnIdle updates the fuse pod once the fuse pod on some node is idle.
171+
// Defaults to OnDelete
172+
// +optional
173+
UpdateStrategy FuseUpdateStrategy `json:"updateStrategy,omitempty"`
174+
167175
// PodMetadata defines labels and annotations that will be propagated to JuiceFs's pods.
168176
// +optional
169177
PodMetadata PodMetadata `json:"podMetadata,omitempty"`

api/v1alpha1/openapi_generated.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

charts/arena-artifacts/.helmignore

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Patterns to ignore when building packages.
2+
# This supports shell glob matching, relative path matching, and
3+
# negation (prefixed with !). Only one pattern per line.
4+
.DS_Store
5+
# Common VCS dirs
6+
.git/
7+
.gitignore
8+
.bzr/
9+
.bzrignore
10+
.hg/
11+
.hgignore
12+
.svn/
13+
# Common backup files
14+
*.swp
15+
*.bak
16+
*.tmp
17+
*.orig
18+
*~
19+
# Various IDEs
20+
.project
21+
.idea/
22+
*.tmproj
23+
.vscode/

charts/arena-artifacts/Chart.yaml

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
apiVersion: v2
2+
name: arena-artifacts
3+
description: A Helm chart for installing arena dependencies
4+
5+
# A chart can be either an 'application' or a 'library' chart.
6+
#
7+
# Application charts are a collection of templates that can be packaged into versioned archives
8+
# to be deployed.
9+
#
10+
# Library charts provide useful utilities or functions for the chart developer. They're included as
11+
# a dependency of application charts to inject those utilities and functions into the rendering
12+
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
13+
type: application
14+
15+
# This is the chart version. This version number should be incremented each time you make changes
16+
# to the chart and its templates, including the app version.
17+
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18+
version: 0.14.0
19+
20+
# This is the version number of the application being deployed. This version number should be
21+
# incremented each time you make changes to the application. Versions are not expected to
22+
# follow Semantic Versioning. They should reflect the version the application is using.
23+
# It is recommended to use it with quotes.
24+
appVersion: 0.14.0
25+
26+
dependencies:
27+
- name: tf-operator
28+
alias: tf
29+
version: 0.1.0
30+
repository: "@tf-operator"
31+
condition: tf.enabled,global.tf.enabled
32+
- name: tf-dashboard
33+
alias: tfdashboard
34+
version: 0.1.0
35+
repository: "@tf-dashbard"
36+
condition: tfdashboard.enabled,global.tfdashboard.enabled
37+
- name: cron-operator
38+
alias: cron
39+
version: 0.1.0
40+
repository: "@cron-operator"
41+
condition: cron.enabled,global.cron.enabled
42+
- name: et-operator
43+
alias: et
44+
version: 0.1.1
45+
repository: "@et-operator"
46+
condition: et.enabled,global.et.enabled
47+
- name: mpi-operator
48+
alias: mpi
49+
version: 0.1.0
50+
repository: "@mpi-operator"
51+
condition: mpi.enabled,global.mpi.enabled
52+
- name: pytorch-operator
53+
alias: pytorch
54+
version: 0.1.0
55+
repository: "@pytorch-operator"
56+
condition: pytorch.enabled,global.pytorch.enabled
57+
- name: gpu-exporter
58+
alias: exporter
59+
version: 0.1.0
60+
repository: "@gpu-exporter"
61+
condition: exporter.enabled,global.exporter.enabled
62+
- name: elastic-job-supervisor
63+
alias: elastic-job-supervisor
64+
version: 0.1.0
65+
repository: "@elastic-job-supervisor"
66+
condition: elastic-job-supervisor.enabled,global.elastic-job-supervisor.enabled
67+
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
---
2+
apiVersion: apiextensions.k8s.io/v1
3+
kind: CustomResourceDefinition
4+
metadata:
5+
annotations:
6+
controller-gen.kubebuilder.io/version: v0.6.0
7+
git-repo: http://gitlab.alibaba-inc.com/kube-ai/kubedlpro.git
8+
git-branch: feature/k8s-1.22
9+
git-commit: 4f076d22
10+
creationTimestamp: null
11+
name: crons.apps.kubedl.io
12+
spec:
13+
group: apps.kubedl.io
14+
names:
15+
kind: Cron
16+
listKind: CronList
17+
plural: crons
18+
singular: cron
19+
scope: Namespaced
20+
versions:
21+
- additionalPrinterColumns:
22+
- jsonPath: .status.conditions[-1:].type
23+
name: State
24+
type: string
25+
- jsonPath: .metadata.creationTimestamp
26+
name: Age
27+
type: date
28+
name: v1alpha1
29+
schema:
30+
openAPIV3Schema:
31+
description: Cron is the Schema for the crons API
32+
properties:
33+
apiVersion:
34+
description: 'APIVersion defines the versioned schema of this representation
35+
of an object. Servers should convert recognized schemas to the latest
36+
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
37+
type: string
38+
kind:
39+
description: 'Kind is a string value representing the REST resource this
40+
object represents. Servers may infer this from the endpoint the client
41+
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
42+
type: string
43+
metadata:
44+
type: object
45+
spec:
46+
description: CronSpec defines the desired state of Cron
47+
properties:
48+
concurrencyPolicy:
49+
description: 'Specifies how to treat concurrent executions of a Task.
50+
Valid values are: - "Allow" (default): allows CronJobs to run concurrently;
51+
- "Forbid": forbids concurrent runs, skipping next run if previous
52+
run hasn''t finished yet; - "Replace": cancels currently running
53+
job and replaces it with a new one'
54+
type: string
55+
deadline:
56+
description: Deadline is the timestamp that a cron job can keep scheduling
57+
util then.
58+
format: date-time
59+
type: string
60+
historyLimit:
61+
description: The number of finished job history to retain. This is
62+
a pointer to distinguish between explicit zero and not specified.
63+
format: int32
64+
type: integer
65+
schedule:
66+
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
67+
type: string
68+
suspend:
69+
description: This flag tells the controller to suspend subsequent
70+
executions, it does not apply to already started executions. Defaults
71+
to false.
72+
type: boolean
73+
template:
74+
description: Specifies the job that will be created when executing
75+
a CronTask.
76+
properties:
77+
apiVersion:
78+
description: 'APIVersion defines the versioned schema of this
79+
representation of an object. Servers should convert recognized
80+
schemas to the latest internal value, and may reject unrecognized
81+
values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
82+
type: string
83+
kind:
84+
description: 'Kind is a string value representing the REST resource
85+
this object represents. Servers may infer this from the endpoint
86+
the client submits requests to. Cannot be updated. In CamelCase.
87+
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
88+
type: string
89+
workload:
90+
description: Workload is the specification of the desired cron
91+
job with specific types.
92+
type: object
93+
x-kubernetes-preserve-unknown-fields: true
94+
type: object
95+
required:
96+
- schedule
97+
- template
98+
type: object
99+
status:
100+
description: CronStatus defines the observed state of Cron
101+
properties:
102+
active:
103+
description: A list of currently running jobs.
104+
items:
105+
description: 'ObjectReference contains enough information to let
106+
you inspect or modify the referred object. --- New uses of this
107+
type are discouraged because of difficulty describing its usage
108+
when embedded in APIs. 1. Ignored fields. It includes many fields
109+
which are not generally honored. For instance, ResourceVersion
110+
and FieldPath are both very rarely valid in actual usage. 2.
111+
Invalid usage help. It is impossible to add specific help for
112+
individual usage. In most embedded usages, there are particular restrictions
113+
like, "must refer only to types A and B" or "UID not honored"
114+
or "name must be restricted". Those cannot be well described
115+
when embedded. 3. Inconsistent validation. Because the usages
116+
are different, the validation rules are different by usage, which
117+
makes it hard for users to predict what will happen. 4. The fields
118+
are both imprecise and overly precise. Kind is not a precise
119+
mapping to a URL. This can produce ambiguity during interpretation
120+
and require a REST mapping. In most cases, the dependency is
121+
on the group,resource tuple and the version of the actual
122+
struct is irrelevant. 5. We cannot easily change it. Because
123+
this type is embedded in many locations, updates to this type will
124+
affect numerous schemas. Don''t make new APIs embed an underspecified
125+
API type they do not control. Instead of using this type, create
126+
a locally provided and used type that is well-focused on your
127+
reference. For example, ServiceReferences for admission registration:
128+
https://github.com/kubernetes/api/blob/release-1.17/admissionregistration/v1/types.go#L533
129+
.'
130+
properties:
131+
apiVersion:
132+
description: API version of the referent.
133+
type: string
134+
fieldPath:
135+
description: 'If referring to a piece of an object instead of
136+
an entire object, this string should contain a valid JSON/Go
137+
field access statement, such as desiredState.manifest.containers[2].
138+
For example, if the object reference is to a container within
139+
a pod, this would take on a value like: "spec.containers{name}"
140+
(where "name" refers to the name of the container that triggered
141+
the event) or if no container name is specified "spec.containers[2]"
142+
(container with index 2 in this pod). This syntax is chosen
143+
only to have some well-defined way of referencing a part of
144+
an object. TODO: this design is not final and this field is
145+
subject to change in the future.'
146+
type: string
147+
kind:
148+
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
149+
type: string
150+
name:
151+
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
152+
type: string
153+
namespace:
154+
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
155+
type: string
156+
resourceVersion:
157+
description: 'Specific resourceVersion to which this reference
158+
is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
159+
type: string
160+
uid:
161+
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
162+
type: string
163+
type: object
164+
type: array
165+
history:
166+
description: History is a list of scheduled cron job with its digest
167+
records.
168+
items:
169+
properties:
170+
created:
171+
description: Created is the creation timestamp of job.
172+
format: date-time
173+
type: string
174+
finished:
175+
description: Finished is the failed or succeeded timestamp of
176+
job.
177+
format: date-time
178+
type: string
179+
object:
180+
description: Object is the reference of the historical scheduled
181+
cron job.
182+
properties:
183+
apiGroup:
184+
description: APIGroup is the group for the resource being
185+
referenced. If APIGroup is not specified, the specified
186+
Kind must be in the core API group. For any other third-party
187+
types, APIGroup is required.
188+
type: string
189+
kind:
190+
description: Kind is the type of resource being referenced
191+
type: string
192+
name:
193+
description: Name is the name of resource being referenced
194+
type: string
195+
required:
196+
- kind
197+
- name
198+
type: object
199+
status:
200+
description: Status is the final status when job finished.
201+
type: string
202+
required:
203+
- object
204+
- status
205+
type: object
206+
type: array
207+
lastScheduleTime:
208+
description: Information when was the last time the job was successfully
209+
scheduled.
210+
format: date-time
211+
type: string
212+
type: object
213+
type: object
214+
served: true
215+
storage: true
216+
subresources:
217+
status: {}
218+
status:
219+
acceptedNames:
220+
kind: ""
221+
plural: ""
222+
conditions: []
223+
storedVersions: []

0 commit comments

Comments
 (0)