Skip to content

Commit 4afb7ed

Browse files
Merge pull request #114 from DSFans2014/feat/sync-v0.19.0
sync nvidia/k8s-device-plugin v0.19.0
2 parents 38a5c30 + 7c6afed commit 4afb7ed

128 files changed

Lines changed: 15167 additions & 3286 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,14 @@ You can set the sharing mode and customize your installation by adjusting the [c
135135

136136
Once you have enabled this option on *all* the GPU nodes you wish to use,
137137
you can then enable GPU support in your cluster by deploying the following Daemonset:
138+
#### Normal Mode
138139

139140
```
140-
$ kubectl create -f volcano-vgpu-device-plugin.yml
141+
$ kubectl apply -f volcano-vgpu-device-plugin.yml
142+
```
143+
#### CDI Mode
144+
```
145+
$ kubectl apply -f volcano-vgpu-device-plugin-cdi.yml
141146
```
142147
143148
### Verify environment is ready

api/config/v1/config.go

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
"fmt"
21+
"io"
22+
"os"
23+
24+
cli "github.com/urfave/cli/v2"
25+
"k8s.io/klog/v2"
26+
27+
"sigs.k8s.io/yaml"
28+
)
29+
30+
// Version indicates the version of the 'Config' struct used to hold configuration information.
// When a parsed config file omits its version, this value is filled in; a config file
// that declares any other version is rejected by the parser in this file.
31+
const Version = "v1"
32+
33+
// Config is a versioned struct used to hold configuration information.
// It is decoded from a YAML/JSON config file and then adjusted from the
// command line by NewConfig.
34+
type Config struct {
35+
// Version is the config schema version; an empty value is defaulted to the
// package-level Version constant when a config file is parsed.
Version string `json:"version" yaml:"version"`
36+
// Flags holds the flag-style settings; NewConfig updates it from the CLI flags
// after the config file (if any) has been loaded.
Flags Flags `json:"flags,omitempty" yaml:"flags,omitempty"`
37+
// Resources holds the GPU and MIG resource entries. Both lists are cleared by
// DisableResourceNamingInConfig.
Resources Resources `json:"resources,omitempty" yaml:"resources,omitempty"`
38+
// Sharing holds the time-slicing and MPS sharing settings.
Sharing Sharing `json:"sharing,omitempty" yaml:"sharing,omitempty"`
39+
// Imex holds the IMEX channel IDs and the "required" setting; NewConfig overrides
// them from the imex-channel-ids and imex-required CLI flags when those are set.
Imex Imex `json:"imex,omitempty" yaml:"imex,omitempty"`
40+
}
41+
42+
// NewConfig builds out a Config struct from a config file (or command line flags).
43+
// The data stored in the config will be populated in order of precedence from
44+
// (1) command line, (2) environment variable, (3) config file.
45+
func NewConfig(c *cli.Context, flags []cli.Flag) (*Config, error) {
46+
config := &Config{Version: Version}
47+
48+
if configFile := c.String("config-file"); configFile != "" {
49+
var err error
50+
config, err = parseConfig(configFile)
51+
if err != nil {
52+
return nil, fmt.Errorf("unable to parse config file: %v", err)
53+
}
54+
}
55+
56+
config.Flags.UpdateFromCLIFlags(c, flags)
57+
// TODO: This is currently not at the flags level?
58+
// Does this mean that we should move UpdateFromCLIFlags to function off Config?
59+
if c.IsSet("imex-channel-ids") {
60+
config.Imex.ChannelIDs = c.IntSlice("imex-channel-ids")
61+
}
62+
if c.IsSet("imex-required") {
63+
config.Imex.Required = c.Bool("imex-required")
64+
}
65+
66+
// If nvidiaDevRoot (the path to the device nodes on the host) is not set,
67+
// we default to using the driver root on the host.
68+
if config.Flags.NvidiaDevRoot == nil || *config.Flags.NvidiaDevRoot == "" {
69+
config.Flags.NvidiaDevRoot = config.Flags.NvidiaDriverRoot
70+
}
71+
72+
// We explicitly set sharing.mps.failRequestsGreaterThanOne = true
73+
// This can be relaxed in certain cases -- such as a single GPU -- but
74+
// requires additional logic around when it's OK to combine requests and
75+
// makes the semantics of a request unclear.
76+
if config.Sharing.MPS != nil {
77+
config.Sharing.MPS.FailRequestsGreaterThanOne = true
78+
}
79+
80+
return config, nil
81+
}
82+
83+
// DisableResourceNamingInConfig temporarily disable the resource renaming feature of the plugin.
84+
// This may be reenabled in a future release.
85+
func DisableResourceNamingInConfig(config *Config) {
86+
// Disable resource renaming through config.Resource
87+
if len(config.Resources.GPUs) > 0 || len(config.Resources.MIGs) > 0 {
88+
klog.Warning("Customizing the 'resources' field is not yet supported in the config. Ignoring...")
89+
}
90+
config.Resources.GPUs = nil
91+
config.Resources.MIGs = nil
92+
93+
// Disable renaming / device selection in Sharing.TimeSlicing.Resources
94+
config.Sharing.TimeSlicing.disableResoureRenaming("timeSlicing")
95+
// Disable renaming / device selection in Sharing.MPS.Resources
96+
config.Sharing.MPS.disableResoureRenaming("mps")
97+
}
98+
99+
// parseConfig parses a config file as either YAML of JSON and unmarshals it into a Config struct.
100+
func parseConfig(configFile string) (*Config, error) {
101+
reader, err := os.Open(configFile)
102+
if err != nil {
103+
return nil, fmt.Errorf("error opening config file: %v", err)
104+
}
105+
defer reader.Close()
106+
107+
config, err := parseConfigFrom(reader)
108+
if err != nil {
109+
return nil, fmt.Errorf("error parsing config file: %v", err)
110+
}
111+
112+
return config, nil
113+
}
114+
115+
func parseConfigFrom(reader io.Reader) (*Config, error) {
116+
var err error
117+
var configYaml []byte
118+
119+
configYaml, err = io.ReadAll(reader)
120+
if err != nil {
121+
return nil, fmt.Errorf("read error: %v", err)
122+
}
123+
124+
var config Config
125+
err = yaml.Unmarshal(configYaml, &config)
126+
if err != nil {
127+
return nil, fmt.Errorf("unmarshal error: %v", err)
128+
}
129+
130+
if config.Version == "" {
131+
config.Version = Version
132+
}
133+
134+
if config.Version != Version {
135+
return nil, fmt.Errorf("unknown version: %v", config.Version)
136+
}
137+
138+
return &config, nil
139+
}

api/config/v1/consts.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
cdiapi "tags.cncf.io/container-device-interface/pkg/cdi"
21+
)
22+
23+
// Constants related to resource names
24+
const (
25+
// ResourceNamePrefix is the domain prefix used for resource names.
ResourceNamePrefix = "volcano.sh"
26+
// NOTE(review): alternative prefix kept for reference -- presumably the
// upstream NVIDIA default; confirm before re-enabling.
// ResourceNamePrefix = "nvidia.com"
27+
DefaultSharedResourceNameSuffix = ".shared"
28+
// NOTE(review): presumably mirrors the 63-character Kubernetes name limit -- confirm.
MaxResourceNameLength = 63
29+
)
30+
31+
// Constants representing the various MIG strategies
32+
const (
33+
MigStrategyNone = "none"
34+
MigStrategySingle = "single"
35+
MigStrategyMixed = "mixed"
36+
)
37+
38+
// Constants to represent the various device list strategies
39+
const (
40+
DeviceListStrategyEnvVar = "envvar"
41+
DeviceListStrategyVolumeMounts = "volume-mounts"
42+
DeviceListStrategyCDIAnnotations = "cdi-annotations"
43+
DeviceListStrategyCDICRI = "cdi-cri"
44+
)
45+
46+
// Constants to represent the various device id strategies
47+
const (
48+
DeviceIDStrategyUUID = "uuid"
49+
DeviceIDStrategyIndex = "index"
50+
)
51+
52+
// Constants related to generating CDI specifications
53+
const (
54+
// DefaultCDIAnnotationPrefix reuses the annotation prefix exported by the CDI package.
DefaultCDIAnnotationPrefix = cdiapi.AnnotationPrefix
55+
// NOTE(review): presumably the default location of the nvidia-ctk binary -- confirm.
DefaultNvidiaCTKPath = "/usr/bin/nvidia-ctk"
56+
// NOTE(review): presumably where the driver root is mounted inside the plugin container -- confirm.
DefaultContainerDriverRoot = "/driver-root"
57+
)

api/config/v1/duration.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
"encoding/json"
21+
"fmt"
22+
"math"
23+
"time"
24+
)
25+
26+
// Duration wraps a time.Duration with custom JSON marshaling/unmarshaling and
// support for the special value "infinite", which is stored as math.MaxInt64
// nanoseconds.
type Duration time.Duration

// IsInfinite returns true if the duration represents an infinite sleep interval.
//
// It uses a pointer receiver so that it can be called on a nil *Duration, in
// which case it returns false.
func (d *Duration) IsInfinite() bool {
	return d != nil && time.Duration(*d) == math.MaxInt64
}

// String returns a human-readable representation of the duration: "infinite"
// for the infinite duration, and the time.Duration formatting otherwise.
func (d Duration) String() string {
	if d.IsInfinite() {
		return "infinite"
	}
	return time.Duration(d).String()
}

// MarshalJSON marshals 'Duration' as a JSON string holding its String() form
// (for example "1m0s" or "infinite").
func (d Duration) MarshalJSON() ([]byte, error) {
	return json.Marshal(d.String())
}

// UnmarshalJSON unmarshals raw bytes into a 'Duration' type.
//
// Accepted inputs:
//   - a JSON string: either "infinite" or anything time.ParseDuration accepts;
//   - a JSON number: interpreted as nanoseconds, with any fraction truncated;
//   - JSON null: leaves the receiver unchanged, following the encoding/json
//     convention for Unmarshalers.
//
// Any other JSON value yields an error.
func (d *Duration) UnmarshalJSON(b []byte) error {
	var v interface{}
	if err := json.Unmarshal(b, &v); err != nil {
		return err
	}
	switch value := v.(type) {
	case nil:
		// JSON null: no-op by convention.
		return nil
	case float64:
		*d = Duration(time.Duration(value))
		return nil
	case string:
		return d.parse(value)
	default:
		return fmt.Errorf("invalid duration")
	}
}

// parse parses a duration string, handling the special "infinite" value.
// The receiver is only modified when parsing succeeds.
func (d *Duration) parse(value string) error {
	if value == "infinite" {
		*d = Duration(math.MaxInt64)
		return nil
	}
	parsed, err := time.ParseDuration(value)
	if err != nil {
		return err
	}
	*d = Duration(parsed)
	return nil
}

// DurationValue implements cli.Generic for parsing duration flags with "infinite" support.
type DurationValue struct {
	// Value is the destination of Set; it may be nil until Set succeeds.
	Value *Duration
}

// NewDurationValue creates a new DurationValue with the given default duration.
func NewDurationValue(d time.Duration) *DurationValue {
	duration := Duration(d)
	return &DurationValue{Value: &duration}
}

// Set implements cli.Generic. It parses value (a duration string or
// "infinite") and stores the result in d.Value.
//
// When d.Value is nil a new Duration is allocated instead of dereferencing the
// nil pointer; when it is non-nil the existing pointer is written through, so
// anyone holding that pointer observes the new value. On a parse error d is
// left untouched.
func (d *DurationValue) Set(value string) error {
	var parsed Duration
	if err := parsed.parse(value); err != nil {
		return err
	}
	if d.Value == nil {
		d.Value = &parsed
		return nil
	}
	*d.Value = parsed
	return nil
}

// String implements cli.Generic. It returns "" when no value is set and the
// Duration's String() form otherwise.
func (d *DurationValue) String() string {
	if d.Value == nil {
		return ""
	}
	return d.Value.String()
}

0 commit comments

Comments
 (0)