diff --git a/Dockerfile b/Dockerfile
index 634337e..98a00c5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,5 +21,6 @@ RUN make all
 FROM $BASE_IMAGE
 ENV LD_LIBRARY_PATH=/usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64/driver:/usr/local/Ascend/driver/lib64/common
 COPY --from=build /build/ascend-device-plugin /usr/local/bin/ascend-device-plugin
+COPY ./lib/hami-vnpu-core/ /usr/local/hami-vnpu-core-assets/
 ENTRYPOINT ["ascend-device-plugin"]
diff --git a/README.md b/README.md
index 45ee4ec..a540ac1 100644
--- a/README.md
+++ b/README.md
@@ -1,110 +1,144 @@
-# Ascend Device Plugin
-[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fascend-device-plugin.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fascend-device-plugin?ref=badge_shield)
-
-
-## Introduction
-
-This Ascend device plugin is implemented for [HAMi](https://github.com/Project-HAMi/HAMi) and [volcano](https://github.com/volcano-sh/volcano) scheduling.
-
-Memory slicing is supported based on virtualization template, lease available template is automatically used. For detailed information, check [template](./ascend-device-configmap.yaml)
-
-## Prerequisites
-
-[ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime)
-
-```bash
-git submodule add https://gitcode.com/Ascend/mind-cluster.git
-```
-
-## Compile
-
-```bash
-make all
-```
-
-### Build
-
-```bash
-docker buildx build -t $IMAGE_NAME .
-```
-
-## Deployment
-
-### Label the Node with `ascend=on`
-
-```bash
-kubectl label node {ascend-node} ascend=on
-```
-
-### Deploy ConfigMap
-
-```bash
-kubectl apply -f ascend-device-configmap.yaml
-```
-
-### Deply RuntimeClass
-
-```bash
-kubectl apply -f ascend-runtimeclass.yaml
-```
-
-### Deploy `ascend-device-plugin`
-
-```bash
-kubectl apply -f ascend-device-plugin.yaml
-```
-
-If scheduling Ascend devices in HAMi, simply set `devices.ascend.enabled` to true when deploying HAMi, and the ConfigMap and `ascend-device-plugin` will be automatically deployed. refer https://github.com/Project-HAMi/HAMi/blob/master/charts/hami/README.md#huawei-ascend
-
-If you require HAMi to automatically add the `runtimeClassName` configuration to Pods requesting Ascend resources (this is disabled by default), you should set `devices.ascend.runtimeClassName` value to **a non-empty string** in HAMi’s `values.yaml` file, ensuring it matches the name of the `RuntimeClass` resource. For example:
-
-```yaml
-devices:
-  ascend:
-    runtimeClassName: ascend
-```
-
-## Usage
-
-To exclusively use an entire card or request multiple cards, you only need to set the corresponding resourceName. If multiple tasks need to share the same NPU, you need to set the corresponding resource request to 1 and configure the appropriate ResourceMemoryName.
-
-### Usage in HAMi
-
-```yaml
-...
-  containers:
-    - name: npu_pod
-      ...
-      resources:
-        limits:
-          huawei.com/Ascend910B: "1"
-          # if you don't specify Ascend910B-memory, it will use a whole NPU.
-          huawei.com/Ascend910B-memory: "4096"
-```
-
-For more examples, see [examples](./examples/)
-
-### Usage in volcano
-
-Volcano must be installed prior to usage, for more information see [here](https://github.com/volcano-sh/volcano/tree/master/docs/user-guide/how_to_use_vnpu.md)
-
-```yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ascend-pod
-spec:
-  schedulerName: volcano
-  containers:
-    - name: ubuntu-container
-      image: swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-pytorch:24.0.RC1-A2-1.11.0-ubuntu20.04
-      command: ["sleep"]
-      args: ["100000"]
-      resources:
-        limits:
-          huawei.com/Ascend310P: "1"
-          huawei.com/Ascend310P-memory: "4096"
- ```
-
-## License
+# Ascend Device Plugin
+[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fascend-device-plugin.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fascend-device-plugin?ref=badge_shield)
+
+## Introduction
+
+This Ascend device plugin is implemented for [HAMi](https://github.com/Project-HAMi/HAMi) and [volcano](https://github.com/volcano-sh/volcano) scheduling.
+
+#### 1. Template-based Hard Slicing (vNPU)
+
+Memory slicing is supported based on virtualization templates; the smallest template that satisfies the memory request is selected automatically. For detailed information, check [template](./ascend-device-configmap.yaml).
+
+#### 2. Soft Slicing with Runtime Interception (hami-vnpu-core)
+
+This project implements a soft slicing mechanism based on `libvnpu.so` interception and `limiter` token scheduling, enabling fine-grained resource sharing. For detailed information, check [hami-vnpu-core](https://github.com/Project-HAMi/hami-vnpu-core).
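+
+In brief, the plugin bind-mounts an `/etc/ld.so.preload` file into soft-sliced containers so that the dynamic loader preloads `libvnpu.so` into every process; the preloaded library then cooperates with the `limiter` to enforce the configured memory and core quotas. A quick way to inspect this wiring from inside a soft-sliced container (illustrative only; the paths follow this repository's mount setup):
+
+```bash
+# The preload file injected by the device plugin
+cat /etc/ld.so.preload
+
+# The interception library and limiter shipped with the plugin
+ls /hami-vnpu-core
+```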
+
+## Prerequisites
+
+[ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime)
+
+Update the submodule:
+
+```bash
+git submodule update --init --recursive
+```
+
+hami-vnpu-core soft slicing requirements (see the check below):
+
+- **Ascend Driver Version**: ≥ 25.5
+- **Chip Mode**: enable `device-share` mode on the Ascend chips for virtualization
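+
+A quick sanity check for the driver requirement (the file below is the same one this plugin bind-mounts into soft-sliced containers; the exact `npu-smi` invocation for switching a chip into `device-share` mode depends on your driver release, so consult the Ascend documentation for that step):
+
+```bash
+# Driver version must be >= 25.5
+cat /usr/local/Ascend/driver/version.info
+```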
+
+## Compile
+
+```bash
+make all
+```
+
+### Build
+
+```bash
+docker buildx build -t $IMAGE_NAME .
+```
+
+## Deployment
+
+### Label the Node with `ascend=on`
+
+```bash
+kubectl label node {ascend-node} ascend=on
+```
+
+### Deploy ConfigMap
+
+```bash
+kubectl apply -f ascend-device-configmap.yaml
+```
+
+### Deploy RuntimeClass
+
+```bash
+kubectl apply -f ascend-runtimeclass.yaml
+```
+
+### Deploy `ascend-device-plugin`
+
+```bash
+kubectl apply -f ascend-device-plugin.yaml
+```
+
+If scheduling Ascend devices in HAMi, simply set `devices.ascend.enabled` to true when deploying HAMi; the ConfigMap and `ascend-device-plugin` will then be deployed automatically. Refer to https://github.com/Project-HAMi/HAMi/blob/master/charts/hami/README.md#huawei-ascend
+
+If you require HAMi to automatically add the `runtimeClassName` configuration to Pods requesting Ascend resources (this is disabled by default), set `devices.ascend.runtimeClassName` to **a non-empty string** in HAMi’s `values.yaml` file, ensuring it matches the name of the `RuntimeClass` resource. For example:
+
+```yaml
+devices:
+  ascend:
+    runtimeClassName: ascend
+```
+
+## Usage
+
+To use an entire card exclusively, or to request multiple cards, you only need to set the corresponding resourceName. If multiple tasks need to share the same NPU, set the corresponding resource request to 1 and configure the appropriate resourceMemoryName.
+
+### Usage in HAMi
+
+```yaml
+...
+  containers:
+    - name: npu_pod
+      ...
+      resources:
+        limits:
+          huawei.com/Ascend910B: "1"
+          # if you don't specify Ascend910B-memory, it will use a whole NPU.
+          huawei.com/Ascend910B-memory: "4096"
+```
+
+For more examples, see [examples](./examples/)
+
+### Soft Slicing Configuration (HAMi)
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: ascend-soft-slice-pod
+  annotations:
+    huawei.com/vnpu-mode: 'hami-core' # enables hami-vnpu-core soft slicing for this pod
+spec:
+  containers:
+    - name: npu_pod
+      ...
+      resources:
+        limits:
+          huawei.com/Ascend910B3: "1"             # request 1 physical NPU
+          huawei.com/Ascend910B3-memory: "28672"  # request 28 GiB of device memory
+          huawei.com/Ascend910B3-core: "40"       # request 40% of the AI cores
+```
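+
+For a pod admitted in this mode, the plugin injects the quotas as environment variables (`NPU_MEM_QUOTA`, `NPU_PRIORITY`, `NPU_GLOBAL_SHM_PATH`) alongside the `hami-vnpu-core` mounts, so an allocation can be verified like this (a sketch, assuming the pod name above):
+
+```bash
+kubectl exec ascend-soft-slice-pod -- env | grep NPU_
+# For the spec above, expect NPU_MEM_QUOTA=28672 and NPU_PRIORITY=40
+```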
+
+### Usage in volcano
+
+Volcano must be installed prior to usage; for more information, see [here](https://github.com/volcano-sh/volcano/tree/master/docs/user-guide/how_to_use_vnpu.md)
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: ascend-pod
+spec:
+  schedulerName: volcano
+  containers:
+    - name: ubuntu-container
+      image: swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-pytorch:24.0.RC1-A2-1.11.0-ubuntu20.04
+      command: ["sleep"]
+      args: ["100000"]
+      resources:
+        limits:
+          huawei.com/Ascend310P: "1"
+          huawei.com/Ascend310P-memory: "4096"
+```
+
+## License
+
+[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FProject-HAMi%2Fascend-device-plugin.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2FProject-HAMi%2Fascend-device-plugin?ref=badge_large)
\ No newline at end of file
diff --git a/README_cn.md b/README_cn.md
index 64251a0..cd7b1e5 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -1,107 +1,142 @@
-# Ascend Device Plugin
-
-## 说明
-
-Ascend device plugin 是用来支持在 [HAMi](https://github.com/Project-HAMi/HAMi) 和 [volcano](https://github.com/volcano-sh/volcano) 中调度昇腾NPU设备.
-
-昇腾NPU虚拟化切分是通过模板来配置的,在调度时会找到满足显存需求的最小模板来作为容器的显存。各芯片的模板配置信息参考[这里](./ascend-device-configmap.yaml)
-
-## 环境要求
-
-部署 [ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime)
-
-克隆子模块 mind-cluster
-
-```bash
-git submodule add https://gitcode.com/Ascend/mind-cluster.git
-```
-
-## 编译
-
-```bash
-make all
-```
-
-### 编译镜像
-
-```bash
-docker buildx build -t $IMAGE_NAME .
-```
-
-## 部署
-
-### 给 Node 打 ascend 标签
-
-```bash
-kubectl label node {ascend-node} ascend=on
-```
-
-### 部署 ConfigMap
-
-```bash
-kubectl apply -f ascend-device-configmap.yaml
-```
-
-### 部署 RuntimeClass
-
-```bash
-kubectl apply -f ascend-runtimeclass.yaml
-```
-
-### 部署 `ascend-device-plugin`
-
-```bash
-kubectl apply -f ascend-device-plugin.yaml
-```
-
-如果要在HAMi中使用升腾NPU, 在部署HAMi时设置 `devices.ascend.enabled` 为 true 会自动部署 ConfigMap 和 `ascend-device-plugin`。 参考
-
-如果需要 HAMi 为申请 ascend 资源的 Pod 自动添加 runtimeClassName 配置(默认关闭),则应该在 HAMi 的 values.yaml 文件中配置 `deivces.ascend.runtimeClassName` 为**一个非空字符串**,并且与 RuntimeClass 资源名称保持一致。 例如:
-
-```yaml
-devices:
-  ascend:
-    runtimeClassName: ascend
-```
-
-## 使用
-
-如果要独占整卡或者申请多张卡只需要设置对应的 resourceName 即可。如果多个任务要共享同一张卡,需要将 resourceName 设置为1,并且设置对应的 ResourceMemoryName。
-
-### 在 HAMi 中使用
-
-```yaml
-...
-  containers:
-    - name: npu_pod
-      ...
-      resources:
-        limits:
-          huawei.com/Ascend910B: "1"
-          # 如果不指定显存大小, 就会使用整张卡
-          huawei.com/Ascend910B-memory: "4096"
-```
-
- For more examples, see [examples](./examples/)
-
-### 在 volcano 中使用
-
- 在 volcano 中使用时需要提前部署好 volcano, 更多信息请[参考这里](https://github.com/volcano-sh/volcano/tree/master/docs/user-guide/how_to_use_vnpu.md)
-
- ```yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ascend-pod
-spec:
-  schedulerName: volcano
-  containers:
-    - name: ubuntu-container
-      image: swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-pytorch:24.0.RC1-A2-1.11.0-ubuntu20.04
-      command: ["sleep"]
-      args: ["100000"]
-      resources:
-        limits:
-          huawei.com/Ascend310P: "1"
-          huawei.com/Ascend310P-memory: "4096"
- ```
+# Ascend Device Plugin
+
+## 说明
+
+Ascend device plugin 是用来支持在 [HAMi](https://github.com/Project-HAMi/HAMi) 和 [volcano](https://github.com/volcano-sh/volcano) 中调度昇腾NPU设备.
+
+#### 1. 基于模板的硬切分 (vNPU)
+
+支持基于虚拟化模板的显存切分,调度时会自动选用满足显存需求的最小可用模板。详细信息请参阅 [template](./ascend-device-configmap.yaml)。
+
+#### 2. 基于运行时拦截的软切分 (hami-vnpu-core)
+
+实现了基于 `libvnpu.so` 拦截和 limiter 令牌调度的软切分机制,能够实现精细化的资源共享。详细信息请参阅 [hami-vnpu-core](https://github.com/Project-HAMi/hami-vnpu-core)。
+
+## 环境要求
+
+部署 [ascend-docker-runtime](https://gitcode.com/Ascend/mind-cluster/tree/master/component/ascend-docker-runtime)
+
+更新子模块:
+
+```bash
+git submodule update --init --recursive
+```
+
+**hami-vnpu-core 软切分要求(参见下面的检查命令):**
+
+- Ascend 驱动版本:≥ 25.5
+- 芯片模式:在昇腾芯片上开启 `device-share` 模式以支持虚拟化
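+
+可以先检查节点驱动版本是否满足要求(下面读取的文件与本插件挂载进容器的驱动文件一致;将芯片切换到 `device-share` 模式的 `npu-smi` 命令因驱动版本而异,请以昇腾官方文档为准):
+
+```bash
+# 驱动版本需 >= 25.5
+cat /usr/local/Ascend/driver/version.info
+```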
+
+## 编译
+
+```bash
+make all
+```
+
+### 编译镜像
+
+```bash
+docker buildx build -t $IMAGE_NAME .
+```
+
+## 部署
+
+### 给 Node 打 ascend 标签
+
+```bash
+kubectl label node {ascend-node} ascend=on
+```
+
+### 部署 ConfigMap
+
+```bash
+kubectl apply -f ascend-device-configmap.yaml
+```
+
+### 部署 RuntimeClass
+
+```bash
+kubectl apply -f ascend-runtimeclass.yaml
+```
+
+### 部署 `ascend-device-plugin`
+
+```bash
+kubectl apply -f ascend-device-plugin.yaml
+```
+
+如果要在 HAMi 中使用昇腾 NPU,在部署 HAMi 时设置 `devices.ascend.enabled` 为 true,会自动部署 ConfigMap 和 `ascend-device-plugin`。参考 https://github.com/Project-HAMi/HAMi/blob/master/charts/hami/README.md#huawei-ascend
+
+如果需要 HAMi 为申请 ascend 资源的 Pod 自动添加 runtimeClassName 配置(默认关闭),则应该在 HAMi 的 values.yaml 文件中配置 `devices.ascend.runtimeClassName` 为**一个非空字符串**,并且与 RuntimeClass 资源名称保持一致。例如:
+
+```yaml
+devices:
+  ascend:
+    runtimeClassName: ascend
+```
+
+## 使用
+
+如果要独占整卡或者申请多张卡,只需要设置对应的 resourceName 即可。如果多个任务要共享同一张卡,需要将对应的资源请求设置为 1,并且设置对应的 resourceMemoryName。
+
+### 在 HAMi 中使用
+
+```yaml
+...
+  containers:
+    - name: npu_pod
+      ...
+      resources:
+        limits:
+          huawei.com/Ascend910B: "1"
+          # 如果不指定显存大小, 就会使用整张卡
+          huawei.com/Ascend910B-memory: "4096"
+```
+
+更多示例请参阅 [examples](./examples/)
+
+#### 软切分配置 (HAMi)
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: ascend-soft-slice-pod
+  annotations:
+    huawei.com/vnpu-mode: 'hami-core' # 添加该注解的 Pod 走 hami-vnpu-core 软切分
+spec:
+  containers:
+    - name: npu_pod
+      ...
+      resources:
+        limits:
+          huawei.com/Ascend910B3: "1"             # 请求 1 块物理 NPU
+          huawei.com/Ascend910B3-memory: "28672"  # 请求 28Gi 显存
+          huawei.com/Ascend910B3-core: "40"       # 请求 40% 的算力
+```
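+
+对于以该模式调度的 Pod,插件会把配额以环境变量(`NPU_MEM_QUOTA`、`NPU_PRIORITY`、`NPU_GLOBAL_SHM_PATH`)注入容器,并挂载 hami-vnpu-core 相关文件,可按如下方式验证分配结果(示意,沿用上面的 Pod 名称):
+
+```bash
+kubectl exec ascend-soft-slice-pod -- env | grep NPU_
+# 对应上面的配置,预期 NPU_MEM_QUOTA=28672、NPU_PRIORITY=40
+```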
+
+### 在 volcano 中使用
+
+在 volcano 中使用时需要提前部署好 volcano,更多信息请[参考这里](https://github.com/volcano-sh/volcano/tree/master/docs/user-guide/how_to_use_vnpu.md)
+
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: ascend-pod
+spec:
+  schedulerName: volcano
+  containers:
+    - name: ubuntu-container
+      image: swr.cn-south-1.myhuaweicloud.com/ascendhub/ascend-pytorch:24.0.RC1-A2-1.11.0-ubuntu20.04
+      command: ["sleep"]
+      args: ["100000"]
+      resources:
+        limits:
+          huawei.com/Ascend310P: "1"
+          huawei.com/Ascend310P-memory: "4096"
+```
diff --git a/ascend-device-configmap.yaml b/ascend-device-configmap.yaml
index afc2152..9319d20 100644
--- a/ascend-device-configmap.yaml
+++ b/ascend-device-configmap.yaml
@@ -55,6 +55,7 @@ data:
       commonWord: Ascend910B3
       resourceName: huawei.com/Ascend910B3
       resourceMemoryName: huawei.com/Ascend910B3-memory
+      resourceCoreName: huawei.com/Ascend910B3-core
       memoryAllocatable: 65536
       memoryCapacity: 65536
       aiCore: 20
@@ -127,6 +128,7 @@ data:
      commonWord: Ascend910C
      resourceName: huawei.com/Ascend910C
      resourceMemoryName: huawei.com/Ascend910C-memory
+     resourceCoreName: huawei.com/Ascend910C-core
      memoryAllocatable: 65536
      memoryCapacity: 65536
      aiCore: 20
diff --git a/ascend-device-plugin.yaml b/ascend-device-plugin.yaml
index c22c5f4..ff3a9c0 100644
--- a/ascend-device-plugin.yaml
+++ b/ascend-device-plugin.yaml
@@ -81,6 +81,10 @@ spec:
             mountPath: /var/log/mindx-dl/devicePlugin
           - name: tmp
             mountPath: /tmp
+          - mountPath: /usr/local/hami-shared-region
+            name: hami-shared-region
+          - mountPath: /usr/local/hami-vnpu-core
+            name: hami-vnpu-core
           - name: ascend-config
             mountPath: /device-config.yaml
             subPath: device-config.yaml
@@ -107,6 +111,14 @@ spec:
       - name: tmp
         hostPath:
           path: /tmp
+      - name: hami-shared-region
+        hostPath:
+          path: /usr/local/hami-shared-region
+          type: DirectoryOrCreate
+      - name: hami-vnpu-core
+        hostPath:
+          path: /usr/local/hami-vnpu-core
+          type: DirectoryOrCreate
       - name: ascend-config
         configMap:
           name: hami-scheduler-device
diff --git a/internal/server/server.go b/internal/server/server.go
index 8fefc9e..8623ab7 100644
--- a/internal/server/server.go
+++ b/internal/server/server.go
@@ -25,9 +25,10 @@ import (
 	"path"
 	"strings"
 	"time"
+	"strconv"
 
 	"github.com/Project-HAMi/HAMi/pkg/device"
-	"github.com/Project-HAMi/HAMi/pkg/device/ascend"
+	// "github.com/Project-HAMi/HAMi/pkg/device/ascend"
 	"github.com/Project-HAMi/HAMi/pkg/util"
 	"github.com/Project-HAMi/HAMi/pkg/util/nodelock"
 	"github.com/Project-HAMi/ascend-device-plugin/internal/manager"
@@ -37,6 +38,10 @@ import (
 	"k8s.io/apimachinery/pkg/util/json"
 	"k8s.io/klog/v2"
 	"k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+	"crypto/sha256"
+	"encoding/hex"
+	"io"
+	"path/filepath"
 )
 
 const (
@@ -45,6 +50,8 @@ const (
 	NodeLockAscend  = "hami.io/mutex.lock"
 	Ascend910Prefix = "Ascend910"
 	Ascend910CType  = "Ascend910C"
+	VNPUModeAnnotation = "huawei.com/vnpu-mode"
+	VNPUModeHamiCore   = "hami-core"
 )
 
 var (
@@ -64,6 +71,13 @@ type PluginServer struct {
 	checkIdleVNPUInterval int
 }
 
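+// RuntimeInfo is the per-device allocation record that the HAMi scheduler
+// serializes into the pod's allocation annotation. It replaces the previously
+// imported ascend.RuntimeInfo type so the plugin can carry the additional
+// memory and core quotas used by hami-vnpu-core soft slicing.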
+type RuntimeInfo struct {
+	UUID   string `json:"UUID,omitempty"`
+	Temp   string `json:"temp,omitempty"`
+	Memory *int64 `json:"memory,omitempty"`
+	Core   *int32 `json:"core,omitempty"`
+}
+
 func NewPluginServer(mgr *manager.AscendManager, nodeName string, checkIdleVNPUInterval int) (*PluginServer, error) {
 	return &PluginServer{
 		nodeName: nodeName,
@@ -79,7 +93,118 @@ func NewPluginServer(mgr *manager.AscendManager, nodeName string, checkIdleVNPUInterval int) (*PluginServer, error) {
 	}, nil
 }
 
+// fileSHA256 calculates the SHA256 checksum of the specified file
+func fileSHA256(path string) (string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	h := sha256.New()
+	if _, err := io.Copy(h, f); err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
+
+// prepareHostResources creates the host directories, sets their permissions,
+// and copies the hami-vnpu-core files onto the host
+func prepareHostResources() error {
+	klog.Info("Starting host resource preparation for HAMi vNPU core...")
+
+	// 1. Create the shared memory directory
+	sharedRegionPath := "/usr/local/hami-shared-region"
+	if err := os.MkdirAll(sharedRegionPath, 0777); err != nil {
+		if !os.IsExist(err) {
+			return fmt.Errorf("failed to create %s: %v", sharedRegionPath, err)
+		}
+	}
+	// MkdirAll is subject to the umask, so force the intended mode explicitly
+	if err := os.Chmod(sharedRegionPath, 0777); err != nil {
+		return fmt.Errorf("failed to chmod %s: %v", sharedRegionPath, err)
+	}
+	klog.Infof("Successfully prepared directory: %s", sharedRegionPath)
+
+	// 2. Prepare the /usr/local/hami-vnpu-core/ directory
+	targetDir := "/usr/local/hami-vnpu-core"
+	if err := os.MkdirAll(targetDir, 0775); err != nil {
+		return fmt.Errorf("failed to create %s: %v", targetDir, err)
+	}
+
+	// Locate the in-container assets directory (can be overridden via the
+	// HAMI_VNPU_ASSETS_PATH environment variable; the default matches the
+	// path the Dockerfile bakes into the image)
+	assetsDir := os.Getenv("HAMI_VNPU_ASSETS_PATH")
+	if assetsDir == "" {
+		assetsDir = "/usr/local/hami-vnpu-core-assets"
+	}
+
+	// Files to copy: source name in the assets directory -> target path on the host
+	filesToCopy := map[string]string{
+		"limiter":       filepath.Join(targetDir, "limiter"),
+		"libvnpu.so":    filepath.Join(targetDir, "libvnpu.so"),
+		"ld.so.preload": filepath.Join(targetDir, "ld.so.preload"),
+	}
+
+	for srcName, destPath := range filesToCopy {
+		srcPath := filepath.Join(assetsDir, srcName)
+
+		// If the target already exists with identical content, skip the copy
+		if _, err := os.Stat(destPath); err == nil {
+			srcSum, err1 := fileSHA256(srcPath)
+			dstSum, err2 := fileSHA256(destPath)
+
+			if err1 == nil && err2 == nil && srcSum == dstSum {
+				klog.Infof("✓ %s already up-to-date, skipping", destPath)
+				continue
+			}
+		}
+
+		if err := copyFile(srcPath, destPath); err != nil {
+			if strings.Contains(err.Error(), "text file busy") {
+				klog.Warningf("⚠ %s is in use by a running process, keeping the existing version", destPath)
+				continue
+			}
+			return fmt.Errorf("failed to copy %s: %v", destPath, err)
+		}
+		klog.Infof("✓ Copied %s -> %s", srcPath, destPath)
+	}
+
+	klog.Info("Host resource preparation completed successfully.")
+	return nil
+}
+
+// copyFile is a standard file copy that preserves the source file's permissions
+func copyFile(src, dst string) error {
+	srcFile, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer srcFile.Close()
+
+	dstFile, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer dstFile.Close()
+
+	if _, err = io.Copy(dstFile, srcFile); err != nil {
+		return err
+	}
+
+	// Sync the source file's permissions (ensures the limiter binary keeps its executable bit)
+	srcInfo, err := srcFile.Stat()
+	if err != nil {
+		return err
+	}
+	return os.Chmod(dst, srcInfo.Mode())
+}
+
 func (ps *PluginServer) Start() error {
+	// Prepare the host environment for hami-vnpu-core when the plugin starts
+	if err := prepareHostResources(); err != nil {
+		klog.Errorf("Failed to prepare host resources: %v; vNPU soft slicing cannot work, aborting startup", err)
+		return err
+	}
+
 	ps.stopCh = make(chan interface{})
 	err := ps.mgr.UpdateDevice()
 	if err != nil {
@@ -303,33 +428,43 @@ func (ps *PluginServer) watchAndRegister() {
 	}
 }
 
-func (ps *PluginServer) parsePodAnnotation(pod *v1.Pod) ([]int32, []string, error) {
+func (ps *PluginServer) parsePodAnnotation(pod *v1.Pod) ([]int32, []string, []*int64, []*int32, error) {
 	anno, ok := pod.Annotations[ps.allocAnno]
 	if !ok {
-		return nil, nil, fmt.Errorf("annotation %s not set", "huawei.com/Ascend")
+		return nil, nil, nil, nil, fmt.Errorf("annotation %s not set", "huawei.com/Ascend")
 	}
-	var rtInfo []ascend.RuntimeInfo
+	var rtInfo []RuntimeInfo
 	err := json.Unmarshal([]byte(anno), &rtInfo)
 	if err != nil {
-		return nil, nil, fmt.Errorf("annotation %s value %s invalid", ps.allocAnno, anno)
+		return nil, nil, nil, nil, fmt.Errorf("annotation %s value %s invalid", ps.allocAnno, anno)
 	}
 	var IDs []int32
 	var temps []string
+	var memories []*int64
+	var cores []*int32
+
 	for _, info := range rtInfo {
 		if info.UUID == "" {
 			continue
 		}
 		d := ps.mgr.GetDeviceByUUID(info.UUID)
 		if d == nil {
-			return nil, nil, fmt.Errorf("unknown uuid: %s", info.UUID)
+			return nil, nil, nil, nil, fmt.Errorf("unknown uuid: %s", info.UUID)
 		}
 		IDs = append(IDs, d.PhyID)
 		temps = append(temps, info.Temp)
+		if info.Memory != nil {
+			memories = append(memories, info.Memory)
+		}
+		if info.Core != nil {
+			cores = append(cores, info.Core)
+		}
 	}
 	if len(IDs) == 0 {
-		return nil, nil, fmt.Errorf("annotation %s value %s invalid", ps.allocAnno, anno)
+		return nil, nil, nil, nil, fmt.Errorf("annotation %s value %s invalid", ps.allocAnno, anno)
 	}
-	return IDs, temps, nil
+	return IDs, temps, memories, cores, nil
 }
 
 func (ps *PluginServer) apiDevices() []*v1beta1.Device {
@@ -389,28 +524,89 @@ func (ps *PluginServer) Allocate(ctx context.Context, reqs *v1beta1.AllocateRequest) (*v1beta1.AllocateResponse, error) {
 		return nil, fmt.Errorf("get pending pod error: %v", err)
 	}
 	resp := v1beta1.ContainerAllocateResponse{}
-	IDs, temps, err := ps.parsePodAnnotation(pod)
+	IDs, temps, memories, cores, err := ps.parsePodAnnotation(pod)
 	if err != nil {
 		return nil, fmt.Errorf("parse pod annotation error: %v", err)
 	}
+
+	vnpuMode := pod.Annotations[VNPUModeAnnotation]
+	klog.V(4).Infof("Pod %s vnpu mode: %s", pod.Name, vnpuMode)
+
 	if len(IDs) == 0 {
 		return nil, fmt.Errorf("empty id from pod annotation")
 	}
 	ascendVisibleDevices := fmt.Sprintf("%d", IDs[0])
-	ascendVNPUSpec := ""
 	for i := 1; i < len(IDs); i++ {
 		ascendVisibleDevices = fmt.Sprintf("%s,%d", ascendVisibleDevices, IDs[i])
 	}
-	for i := 0; i < len(temps); i++ {
-		if temps[i] != "" {
-			ascendVNPUSpec = temps[i]
-			break
-		}
-	}
+
 	resp.Envs = make(map[string]string)
 	resp.Envs["ASCEND_VISIBLE_DEVICES"] = ascendVisibleDevices
-	if ascendVNPUSpec != "" {
-		resp.Envs["ASCEND_VNPU_SPECS"] = ascendVNPUSpec
+
+	if vnpuMode == VNPUModeHamiCore {
+		// 1. Handle volume mount injection
+		var mounts []*v1beta1.Mount
+		// A. Huawei driver and SMI toolchain (read-only)
+		driverPaths := []string{
+			"/usr/local/bin/npu-smi",
+			"/etc/ascend_install.info",
+			"/usr/local/Ascend/driver/lib64/driver",
+			"/usr/local/Ascend/driver/version.info",
+		}
+		for _, p := range driverPaths {
+			mounts = append(mounts, &v1beta1.Mount{HostPath: p, ContainerPath: p, ReadOnly: true})
+		}
+
+		mounts = append(mounts, &v1beta1.Mount{
+			HostPath:      "/usr/local/hami-vnpu-core",
+			ContainerPath: "/hami-vnpu-core",
+			ReadOnly:      true,
+		})
+		// B. Inject the HAMi library path by bind-mounting over /etc/ld.so.preload.
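+		// The dynamic loader reads /etc/ld.so.preload for every process started
+		// in the container, so libvnpu.so is preloaded ahead of the CANN runtime
+		// and can intercept device calls without modifying the application image.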
+		mounts = append(mounts, &v1beta1.Mount{
+			HostPath:      "/usr/local/hami-vnpu-core/ld.so.preload", // template file on the host
+			ContainerPath: "/etc/ld.so.preload",                      // overrides the target file in the container
+			ReadOnly:      true,
+		})
+
+		// C. Shared directory for HAMi compute resource partitioning (read/write)
+		mounts = append(mounts, &v1beta1.Mount{
+			HostPath:      "/usr/local/hami-shared-region",
+			ContainerPath: "/hami-shared-region",
+			ReadOnly:      false,
+		})
+		resp.Mounts = mounts
+
+		// 2. Set NPU_MEM_QUOTA
+		if len(memories) > 0 && memories[0] != nil {
+			resp.Envs["NPU_MEM_QUOTA"] = strconv.FormatInt(*memories[0], 10)
+			klog.V(4).InfoS("Memory quota set", "value", *memories[0])
+		}
+
+		// 3. Set NPU_PRIORITY
+		if len(cores) > 0 && cores[0] != nil {
+			resp.Envs["NPU_PRIORITY"] = strconv.FormatInt(int64(*cores[0]), 10)
+			klog.V(4).InfoS("Core priority set", "value", *cores[0])
+		}
+
+		// 4. Set NPU_GLOBAL_SHM_PATH, namespaced by the first allocated device ID
+		if len(IDs) > 0 {
+			resp.Envs["NPU_GLOBAL_SHM_PATH"] = fmt.Sprintf("/hami-shared-region/%d_global_registry", IDs[0])
+			klog.V(5).Infof("Create %d_global_registry", IDs[0])
+		} else {
+			klog.Warningf("No device IDs allocated")
+		}
+	} else {
+		ascendVNPUSpec := ""
+		for i := 0; i < len(temps); i++ {
+			if temps[i] != "" {
+				ascendVNPUSpec = temps[i]
+				break
+			}
+		}
+		if ascendVNPUSpec != "" {
+			resp.Envs["ASCEND_VNPU_SPECS"] = ascendVNPUSpec
+		}
 	}
 	klog.V(5).Infof("allocate response: %v", resp)
 	success = true
diff --git a/lib/hami-vnpu-core/ld.so.preload b/lib/hami-vnpu-core/ld.so.preload
new file mode 100644
index 0000000..eba2ba3
--- /dev/null
+++ b/lib/hami-vnpu-core/ld.so.preload
@@ -0,0 +1 @@
+/vnpu/libvnpu.so
\ No newline at end of file
diff --git a/lib/hami-vnpu-core/libvnpu.so b/lib/hami-vnpu-core/libvnpu.so
new file mode 100644
index 0000000..ccbbe36
Binary files /dev/null and b/lib/hami-vnpu-core/libvnpu.so differ
diff --git a/lib/hami-vnpu-core/limiter b/lib/hami-vnpu-core/limiter
new file mode 100644
index 0000000..fc842ea
Binary files /dev/null and b/lib/hami-vnpu-core/limiter differ