Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ hami-vnpu-core Soft Slicing Requirements:

- **Ascend Driver Version**: ≥ 25.5
- **Chip Mode**: enable `device-share` mode on Ascend chips for virtualization
Instructions for enabling `device-share` mode:

**Enabling `device-share` Mode**

**npu-smi set -t device-share -i** *id* **-d** *value* This command is used to set the container sharing mode for all chips on a specified device.

**Parameter Description**

| Type | Description |
| :--- | :--- |
| *id* | **Device ID**. The NPU ID found by running the **npu-smi info -l** command is the device ID. |
| *value* | **Container Enable Status**: Options are disabled or enabled. The default is disabled.<br>0: Disabled<br>1: Enabled |

## Compile

Expand Down Expand Up @@ -57,6 +69,16 @@ kubectl label node {ascend-node} ascend=on
kubectl apply -f ascend-device-configmap.yaml
```

#### **Node Custom Configuration Description**
The `hami-device-node-config` ConfigMap provides fine-grained control of the NPU virtualization strategy for specific nodes within the cluster.
* By setting `hami-vnpu-core: true`, the specified node will enable soft-partitioning based on `hami-vnpu-core`.
* Specify the number of virtual devices reported to Kubernetes for each physical chip via the `vDeviceCount` field.
* Nodes without specific configurations will default to template-based hard-partitioning.

```bash
kubectl apply -f ascend-device-node-configmap.yaml
```

### Deploy RuntimeClass

```bash
Expand Down
18 changes: 18 additions & 0 deletions README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ git submodule update --init --recursive
- Ascend 驱动版本:≥ 25.5
- 芯片模式:在昇腾芯片上开启 `device-share` 模式以支持虚拟化。

**开启 `device-share`模式**

**npu-smi set -t device-share -i** *id* **-d** *value* 用于设置指定设备的所有芯片的容器共享模式。

**参数说明**

| 类型 | 描述 |
| ------- | ----------------------------------------------------------- |
| *id* | 设备ID。通过**npu-smi info -l**命令查出的NPU ID即为设备ID。 |
| *value* | 容器使能状态:分为禁用、使能。默认禁用。0:禁用;1:使能 |

## 编译

```bash
Expand All @@ -55,6 +66,13 @@ kubectl label node {ascend-node} ascend=on
kubectl apply -f ascend-device-configmap.yaml
```

#### 节点自定义配置说明
hami-device-node-config 用于对集群中特定节点的显卡虚拟化策略进行精细化控制。
通过设置 hami-vnpu-core: true,指定节点将启用基于 hami-vnpu-core 的软切分,通过 vDeviceCount 字段,手动定义每个物理芯片上报给 Kubernetes 的虚拟设备数量;否则走基于模板的硬切分。
```bash
kubectl apply -f ascend-device-node-configmap.yaml
```

### 部署 RuntimeClass

```bash
Expand Down
15 changes: 15 additions & 0 deletions ascend-device-node-configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: v1
kind: ConfigMap
metadata:
labels:
app.kubernetes.io/component: hami-scheduler
app.kubernetes.io/name: hami
app.kubernetes.io/instance: hami
name: hami-device-node-config
namespace: kube-system
data:
node-config.yaml: |-
nodes:
- name: "cnst-dev-w2"
hami-vnpu-core: true
vDeviceCount: 8
7 changes: 7 additions & 0 deletions ascend-device-plugin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ spec:
mountPath: /device-config.yaml
subPath: device-config.yaml
readOnly: true
- mountPath: /node-config.yaml
name: ascend-node-config
readOnly: true
subPath: node-config.yaml
env:
- name: NODE_NAME
valueFrom:
Expand Down Expand Up @@ -122,5 +126,8 @@ spec:
- name: ascend-config
configMap:
name: hami-scheduler-device
- name: ascend-node-config
configMap:
name: hami-device-node-config
nodeSelector:
ascend: "on"
7 changes: 7 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
var (
hwLoglevel = flag.Int("hw_loglevel", 0, "huawei log level, -1-debug, 0-info, 1-warning, 2-error 3-critical default value: 0")
configFile = flag.String("config_file", "", "config file path")
nodeConfigFile = flag.String("node_config_file", "", "node specific config file path")
nodeName = flag.String("node_name", os.Getenv("NODE_NAME"), "node name")
checkIdleVNPUInterval = flag.Int("check_idle_vnpu_interval", 60, "the interval (in seconds) to check idle vNPU and release them")
)
Expand Down Expand Up @@ -136,6 +137,12 @@ func main() {
if err != nil {
klog.Fatalf("load config failed, error is %v", err)
}
if *nodeConfigFile != "" {
err = mgr.LoadNodeConfig(*nodeConfigFile, *nodeName)
if err != nil {
klog.Errorf("load node config failed: %v", err)
}
}
server, err := server.NewPluginServer(mgr, *nodeName, *checkIdleVNPUInterval)
if err != nil {
klog.Fatalf("init PluginServer failed, error is %v", err)
Expand Down
25 changes: 25 additions & 0 deletions internal/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type AscendManager struct {
//nodeName string
config internal.VNPUConfig
devs []*Device
nodeConfig *internal.NodeConfig
}

func NewAscendManager() (*AscendManager, error) {
Expand All @@ -56,6 +57,25 @@ func NewAscendManager() (*AscendManager, error) {
}, nil
}

// LoadNodeConfig reads the per-node configuration file at nodePath and, if an
// entry whose Name matches nodeName exists, stores a pointer to it on the
// manager. When no entry matches, am.nodeConfig stays nil and callers fall
// back to default settings (see GetNodeConfig usage). A read/parse failure is
// returned to the caller without logging it here — the caller (cmd/main.go)
// already logs the returned error, and logging in both places would record
// the same failure twice.
func (am *AscendManager) LoadNodeConfig(nodePath string, nodeName string) error {
	nodeConfigList, err := internal.LoadNodeConfig(nodePath)
	if err != nil {
		return err
	}

	for i := range nodeConfigList.Nodes {
		n := &nodeConfigList.Nodes[i]
		if n.Name == nodeName {
			// Point at the slice element rather than at a range loop
			// variable, so the stored pointer is unambiguous on every Go
			// version (pre-1.22 range variables are reused per iteration).
			am.nodeConfig = n
			klog.Infof("Successfully matched node config for %s: %+v", nodeName, *n)
			return nil
		}
	}

	klog.Infof("No specific config found for node %s, will use default settings", nodeName)
	return nil
}

func (am *AscendManager) LoadConfig(path string) error {
config, err := internal.LoadConfig(path)
if err != nil {
Expand Down Expand Up @@ -232,3 +252,8 @@ func (am *AscendManager) CleanupIdleVNPUs() error {
klog.Infof("Cleanup completed, destroyed %d idle vNPUs", totalCleaned)
return nil
}


// GetNodeConfig returns the node-specific configuration matched by
// LoadNodeConfig, or nil when no entry for this node was loaded. Callers
// check for nil to decide whether node-specific settings apply (see the
// registerHAMi annotation logic in internal/server/server.go).
func (am *AscendManager) GetNodeConfig() *internal.NodeConfig {
	return am.nodeConfig
}
9 changes: 9 additions & 0 deletions internal/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const (
Ascend910CType = "Ascend910C"
VNPUModeAnnotation = "huawei.com/vnpu-mode"
VNPUModeHamiCore = "hami-core"
VNPUNodeSelectorAnnotation = "hami-vnpu-core"
)

var (
Expand Down Expand Up @@ -387,6 +388,14 @@ func (ps *PluginServer) registerHAMi() error {
annos := make(map[string]string)
annos[ps.registerAnno] = device.MarshalNodeDevices(apiDevices)
annos[ps.handshakeAnno] = "Reported_" + time.Now().Add(time.Duration(*reportTimeOffset)*time.Second).Format("2006.01.02 15:04:05")

if ps.mgr.GetNodeConfig() != nil && ps.mgr.GetNodeConfig().HamiVnpuCore {
annos[VNPUNodeSelectorAnnotation] = "true"
klog.V(4).Infof("Node %s has HamiVnpuCore enabled, patching annotation %s: true", ps.nodeName, VNPUNodeSelectorAnnotation)
} else {
annos[VNPUNodeSelectorAnnotation] = "false"
}

node, err := util.GetNode(ps.nodeName)
if err != nil {
return fmt.Errorf("get node %s error: %v", ps.nodeName, err)
Expand Down
24 changes: 24 additions & 0 deletions internal/vnpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,27 @@ func LoadConfig(path string) (*Config, error) {
}
return &yamlData, nil
}


// NodeConfig describes the virtualization settings for one node as declared
// in the node config file (see ascend-device-node-configmap.yaml).
//
// NOTE(review): the struct tags are json tags but the file is decoded with
// yaml.Unmarshal (see LoadNodeConfig below). sigs.k8s.io/yaml honors json
// tags, while gopkg.in/yaml.v2/v3 ignores them (the hyphenated key
// "hami-vnpu-core" would then never match) — confirm which yaml package this
// file imports.
type NodeConfig struct {
	// Name is the Kubernetes node name this entry applies to.
	Name string `json:"name"`
	// HamiVnpuCore enables hami-vnpu-core soft partitioning on the node.
	HamiVnpuCore bool `json:"hami-vnpu-core"`
	// VDeviceCount is the number of virtual devices reported to Kubernetes
	// per physical chip (per README's node custom configuration section).
	VDeviceCount int `json:"vDeviceCount"`
}

// NodeListConfig is the top-level shape of the node config file: a list of
// per-node entries under the "nodes" key.
type NodeListConfig struct {
	Nodes []NodeConfig `json:"nodes"`
}

// LoadNodeConfig reads the node list configuration file at path and returns
// the decoded per-node entries. Read and parse errors are returned as-is.
func LoadNodeConfig(path string) (*NodeListConfig, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}
	cfg := &NodeListConfig{}
	if err := yaml.Unmarshal(raw, cfg); err != nil {
		return nil, err
	}
	return cfg, nil
}
Loading