44# Prometheus metrics exporter.
55metrics :
66 enabled : false
7- port : 0 # HTTP port for /metrics endpoint (0 = auto-assign free port)
7+ # HTTP port for /metrics endpoint (0 = auto-assign free port)
8+ port : 0
89
910controller :
1011 # User-defined sampler. Users can pass a sampler instance to override this string config.
@@ -20,37 +21,39 @@ backend:
2021 # SimpleStorage, Yuanrong, MooncakeStore, ...
2122 storage_backend : SimpleStorage
2223
23- # For SimpleStorage:
24+ # SimpleStorage: ZMQ-based in-memory storage for out-of-the-box usage
2425 SimpleStorage :
25- # Total number of samples
26+ # Maximum number of experience samples to hold across all storage units
2627 total_storage_size : 100000
27- # Number of distributed storage units for SimpleStorage backend
28+ # Number of distributed storage units.
29+ # Recommended: >= 2 x number of nodes for load balancing.
2830 num_data_storage_units : 2
2931 # ZMQ Server IP & Ports (automatically generated during init)
3032 zmq_info : null
3133
32- # For MooncakeStore:
34+ # MooncakeStore: high-performance KV-based hierarchical storage
35+ # that supports RDMA transport between GPU and DRAM.
3336 MooncakeStore :
34- # Whether to let TQ automatically init metadata_server.
37+ # Whether TransferQueue should automatically start the Mooncake metadata server.
38+ # WARNING: When set to `true`, TQ will attempt to terminate any existing mooncake_master process.
3539 auto_init : true
36- # Address of the HTTP metadata server
40+ # Address of the metadata coordination server.
3741 metadata_server : localhost:50050
38- # Address of master server
42+ # Address of the Mooncake master server.
3943 master_server_address : localhost:50051
40- # Address of local host. Set to "" to use Ray IP as local host address
44+ # Local host address visible to the Mooncake cluster.
45+ # Set to "" to auto-detect using Ray's node IP.
4146 local_hostname : " "
42- # Protocol for transmission . Choose from: tcp, rdma. (default: tcp)
47+ # Transport protocol. Choose from: tcp, rdma.
4348 protocol : tcp
44- # Memory segment size in bytes for mounting (default: 4GB)
49+ # Global memory segment size in bytes **per client** for mounting (default: 4GB)
4550 global_segment_size : 4294967296
46- # Local buffer size in bytes (default: 1GB)
51+ # Local buffer size in bytes **per client** (default: 1GB)
4752 local_buffer_size : 1073741824
48- # Network device name. Set to "" to let Mooncake to auto-picks devices
53+ # Network device name.
54+ # Set to "" to let Mooncake auto-select available devices.
4955 device_name : " "
5056
51- # For RayStore:
52- RayStore :
53-
5457 # For Yuanrong:
5558 Yuanrong :
5659 # Whether to let TQ automatically initialize Yuanrong
@@ -59,9 +62,9 @@ backend:
5962 worker_port : 31501
6063 # Metastore service port
6164 metastore_port : 2379
62- # If enable npu transport
65+ # Whether to enable NPU transport
6366 enable_yr_npu_transport : false
64- # If enable host RDMA (H2H) transport via UCX. Requires RDMA NIC hardware and rdma-core driver.
67+ # Whether to enable host RDMA (H2H) transport via UCX. Requires RDMA NIC hardware and rdma-core driver.
6568 # See https://pages.openeuler.openatom.cn/openyuanrong-datasystem/docs/zh-cn/latest/best_practices/best_practices_for_rdma.html
6669 enable_rdma : false
6770 # UCX env vars passed to dscli subprocess. Precedence: ucx_env_vars > parent env > TQ default (UCX_TLS=rc_x when enable_rdma=true).
@@ -81,3 +84,6 @@ backend:
8184 # ulimit -l unlimited (allow pinning enough memory for RDMA/Ascend)
8285 # Example: "--shared_memory_size_mb 16384 --remote_h2d_device_ids 0,1,2,3 --enable_huge_tlb true"
8386 worker_args : " --shared_memory_size_mb 8192"
87+
88+ # For RayStore:
89+ RayStore:
0 commit comments