Skip to content

Commit f3a9a4b

Browse files
authored
[chore] Optimize config descriptions for better understanding (#109)
As title.
Signed-off-by: 0oshowero0 <o0shower0o@outlook.com>
1 parent dc7d203 commit f3a9a4b

2 files changed

Lines changed: 45 additions & 33 deletions

File tree

scripts/performance_test/perftest_config.yaml

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# Prometheus metrics exporter.
55
metrics:
66
enabled: true
7+
# HTTP port for /metrics endpoint (0 = auto-assign free port)
78
port: 0
89

910
controller:
@@ -20,37 +21,39 @@ backend:
2021
# SimpleStorage, Yuanrong, MooncakeStore, ...
2122
storage_backend: SimpleStorage
2223

23-
# For SimpleStorage:
24+
# SimpleStorage: ZMQ-based in-memory storage for out-of-the-box usage
2425
SimpleStorage:
25-
# Total number of samples
26+
# Maximum number of experience samples to hold across all storage units
2627
total_storage_size: 100000
27-
# Number of distributed storage units for SimpleStorage backend
28+
# Number of distributed storage units.
29+
# Recommended: >= 2 x number of nodes for load balancing.
2830
num_data_storage_units: 16
2931
# ZMQ Server IP & Ports (automatically generated during init)
3032
zmq_info: null
3133

32-
# For MooncakeStore:
34+
# MooncakeStore: high-performance KV-based hierarchical storage
35+
# that supports RDMA transport between GPU and DRAM.
3336
MooncakeStore:
34-
# Whether to let TQ automatically init metadata_server.
37+
# Whether TransferQueue should automatically start the Mooncake metadata server.
38+
# WARNING: When set to `true`, TQ will attempt to terminate any existing mooncake_master process.
3539
auto_init: true
36-
# Address of the HTTP metadata server
40+
# Address of the metadata coordination server.
3741
metadata_server: localhost:50050
38-
# Address of master server
42+
# Address of the Mooncake master server.
3943
master_server_address: localhost:50051
40-
# Address of local host. Set to "" to use Ray IP as local host address
44+
# Local host address visible to the Mooncake cluster.
45+
# Set to "" to auto-detect using Ray's node IP.
4146
local_hostname: ""
42-
# Protocol for transmission. Choose from: tcp, rdma. (default: rdma)
47+
# Transport protocol. Choose from: tcp, rdma.
4348
protocol: rdma
44-
# Memory segment size in bytes for mounting
49+
# Global memory segment size in bytes **per client** for mounting (default: 4GB; overridden below for the performance test)
4550
global_segment_size: 86294967296
46-
# Local buffer size in bytes
51+
# Local buffer size in bytes **per client** (default: 1GB; overridden below for the performance test)
4752
local_buffer_size: 86294967296
48-
# Network device name. Set to "" to let Mooncake to auto-picks devices
53+
# Network device name.
54+
# Set to "" to let Mooncake auto-select available devices.
4955
device_name: ""
5056

51-
# For RayStore:
52-
RayStore:
53-
5457
# For Yuanrong:
5558
Yuanrong:
5659
# Whether to let TQ automatically init yuanrong
@@ -67,3 +70,6 @@ backend:
6770
# --enable_huge_tlb Enable huge page memory to improve performance. Required for >21GB shared memory on 910B.
6871
# Example: "--shared_memory_size_mb 16384 --remote_h2d_device_ids 0,1,2,3 --enable_huge_tlb true"
6972
worker_args: "--shared_memory_size_mb 65536 --remote_h2d_device_ids 0 --enable_huge_tlb true"
73+
74+
# For RayStore:
75+
RayStore:

transfer_queue/config.yaml

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
# Prometheus metrics exporter.
55
metrics:
66
enabled: false
7-
port: 0 # HTTP port for /metrics endpoint (0 = auto-assign free port)
7+
# HTTP port for /metrics endpoint (0 = auto-assign free port)
8+
port: 0
89

910
controller:
1011
# User-defined sampler. Users can pass a sampler instance to override this string config.
@@ -20,37 +21,39 @@ backend:
2021
# SimpleStorage, Yuanrong, MooncakeStore, ...
2122
storage_backend: SimpleStorage
2223

23-
# For SimpleStorage:
24+
# SimpleStorage: ZMQ-based in-memory storage for out-of-the-box usage
2425
SimpleStorage:
25-
# Total number of samples
26+
# Maximum number of experience samples to hold across all storage units
2627
total_storage_size: 100000
27-
# Number of distributed storage units for SimpleStorage backend
28+
# Number of distributed storage units.
29+
# Recommended: >= 2 x number of nodes for load balancing.
2830
num_data_storage_units: 2
2931
# ZMQ Server IP & Ports (automatically generated during init)
3032
zmq_info: null
3133

32-
# For MooncakeStore:
34+
# MooncakeStore: high-performance KV-based hierarchical storage
35+
# that supports RDMA transport between GPU and DRAM.
3336
MooncakeStore:
34-
# Whether to let TQ automatically init metadata_server.
37+
# Whether TransferQueue should automatically start the Mooncake metadata server.
38+
# WARNING: When set to `true`, TQ will attempt to terminate any existing mooncake_master process.
3539
auto_init: true
36-
# Address of the HTTP metadata server
40+
# Address of the metadata coordination server.
3741
metadata_server: localhost:50050
38-
# Address of master server
42+
# Address of the Mooncake master server.
3943
master_server_address: localhost:50051
40-
# Address of local host. Set to "" to use Ray IP as local host address
44+
# Local host address visible to the Mooncake cluster.
45+
# Set to "" to auto-detect using Ray's node IP.
4146
local_hostname: ""
42-
# Protocol for transmission. Choose from: tcp, rdma. (default: tcp)
47+
# Transport protocol. Choose from: tcp, rdma.
4348
protocol: tcp
44-
# Memory segment size in bytes for mounting (default: 4GB)
49+
# Global memory segment size in bytes **per client** for mounting (default: 4GB)
4550
global_segment_size: 4294967296
46-
# Local buffer size in bytes (default: 1GB)
51+
# Local buffer size in bytes **per client** (default: 1GB)
4752
local_buffer_size: 1073741824
48-
# Network device name. Set to "" to let Mooncake to auto-picks devices
53+
# Network device name.
54+
# Set to "" to let Mooncake auto-select available devices.
4955
device_name: ""
5056

51-
# For RayStore:
52-
RayStore:
53-
5457
# For Yuanrong:
5558
Yuanrong:
5659
# Whether to let TQ automatically init yuanrong
@@ -59,9 +62,9 @@ backend:
5962
worker_port: 31501
6063
# Metastore service port
6164
metastore_port: 2379
62-
# If enable npu transport
65+
# Whether to enable NPU transport
6366
enable_yr_npu_transport: false
64-
# If enable host RDMA (H2H) transport via UCX. Requires RDMA NIC hardware and rdma-core driver.
67+
# Whether to enable host RDMA (H2H) transport via UCX. Requires RDMA NIC hardware and rdma-core driver.
6568
# See https://pages.openeuler.openatom.cn/openyuanrong-datasystem/docs/zh-cn/latest/best_practices/best_practices_for_rdma.html
6669
enable_rdma: false
6770
# UCX env vars passed to dscli subprocess. Precedence: ucx_env_vars > parent env > TQ default (UCX_TLS=rc_x when enable_rdma=true).
@@ -81,3 +84,6 @@ backend:
8184
# ulimit -l unlimited (allow pinning enough memory for RDMA/Ascend)
8285
# Example: "--shared_memory_size_mb 16384 --remote_h2d_device_ids 0,1,2,3 --enable_huge_tlb true"
8386
worker_args: "--shared_memory_size_mb 8192"
87+
88+
# For RayStore:
89+
RayStore:

0 commit comments

Comments
 (0)