@@ -85,6 +85,7 @@ def handle_tensor(
8585 strides .append (block_stride )
8686 tensor_size = math .prod ([t .shape [i ] for i in size_dims ]) * t .element_size ()
8787 # GPU buffer sizes for GPUDirect RDMA registration in store.
88+ # Total buffer size = number of blocks (shape[0]) × bytes per block stride.
8889 buffer_sizes .append (int (t .shape [0 ]) * block_stride )
8990 token_dim = 1
9091 tensor_block_size = int (t .shape [token_dim ])
@@ -575,8 +576,7 @@ def _create_store(
575576 f"register FAWA { label } GPU KV buffers: "
576577 f"count={ len (gpu_kv_buffer_addrs )} , "
577578 f"bytes={ sum (int (size ) for size in gpu_kv_buffer_sizes )} , "
578- f"addrs={ gpu_kv_buffer_addrs } , "
579- f"sizes={ gpu_kv_buffer_sizes } "
579+ f"first_5={ [(addr , size ) for addr , size in zip (gpu_kv_buffer_addrs [:5 ], gpu_kv_buffer_sizes [:5 ])]} "
580580 )
581581 if cpu_affinity_cores :
582582 config ["cpu_affinity_cores" ] = list (cpu_affinity_cores )
@@ -598,12 +598,13 @@ def _summarize_store_config(config: dict[str, object]) -> dict[str, object]:
598598 summary ["tensor_bytes" ] = sum (tensor_sizes )
599599 gpu_kv_buffer_addrs = summary .pop ("gpu_kv_buffer_addrs" , None )
600600 gpu_kv_buffer_sizes = summary .pop ("gpu_kv_buffer_sizes" , None )
601+ assert (gpu_kv_buffer_addrs is None ) == (gpu_kv_buffer_sizes is None ), (
602+ "GPU KV buffer addresses and sizes must be both None or both non-None"
603+ )
601604 if gpu_kv_buffer_addrs is not None :
602605 summary ["gpu_kv_buffer_count" ] = len (gpu_kv_buffer_addrs )
603606 summary ["gpu_kv_buffer_bytes" ] = (
604607 sum (int (size ) for size in gpu_kv_buffer_sizes )
605- if gpu_kv_buffer_sizes is not None
606- else 0
607608 )
608609 return summary
609610
0 commit comments