1111#include <dirent.h>
1212#include <errno.h>
1313#include <fcntl.h>
14+ #include <inttypes.h>
1415#include <pthread.h>
1516#include <sched.h>
1617#include <stdint.h>
@@ -101,7 +102,9 @@ static void enable_device(const char *bdf) {
101102 snprintf (path , sizeof (path ), "/sys/bus/pci/devices/%s/enable" , bdf );
102103 int fd = open (path , O_WRONLY );
103104 if (fd >= 0 ) {
104- write (fd , "1" , 1 );
105+ if (write (fd , "1" , 1 ) != 1 ) {
106+ fprintf (stderr , " Cannot enable %s: %s\n" , bdf , strerror (errno ));
107+ }
105108 close (fd );
106109 }
107110}
@@ -118,9 +121,11 @@ static int discover_devices(void) {
118121 continue ;
119122 if (read_pci_id (ent -> d_name , "device" ) != CXL_TYPE2_DEVICE )
120123 continue ;
124+ if (strlen (ent -> d_name ) >= sizeof (g_devs [g_num_devices ].bdf ))
125+ continue ;
121126
122127 cxl_dev_t * d = & g_devs [g_num_devices ];
123- strncpy (d -> bdf , ent -> d_name , sizeof ( d -> bdf ) - 1 );
128+ strcpy (d -> bdf , ent -> d_name );
124129 enable_device (d -> bdf );
125130
126131 d -> bar2_size = bar_range (d -> bdf , 2 );
@@ -182,6 +187,49 @@ static double time_ns(void) {
182187 return ts .tv_sec * 1e9 + ts .tv_nsec ;
183188}
184189
190+ static inline uint64_t read_reg64 (cxl_dev_t * d , size_t off ) { return * (volatile uint64_t * )(d -> bar2 + off ); }
191+
192+ static inline void write_reg64 (cxl_dev_t * d , size_t off , uint64_t val ) { * (volatile uint64_t * )(d -> bar2 + off ) = val ; }
193+
194+ static int issue_command (cxl_dev_t * d , uint32_t cmd ) {
195+ volatile uint32_t * cmd_reg = (volatile uint32_t * )(d -> bar2 + CXL_GPU_REG_CMD );
196+ volatile uint32_t * status_reg = (volatile uint32_t * )(d -> bar2 + CXL_GPU_REG_CMD_STATUS );
197+ volatile uint32_t * result_reg = (volatile uint32_t * )(d -> bar2 + CXL_GPU_REG_CMD_RESULT );
198+
199+ * cmd_reg = cmd ;
200+ __sync_synchronize ();
201+
202+ for (int timeout = 100000 ; timeout > 0 ; timeout -- ) {
203+ uint32_t st = * status_reg ;
204+
205+ if (st == CXL_GPU_CMD_STATUS_COMPLETE ) {
206+ return (int )* result_reg ;
207+ }
208+ if (st == CXL_GPU_CMD_STATUS_ERROR ) {
209+ return - (int )* result_reg ;
210+ }
211+ }
212+
213+ return - ETIMEDOUT ;
214+ }
215+
216+ static void measure_command_latency (cxl_dev_t * d , uint32_t cmd , const char * name , int iters ) {
217+ double t0 ;
218+ double t1 ;
219+ int rc = 0 ;
220+
221+ t0 = time_ns ();
222+ for (int i = 0 ; i < iters ; i ++ ) {
223+ rc = issue_command (d , cmd );
224+ if (rc != CXL_GPU_SUCCESS ) {
225+ printf (" %-20s failed: rc=%d\n" , name , rc );
226+ return ;
227+ }
228+ }
229+ t1 = time_ns ();
230+ printf (" %-20s %7.1f ns/op (%d ops)\n" , name , (t1 - t0 ) / iters , iters );
231+ }
232+
185233/* Benchmark: Register Latency */
186234
187235static void bench_register_latency (cxl_dev_t * d ) {
@@ -453,6 +501,114 @@ static void bench_access_patterns(cxl_dev_t *d) {
453501 }
454502}
455503
504+ /* Benchmark: DCD / GFAM / MH-SLD Fabric Memory Controls */
505+
506+ static void bench_fabric_memory (cxl_dev_t * d , uint32_t caps ) {
507+ uint64_t dcd_total = 0 ;
508+ uint64_t dcd_alloc = 0 ;
509+ uint64_t dcd_free = 0 ;
510+ uint64_t dcd_extents = 0 ;
511+ uint64_t gfam_hosts = 0 ;
512+ uint64_t gfam_mappings = 0 ;
513+ uint64_t gfam_allowed = 0 ;
514+ uint64_t gfam_denied = 0 ;
515+ uint64_t mhsld_heads = 0 ;
516+ uint64_t mhsld_head_id = 0 ;
517+ int rc ;
518+
519+ if (!(caps & (CXL_GPU_CAP_DCD | CXL_GPU_CAP_GFAM | CXL_GPU_CAP_MHSLD ))) {
520+ printf ("\n--- Fabric Memory Controls: skipped (no DCD/GFAM/MH-SLD caps) ---\n" );
521+ return ;
522+ }
523+
524+ printf ("\n--- Fabric Memory Controls (device %s) ---\n" , d -> bdf );
525+
526+ if (caps & CXL_GPU_CAP_DCD ) {
527+ rc = issue_command (d , CXL_GPU_CMD_DCD_GET_INFO );
528+ if (rc == CXL_GPU_SUCCESS ) {
529+ dcd_total = read_reg64 (d , CXL_GPU_REG_RESULT0 );
530+ dcd_alloc = read_reg64 (d , CXL_GPU_REG_RESULT1 );
531+ dcd_free = read_reg64 (d , CXL_GPU_REG_RESULT2 );
532+ dcd_extents = read_reg64 (d , CXL_GPU_REG_RESULT3 );
533+ printf (" DCD command info: total=%" PRIu64 " alloc=%" PRIu64 " free=%" PRIu64 " extents=%" PRIu64 "\n" ,
534+ dcd_total , dcd_alloc , dcd_free , dcd_extents );
535+ printf (" DCD status regs: total=%" PRIu64 " alloc=%" PRIu64 " free=%" PRIu64 " extents=%" PRIu64 "\n" ,
536+ read_reg64 (d , CXL_GPU_REG_DCD_TOTAL ), read_reg64 (d , CXL_GPU_REG_DCD_ALLOCATED ),
537+ read_reg64 (d , CXL_GPU_REG_DCD_FREE ), read_reg64 (d , CXL_GPU_REG_DCD_EXTENTS ));
538+ measure_command_latency (d , CXL_GPU_CMD_DCD_GET_INFO , "DCD_GET_INFO" , 5000 );
539+ } else {
540+ printf (" DCD_GET_INFO failed: rc=%d\n" , rc );
541+ }
542+
543+ if (dcd_free >= 1024 * 1024 ) {
544+ write_reg64 (d , CXL_GPU_REG_PARAM0 , UINT64_MAX );
545+ write_reg64 (d , CXL_GPU_REG_PARAM1 , 1024 * 1024 );
546+ write_reg64 (d , CXL_GPU_REG_PARAM2 , 0 );
547+ rc = issue_command (d , CXL_GPU_CMD_DCD_ADD );
548+ if (rc == CXL_GPU_SUCCESS ) {
549+ uint64_t base = read_reg64 (d , CXL_GPU_REG_RESULT0 );
550+ uint64_t size = read_reg64 (d , CXL_GPU_REG_RESULT1 );
551+ uint64_t tag = read_reg64 (d , CXL_GPU_REG_RESULT2 );
552+
553+ printf (" DCD add/release: base=0x%" PRIx64 " size=%" PRIu64 " tag=%" PRIu64 "\n" , base , size , tag );
554+ write_reg64 (d , CXL_GPU_REG_PARAM0 , base );
555+ write_reg64 (d , CXL_GPU_REG_PARAM1 , size );
556+ write_reg64 (d , CXL_GPU_REG_PARAM2 , tag );
557+ rc = issue_command (d , CXL_GPU_CMD_DCD_RELEASE );
558+ if (rc != CXL_GPU_SUCCESS ) {
559+ printf (" DCD_RELEASE failed after add: rc=%d\n" , rc );
560+ }
561+ } else {
562+ printf (" DCD_ADD skipped/failed: rc=%d\n" , rc );
563+ }
564+ } else {
565+ printf (" DCD add/release: skipped (no free DCD capacity)\n" );
566+ }
567+ }
568+
569+ if (caps & CXL_GPU_CAP_GFAM ) {
570+ rc = issue_command (d , CXL_GPU_CMD_GFAM_GET_INFO );
571+ if (rc == CXL_GPU_SUCCESS ) {
572+ gfam_hosts = read_reg64 (d , CXL_GPU_REG_RESULT0 );
573+ gfam_mappings = read_reg64 (d , CXL_GPU_REG_RESULT1 );
574+ gfam_allowed = read_reg64 (d , CXL_GPU_REG_RESULT2 );
575+ gfam_denied = read_reg64 (d , CXL_GPU_REG_RESULT3 );
576+ printf (" GFAM command info: hosts=%" PRIu64 " mappings=%" PRIu64 " allowed=%" PRIu64 " denied=%" PRIu64
577+ "\n" ,
578+ gfam_hosts , gfam_mappings , gfam_allowed , gfam_denied );
579+ printf (" GFAM status regs: hosts=%" PRIu64 " mappings=%" PRIu64 " denied=%" PRIu64 "\n" ,
580+ read_reg64 (d , CXL_GPU_REG_GFAM_HOSTS ), read_reg64 (d , CXL_GPU_REG_GFAM_MAPPINGS ),
581+ read_reg64 (d , CXL_GPU_REG_GFAM_DENIED ));
582+ measure_command_latency (d , CXL_GPU_CMD_GFAM_GET_INFO , "GFAM_GET_INFO" , 5000 );
583+ } else {
584+ printf (" GFAM_GET_INFO failed: rc=%d\n" , rc );
585+ }
586+ }
587+
588+ if (caps & CXL_GPU_CAP_MHSLD ) {
589+ rc = issue_command (d , CXL_GPU_CMD_MHSLD_GET_INFO );
590+ if (rc == CXL_GPU_SUCCESS ) {
591+ mhsld_heads = read_reg64 (d , CXL_GPU_REG_RESULT0 );
592+ mhsld_head_id = read_reg64 (d , CXL_GPU_REG_RESULT1 );
593+ printf (" MH-SLD command info: heads=%" PRIu64 " local=%" PRIu64 " reads=%" PRIu64 " writes=%" PRIu64 "\n" ,
594+ mhsld_heads , mhsld_head_id , read_reg64 (d , CXL_GPU_REG_RESULT2 ), read_reg64 (d , CXL_GPU_REG_RESULT3 ));
595+ printf (" MH-SLD status regs: heads=%" PRIu64 " local=%" PRIu64 " conflicts=%" PRIu64
596+ " invalidations=%" PRIu64 "\n" ,
597+ read_reg64 (d , CXL_GPU_REG_MHSLD_HEADS ), read_reg64 (d , CXL_GPU_REG_MHSLD_HEAD_ID ),
598+ read_reg64 (d , CXL_GPU_REG_MHSLD_CONFLICTS ), read_reg64 (d , CXL_GPU_REG_MHSLD_INV ));
599+ measure_command_latency (d , CXL_GPU_CMD_MHSLD_GET_INFO , "MHSLD_GET_INFO" , 5000 );
600+ } else {
601+ printf (" MHSLD_GET_INFO failed: rc=%d\n" , rc );
602+ }
603+
604+ if (mhsld_heads > 0 ) {
605+ write_reg64 (d , CXL_GPU_REG_PARAM0 , mhsld_head_id );
606+ rc = issue_command (d , CXL_GPU_CMD_MHSLD_SET_HEAD );
607+ printf (" MH-SLD set-head: head=%" PRIu64 " rc=%d\n" , mhsld_head_id , rc );
608+ }
609+ }
610+ }
611+
456612/* Benchmark: Dual-Device Concurrent Access */
457613
458614typedef struct {
@@ -561,6 +717,7 @@ int main(void) {
561717
562718 bench_register_latency (d );
563719 bench_cmd_latency (d );
720+ bench_fabric_memory (d , caps );
564721 bench_data_region_bw (d );
565722 bench_access_patterns (d );
566723 bench_bar4_bulk_bw (d );
0 commit comments