Skip to content

Commit dcba0c0

Browse files
authored
Merge pull request #666 from therault/evicitons-in-gpu-device-statistics
Add the number of evicted data copies to the per-device statistics.
2 parents a5f49ab + 4602cc2 commit dcba0c0

6 files changed

Lines changed: 25 additions & 12 deletions

File tree

parsec/mca/device/cuda/device_cuda_module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module )
528528
device->data_out_to_host = 0;
529529
device->required_data_in = 0;
530530
device->required_data_out = 0;
531+
device->nb_evictions = 0;
531532

532533
device->attach = parsec_device_attach;
533534
device->detach = parsec_device_detach;

parsec/mca/device/device.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -408,10 +408,10 @@ void parsec_compute_best_unit( uint64_t length, float* updated_value, char** bes
408408

409409
void parsec_devices_save_statistics(uint64_t **pstats) {
410410
if(NULL == *pstats) {
411-
*pstats = (uint64_t*)calloc(sizeof(uint64_t), parsec_nb_devices * 6 /* see below for the number of arrays */);
411+
*pstats = (uint64_t*)calloc(sizeof(uint64_t), parsec_nb_devices * 7 /* see below for the number of arrays */);
412412
}
413413
else {
414-
memset(*pstats, 0, parsec_nb_devices * sizeof(uint64_t) * 6);
414+
memset(*pstats, 0, parsec_nb_devices * sizeof(uint64_t) * 7);
415415
}
416416
uint64_t *stats = *pstats;
417417
uint64_t *executed_tasks = stats;
@@ -420,12 +420,14 @@ void parsec_devices_save_statistics(uint64_t **pstats) {
420420
uint64_t *req_in = stats + 3*parsec_nb_devices;
421421
uint64_t *req_out = stats + 4*parsec_nb_devices;
422422
uint64_t *transfer_d2d = stats + 5*parsec_nb_devices;
423+
uint64_t *nb_evictions = stats + 6*parsec_nb_devices;
423424

424425
for(uint32_t i = 0; i < parsec_nb_devices; i++) {
425426
parsec_device_module_t *device = parsec_devices[i];
426427
if(NULL == device) continue;
427428
assert( i == device->device_index );
428429
executed_tasks[i] = device->executed_tasks;
430+
nb_evictions[i] = device->nb_evictions;
429431
transfer_in[i] = device->data_in_from_device[0]; /* cpu-core device */
430432
transfer_out[i] = device->data_out_to_host;
431433
req_in[i] = device->required_data_in;
@@ -446,6 +448,7 @@ void parsec_devices_print_statistics(parsec_context_t *parsec_context, uint64_t
446448
uint64_t *end_stats = NULL;
447449
uint64_t total_tasks = 0, total_data_in = 0, total_data_out = 0;
448450
uint64_t total_required_in = 0, total_required_out = 0, total_d2d = 0;
451+
uint64_t total_evicted = 0;
449452
float gtotal = 0.0;
450453
float best_data_in, best_data_out, best_d2d;
451454
float best_required_in, best_required_out;
@@ -457,7 +460,7 @@ void parsec_devices_print_statistics(parsec_context_t *parsec_context, uint64_t
457460
/* initialize the arrays */
458461
parsec_devices_save_statistics(&end_stats);
459462
if(NULL != start_stats) {
460-
for(i = 0; i < parsec_nb_devices * 6; i++) {
463+
for(i = 0; i < parsec_nb_devices * 7; i++) {
461464
assert(end_stats[i] >= start_stats[i]);
462465
end_stats[i] -= start_stats[i];
463466
}
@@ -468,6 +471,7 @@ void parsec_devices_print_statistics(parsec_context_t *parsec_context, uint64_t
468471
uint64_t *required_in = end_stats + 3*parsec_nb_devices;
469472
uint64_t *required_out = end_stats + 4*parsec_nb_devices;
470473
uint64_t *transferred_d2d = end_stats + 5*parsec_nb_devices;
474+
uint64_t *nb_evictions = end_stats + 6*parsec_nb_devices;
471475

472476
/* Compute total statistics */
473477
for(i = 0; i < parsec_nb_devices; i++) {
@@ -479,17 +483,18 @@ void parsec_devices_print_statistics(parsec_context_t *parsec_context, uint64_t
479483
total_required_in += required_in[i];
480484
total_required_out += required_out[i];
481485
total_d2d += transferred_d2d[i];
486+
total_evicted += nb_evictions[i];
482487
}
483488

484489
/* Print statistics */
485490
gtotal = (float)total_tasks;
486491
double percent_in, percent_out, percent_d2d;
487492

488-
printf("+----------------------------------------------------------------------------------------------------------------------------+\n");
489-
printf("| | | Data In | Data Out |\n");
490-
printf("|Rank %3d | # KERNEL | %% | Required | Transfered H2D(%%) | Transfered D2D(%%) | Required | Transfered(%%) |\n",
493+
printf("+-----------------------------------------------------------------------------------------------------------------------------------------------+\n");
494+
printf("| | | Data In | Data Out | |\n");
495+
printf("|Rank %3d | # KERNEL | %% | Required | Transfered H2D(%%) | Transfered D2D(%%) | Required | Transfered(%%) | Evictions |\n",
491496
(NULL == parsec_context ? parsec_debug_rank : parsec_context->my_rank));
492-
printf("|---------|-----------|--------|------------|-----------------------|-----------------------|------------|-------------------|\n");
497+
printf("|---------|-----------|--------|------------|-----------------------|-----------------------|------------|-------------------|------------------|\n");
493498
for( i = 0; i < parsec_nb_devices; i++ ) {
494499
if( NULL == (device = parsec_devices[i]) ) continue;
495500

@@ -503,15 +508,16 @@ void parsec_devices_print_statistics(parsec_context_t *parsec_context, uint64_t
503508
percent_d2d = (0 == required_in[i])? nan(""): (((double)transferred_d2d[i]) / (double)required_in[i] ) * 100.0;
504509
percent_out = (0 == required_out[i])? nan(""): (((double)transferred_out[i]) / (double)required_out[i] ) * 100.0;
505510

506-
printf("| Dev %2d |%10"PRIu64" | %6.2f | %8.2f%2s | %8.2f%2s(%5.2f) | %8.2f%2s(%5.2f) | %8.2f%2s | %8.2f%2s(%5.2f) | %s\n",
511+
printf("| Dev %2d |%10"PRIu64" | %6.2f | %8.2f%2s | %8.2f%2s(%5.2f) | %8.2f%2s(%5.2f) | %8.2f%2s | %8.2f%2s(%5.2f) | %10"PRIu64" | %s\n",
507512
device->device_index, executed_tasks[i], (executed_tasks[i]/gtotal)*100.00,
508513
best_required_in, required_in_unit, best_data_in, data_in_unit, percent_in,
509514
best_d2d, d2d_unit, percent_d2d,
510515
best_required_out, required_out_unit, best_data_out, data_out_unit, percent_out,
516+
nb_evictions[i],
511517
device->name );
512518
}
513519

514-
printf("|---------|-----------|--------|------------|-----------------------|-----------------------|------------|-------------------|\n");
520+
printf("|---------|-----------|--------|------------|-----------------------|-----------------------|------------|-------------------|------------------|\n");
515521

516522
parsec_compute_best_unit( total_required_in, &best_required_in, &required_in_unit );
517523
parsec_compute_best_unit( total_required_out, &best_required_out, &required_out_unit );
@@ -523,12 +529,13 @@ void parsec_devices_print_statistics(parsec_context_t *parsec_context, uint64_t
523529
percent_d2d = (0 == total_required_in)? nan(""): (((double)total_d2d) / (double)total_required_in) * 100.0;
524530
percent_out = (0 == total_required_out)? nan(""): (((double)total_data_out) / (double)total_required_out) * 100.0;
525531

526-
printf("|All Devs |%10"PRIu64" | %6.2f | %8.2f%2s | %8.2f%2s(%5.2f) | %8.2f%2s(%5.2f) | %8.2f%2s | %8.2f%2s(%5.2f) |\n",
532+
printf("|All Devs |%10"PRIu64" | %6.2f | %8.2f%2s | %8.2f%2s(%5.2f) | %8.2f%2s(%5.2f) | %8.2f%2s | %8.2f%2s(%5.2f) | %10"PRIu64" |\n",
527533
total_tasks, (total_tasks/gtotal)*100.00,
528534
best_required_in, required_in_unit, best_data_in, data_in_unit, percent_in,
529535
best_d2d, d2d_unit, percent_d2d,
530-
best_required_out, required_out_unit, best_data_out, data_out_unit, percent_out);
531-
printf("+----------------------------------------------------------------------------------------------------------------------------+\n");
536+
best_required_out, required_out_unit, best_data_out, data_out_unit, percent_out,
537+
total_evicted);
538+
printf("+-----------------------------------------------------------------------------------------------------------------------------------------------+\n");
532539

533540
parsec_devices_free_statistics(&end_stats);
534541
}
@@ -545,6 +552,7 @@ void parsec_mca_device_reset_statistics(parsec_context_t *parsec_context) {
545552
device->data_out_to_host = 0;
546553
device->required_data_in = 0;
547554
device->required_data_out = 0;
555+
device->nb_evictions = 0;
548556
}
549557
}
550558

parsec/mca/device/device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ struct parsec_device_module_s {
165165
uint64_t required_data_out;
166166
uint64_t executed_tasks;
167167
uint64_t nb_data_faults;
168+
uint64_t nb_evictions;
168169
/* We provide the compute capacity of the device in GFlop/s so that conversion to #nanosec in load estimates is straightforward */
169170
/* These compute capacities can be useful for users when providing their own
170171
* time_estimate functions: the user can divide the number of flops for the

parsec/mca/device/device_gpu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device,
10641064
gpu_device->super.device_index, gpu_device->super.name, task_name, this_task->task_class->name, i, lru_gpu_elem);
10651065
oldmaster = NULL;
10661066
}
1067+
gpu_device->super.nb_evictions++;
10671068
#if !defined(PARSEC_GPU_ALLOC_PER_TILE)
10681069
/* Let's free this space, and try again to malloc some space */
10691070
PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream,

parsec/mca/device/level_zero/device_level_zero_module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ int parsec_level_zero_module_init( int dev_id, parsec_device_level_zero_driver_t
386386
device->data_out_to_host = 0;
387387
device->required_data_in = 0;
388388
device->required_data_out = 0;
389+
device->nb_evictions = 0;
389390

390391
device->attach = parsec_device_attach;
391392
device->detach = parsec_device_detach;

parsec/mca/device/template/device_template_module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ parsec_device_template_module_init( int deviceid, parsec_device_module_t** modul
160160
device->super.transferred_data_out = 0;
161161
device->super.required_data_in = 0;
162162
device->super.required_data_out = 0;
163+
device->super.nb_evictions = 0;
163164

164165
device->super.attach = (parsec_device_attach_f)parsec_device_template_attach;
165166
device->super.detach = (parsec_device_detach_f)parsec_device_template_detach;

0 commit comments

Comments
 (0)