|
1 | 1 | /** |
2 | | -* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2015. ALL RIGHTS RESERVED. |
| 2 | +* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2026. ALL RIGHTS RESERVED. |
3 | 3 | * Copyright (C) Shanghai Zhaoxin Semiconductor Co., Ltd. 2020. ALL RIGHTS RESERVED. |
4 | 4 | * Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED. |
5 | 5 | * Copyright (C) Advanced Micro Devices, Inc. 2024. ALL RIGHTS RESERVED. |
|
13 | 13 |
|
14 | 14 | #include "ucx_info.h" |
15 | 15 |
|
16 | | -#include <ucs/sys/string.h> |
| 16 | +#include <ucs/debug/table.h> |
17 | 17 | #include <ucs/sys/sys.h> |
18 | 18 | #include <ucs/sys/math.h> |
19 | 19 | #include <ucs/time/time.h> |
@@ -61,131 +61,143 @@ static double measure_memcpy_bandwidth(size_t size) |
61 | 61 | return result; |
62 | 62 | } |
63 | 63 |
|
64 | | -static void print_repeat_char(int ch, int count) |
| 64 | +/* Add an empty row used as vertical padding around content rows */ |
| 65 | +static void print_sys_topo_add_padding(ucs_table_t *table) |
65 | 66 | { |
66 | | - int i; |
| 67 | + ucs_table_row_t *row = ucs_table_add_row(table); |
| 68 | + unsigned i; |
67 | 69 |
|
68 | | - for (i = 0; i < count; ++i) { |
69 | | - putchar(ch); |
| 70 | + for (i = 0; i < table->config.n_cols; ++i) { |
| 71 | + ucs_table_row_add_cell_empty(row, 1); |
70 | 72 | } |
71 | 73 | } |
72 | 74 |
|
73 | | -static void print_row_separator(int column_width, int first_column_width, |
74 | | - int num_columns, int fill_char, |
75 | | - int separator_char) |
| 75 | +/* Add a header row of the shape "<label> | dev0 | dev1 | ...", surrounded |
| 76 | + * by empty padding rows and followed by a separator. */ |
| 77 | +static void print_sys_topo_add_devices_header(ucs_table_t *table, |
| 78 | + const char *first_col_label, |
| 79 | + unsigned num_devices) |
76 | 80 | { |
77 | | - int i; |
| 81 | + ucs_table_row_t *row; |
| 82 | + ucs_sys_device_t sys_dev; |
78 | 83 |
|
79 | | - printf("# %c", separator_char); |
80 | | - print_repeat_char(fill_char, first_column_width); |
81 | | - for (i = 0; i < num_columns; ++i) { |
82 | | - putchar(separator_char); |
83 | | - print_repeat_char(fill_char, column_width); |
84 | | - } |
85 | | - printf("%c\n", separator_char); |
86 | | -} |
| 84 | + print_sys_topo_add_padding(table); |
87 | 85 |
|
88 | | -static void print_table_header(const char *title, const char *distance_unit, |
89 | | - int column_width, int first_column_width, |
90 | | - int num_columns) |
91 | | -{ |
92 | | - int column; |
93 | | - |
94 | | - printf("#\n"); |
95 | | - printf("# %s\n", title); |
96 | | - printf("#\n"); |
97 | | - print_row_separator(column_width, first_column_width, num_columns, '-', |
98 | | - '+'); |
99 | | - print_row_separator(column_width, first_column_width, num_columns, ' ', |
100 | | - '|'); |
101 | | - printf("# |%*s ", first_column_width - 1, distance_unit); |
102 | | - for (column = 0; column < num_columns; ++column) { |
103 | | - printf("|%*s ", column_width - 1, |
104 | | - ucs_topo_sys_device_get_name((ucs_sys_device_t)column)); |
| 86 | + row = ucs_table_add_row(table); |
| 87 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%s", |
| 88 | + first_col_label); |
| 89 | + for (sys_dev = 0; sys_dev < num_devices; ++sys_dev) { |
| 90 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%s", |
| 91 | + ucs_topo_sys_device_get_name(sys_dev)); |
105 | 92 | } |
106 | 93 |
|
107 | | - printf("|\n"); |
108 | | - print_row_separator(column_width, first_column_width, num_columns, ' ', |
109 | | - '|'); |
110 | | - print_row_separator(column_width, first_column_width, num_columns, '-', |
111 | | - '+'); |
| 94 | + print_sys_topo_add_padding(table); |
| 95 | + ucs_table_add_separator(table); |
112 | 96 | } |
113 | 97 |
|
114 | | -static void print_sys_topo() |
| 98 | +static void print_sys_topo_distances(unsigned num_devices) |
115 | 99 | { |
116 | | - const unsigned num_devices = ucs_topo_num_devices(); |
117 | | - static const int distance_width = 10; |
118 | | - static const char *system_topo_title = "System topology"; |
119 | | - static const char *distance_unit = "MB/s"; |
120 | | - static const char *memory_latency_title = "NUMA memory latency"; |
121 | | - static const char *numa_distance_unit = "nsec"; |
| 100 | + ucs_table_config_t cfg = { |
| 101 | + .n_cols = 1 + num_devices, |
| 102 | + .row_prefix = "# ", |
| 103 | + .equal_widths = 1, |
| 104 | + }; |
122 | 105 | ucs_sys_device_t sys_dev1, sys_dev2; |
123 | 106 | ucs_sys_dev_distance_t distance; |
124 | | - char distance_str[20]; |
125 | 107 | ucs_status_t status; |
126 | | - int name_width; |
| 108 | + ucs_table_row_t *row; |
| 109 | + ucs_table_t table; |
127 | 110 |
|
128 | | - /* Get maximal width of device name */ |
129 | | - name_width = 2 + strlen(distance_unit); |
130 | | - for (sys_dev1 = 0; sys_dev1 < num_devices; ++sys_dev1) { |
131 | | - name_width = ucs_max( |
132 | | - name_width, 2 + strlen(ucs_topo_sys_device_get_name(sys_dev1))); |
133 | | - } |
| 111 | + printf("#\n# System topology\n#\n"); |
| 112 | + |
| 113 | + ucs_table_init(&table, &cfg); |
134 | 114 |
|
135 | | - print_table_header(system_topo_title, distance_unit, distance_width, |
136 | | - name_width, num_devices); |
| 115 | + print_sys_topo_add_devices_header(&table, "MB/s", num_devices); |
137 | 116 |
|
138 | | - /* Print table content */ |
139 | 117 | for (sys_dev1 = 0; sys_dev1 < num_devices; ++sys_dev1) { |
140 | | - print_row_separator(distance_width, name_width, num_devices, ' ', '|'); |
| 118 | + if (sys_dev1 > 0) { |
| 119 | + ucs_table_add_separator(&table); |
| 120 | + } |
| 121 | + |
| 122 | + print_sys_topo_add_padding(&table); |
| 123 | + |
| 124 | + row = ucs_table_add_row(&table); |
| 125 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%s", |
| 126 | + ucs_topo_sys_device_get_name(sys_dev1)); |
141 | 127 |
|
142 | | - printf("# |%*s ", name_width - 1, |
143 | | - ucs_topo_sys_device_get_name(sys_dev1)); |
144 | 128 | for (sys_dev2 = 0; sys_dev2 < num_devices; ++sys_dev2) { |
145 | 129 | if (sys_dev1 == sys_dev2) { |
146 | 130 | /* Do not print distance of device to itself */ |
147 | | - strncpy(distance_str, "-", sizeof(distance_str)); |
| 131 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%s", |
| 132 | + "-"); |
| 133 | + continue; |
| 134 | + } |
| 135 | + |
| 136 | + status = ucs_topo_get_distance(sys_dev1, sys_dev2, &distance); |
| 137 | + if (status != UCS_OK) { |
| 138 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, |
| 139 | + "<%s>", ucs_status_string(status)); |
| 140 | + } else if (distance.bandwidth > UCS_PBYTE) { |
| 141 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%s", |
| 142 | + "inf"); |
148 | 143 | } else { |
149 | | - status = ucs_topo_get_distance(sys_dev1, sys_dev2, &distance); |
150 | | - if (status != UCS_OK) { |
151 | | - ucs_snprintf_safe(distance_str, sizeof(distance_str), |
152 | | - "<%s>", ucs_status_string(status)); |
153 | | - } else if (distance.bandwidth > UCS_PBYTE) { |
154 | | - ucs_snprintf_safe(distance_str, sizeof(distance_str), |
155 | | - "inf"); |
156 | | - } else { |
157 | | - ucs_snprintf_safe(distance_str, sizeof(distance_str), |
158 | | - "%.1f", distance.bandwidth / UCS_MBYTE); |
159 | | - } |
| 144 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, |
| 145 | + "%.1f", |
| 146 | + distance.bandwidth / UCS_MBYTE); |
160 | 147 | } |
161 | | - printf("|%*s ", distance_width - 1, distance_str); |
162 | 148 | } |
163 | | - printf("|\n"); |
164 | 149 |
|
165 | | - print_row_separator(distance_width, name_width, num_devices, ' ', '|'); |
166 | | - print_row_separator(distance_width, name_width, num_devices, '-', '+'); |
| 150 | + print_sys_topo_add_padding(&table); |
167 | 151 | } |
168 | 152 |
|
169 | | - print_table_header(memory_latency_title, "device", distance_width, |
170 | | - name_width, num_devices); |
171 | | - print_row_separator(distance_width, name_width, num_devices, ' ', '|'); |
| 153 | + ucs_table_print(&table); |
| 154 | + ucs_table_cleanup(&table); |
| 155 | +} |
172 | 156 |
|
173 | | - printf("# |%*s ", name_width - 1, numa_distance_unit); |
174 | | - printf("|"); |
175 | | - for (sys_dev1 = 0; sys_dev1 < num_devices; ++sys_dev1) { |
176 | | - ucs_topo_get_memory_distance(sys_dev1, &distance); |
177 | | - ucs_snprintf_safe(distance_str, sizeof(distance_str), "%.1f", |
178 | | - distance.latency * UCS_NSEC_PER_SEC); |
179 | | - printf("%*s |", distance_width - 1, distance_str); |
| 157 | +static void print_sys_topo_memory_latency(unsigned num_devices) |
| 158 | +{ |
| 159 | + ucs_table_config_t cfg = { |
| 160 | + .n_cols = 1 + num_devices, |
| 161 | + .row_prefix = "# ", |
| 162 | + .equal_widths = 1, |
| 163 | + }; |
| 164 | + ucs_sys_dev_distance_t distance; |
| 165 | + ucs_sys_device_t sys_dev; |
| 166 | + ucs_table_row_t *row; |
| 167 | + ucs_table_t table; |
| 168 | + |
| 169 | + printf("#\n# NUMA memory latency\n#\n"); |
| 170 | + |
| 171 | + ucs_table_init(&table, &cfg); |
| 172 | + |
| 173 | + print_sys_topo_add_devices_header(&table, "device", num_devices); |
| 174 | + |
| 175 | + print_sys_topo_add_padding(&table); |
| 176 | + |
| 177 | + row = ucs_table_add_row(&table); |
| 178 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%s", "nsec"); |
| 179 | + for (sys_dev = 0; sys_dev < num_devices; ++sys_dev) { |
| 180 | + ucs_topo_get_memory_distance(sys_dev, &distance); |
| 181 | + ucs_table_row_add_cell_fmt(row, 1, UCS_TABLE_ALIGN_RIGHT, "%.1f", |
| 182 | + distance.latency * UCS_NSEC_PER_SEC); |
180 | 183 | } |
181 | 184 |
|
182 | | - printf("\n"); |
183 | | - print_row_separator(distance_width, name_width, num_devices, ' ', '|'); |
184 | | - print_row_separator(distance_width, name_width, num_devices, '-', '+'); |
| 185 | + print_sys_topo_add_padding(&table); |
| 186 | + |
| 187 | + ucs_table_print(&table); |
| 188 | + ucs_table_cleanup(&table); |
185 | 189 |
|
186 | 190 | printf("# Memory latency is calculated according to the CPU affinity\n"); |
187 | 191 | } |
188 | 192 |
|
| 193 | +static void print_sys_topo() |
| 194 | +{ |
| 195 | + const unsigned num_devices = ucs_topo_num_devices(); |
| 196 | + |
| 197 | + print_sys_topo_distances(num_devices); |
| 198 | + print_sys_topo_memory_latency(num_devices); |
| 199 | +} |
| 200 | + |
189 | 201 | static double measure_timer_accuracy() |
190 | 202 | { |
191 | 203 | double elapsed, elapsed_accurate; |
|
0 commit comments