Skip to content

Commit 93df596

Browse files
author
David Eberius
committed
Fixed a bug with disabling the mmap interface. Updated and added SPC examples for MPI_T, mmap, and snapshot SPC usage. Added SPCs for tracking time spent matching out of sequence messages and inserting them into the OOS queue. Added documentation for SPCs in the form of a markdown file in the runtime directory.
Signed-off-by: David Eberius <deberius@vols.utk.edu>
1 parent 67834be commit 93df596

8 files changed

Lines changed: 1263 additions & 13 deletions

File tree

examples/Makefile

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,15 @@ EXAMPLES = \
6868
oshmem_max_reduction \
6969
oshmem_strided_puts \
7070
oshmem_symmetric_data \
71-
spc_example
71+
spc_mpit_example \
72+
spc_mmap_example
7273

7374

7475
# Default target. Always build the C MPI examples. Only build the
7576
# others if we have the appropriate Open MPI / OpenSHMEM language
7677
# bindings.
7778

78-
all: hello_c ring_c connectivity_c spc_example
79+
all: hello_c ring_c connectivity_c spc_mpit_example spc_mmap_example
7980
@ if which ompi_info >/dev/null 2>&1 ; then \
8081
$(MAKE) mpi; \
8182
fi
@@ -133,7 +134,9 @@ ring_c: ring_c.c
133134
$(MPICC) $(CFLAGS) $(LDFLAGS) $? $(LDLIBS) -o $@
134135
connectivity_c: connectivity_c.c
135136
$(MPICC) $(CFLAGS) $(LDFLAGS) $? $(LDLIBS) -o $@
136-
spc_example: spc_example.c
137+
spc_mpit_example: spc_mpit_example.c
138+
$(MPICC) $(CFLAGS) $(LDFLAGS) $? $(LDLIBS) -o $@
139+
spc_mmap_example: spc_mmap_example.c
137140
$(MPICC) $(CFLAGS) $(LDFLAGS) $? $(LDLIBS) -o $@
138141

139142
hello_cxx: hello_cxx.cc

examples/spc_mmap_example.c

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
/*
2+
* Copyright (c) 2020 The University of Tennessee and The University
3+
* of Tennessee Research Foundation. All rights
4+
* reserved.
5+
*
6+
* Simple example usage of SPCs through an mmap'd file.
7+
*/
8+
9+
#include <stdlib.h>
10+
#include <stdio.h>
11+
#include <string.h>
12+
#include <unistd.h>
13+
#include <sys/mman.h>
14+
#include <sys/types.h>
15+
#include <sys/stat.h>
16+
#include <fcntl.h>
17+
#include <errno.h>
18+
19+
#include <mpi.h>
20+
21+
/* Per-counter record filled in while parsing the XML description.
 * It keeps the counter's name together with the offsets read for its
 * value, its bin rules and its bins, so the counter can be printed later.
 */
struct spc_s {
    char name[128];    /* counter name as written in the XML file */
    int offset;        /* offset of the counter's value */
    int rules_offset;  /* offset of the counter's bin rules */
    int bins_offset;   /* offset of the counter's bins */
};

typedef struct spc_s spc_t;
30+
31+
int main(int argc, char **argv)
32+
{
33+
if(argc < 4) {
34+
printf("Usage: ./spc_mmap_test [num_messages] [message_size] [XML string]\n");
35+
return -1;
36+
}
37+
38+
MPI_Init(NULL, NULL);
39+
40+
int i, num_messages = atoi(argv[1]), message_size = atoi(argv[2]), rank, shm_fd;
41+
char *buf = (char*)malloc(message_size * sizeof(char));
42+
43+
MPI_Request *requests = (MPI_Request*)malloc(num_messages * sizeof(MPI_Request));
44+
MPI_Status *statuses = (MPI_Status*)malloc(num_messages * sizeof(MPI_Status));
45+
MPI_Status status;
46+
47+
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
48+
49+
int retval, shm_file_size, num_counters, freq_mhz;
50+
long long value;
51+
char filename[128], shm_filename[128], line[128], *token;
52+
53+
char hostname[128];
54+
gethostname(hostname, 128);
55+
56+
char *nodename;
57+
nodename = strtok(hostname, ".");
58+
59+
char *xml_string = argv[3];
60+
snprintf(filename, 128, "/dev/shm/spc_data.%s.%s.%d.xml", nodename, xml_string, rank);
61+
62+
FILE *fptr = NULL;
63+
void *data_ptr;
64+
spc_t *spc_data;
65+
66+
if(NULL == (fptr = fopen(filename, "r"))) {
67+
printf("Couldn't open xml file.\n");
68+
MPI_Finalize();
69+
return -1;
70+
} else {
71+
printf("[%d] Successfully opened the XML file!\n", rank);
72+
}
73+
74+
/* The following is to read the formatted XML file to get the basic
75+
* information we need to read the shared memory file and properly
76+
* format some counters.
77+
*/
78+
char tmp_filename[128];
79+
fgets(line, 128, fptr);
80+
fgets(line, 128, fptr);
81+
82+
fgets(line, 128, fptr);
83+
token = strtok(line, ">");
84+
token = strtok(NULL, "<");
85+
sscanf(token, "%s", shm_filename);
86+
87+
if(rank == 0) {
88+
printf("shm_filename: %s\n", shm_filename);
89+
}
90+
91+
fgets(line, 128, fptr);
92+
token = strtok(line, ">");
93+
token = strtok(NULL, "<");
94+
sscanf(token, "%d", &shm_file_size);
95+
if(rank == 0) {
96+
printf("shm_file_size: %d\n", shm_file_size);
97+
}
98+
99+
fgets(line, 128, fptr);
100+
token = strtok(line, ">");
101+
token = strtok(NULL, "<");
102+
sscanf(token, "%d", &num_counters);
103+
if(rank == 0) {
104+
printf("num_counters: %d\n", num_counters);
105+
}
106+
107+
fgets(line, 128, fptr);
108+
token = strtok(line, ">");
109+
token = strtok(NULL, "<");
110+
sscanf(token, "%d", &freq_mhz);
111+
if(rank == 0) {
112+
printf("freq_mhz: %d\n", freq_mhz);
113+
}
114+
115+
if(-1 == (shm_fd = open(shm_filename, O_RDONLY))){
116+
printf("\nCould not open file '%s'... Error String: %s\n", shm_filename, strerror(errno));
117+
return -1;
118+
} else {
119+
if(MAP_FAILED == (data_ptr = mmap(0, 8192, PROT_READ, MAP_SHARED, shm_fd, 0))) {
120+
printf("Map failed :(\n");
121+
return -1;
122+
}
123+
printf("Successfully mmap'd file!\n");
124+
}
125+
126+
spc_data = (spc_t*)malloc(num_counters * sizeof(spc_t));
127+
128+
for(i = 0; i < num_counters; i++) {
129+
fgets(line, 128, fptr); /* Counter begin header */
130+
/* This should never happen... */
131+
if(strcmp(line,"</SPC>\n") == 0) {
132+
printf("Parsing ended prematurely. There weren't enough counters.\n");
133+
break;
134+
}
135+
136+
fgets(line, 128, fptr); /* Counter name header */
137+
token = strtok(line, ">");
138+
token = strtok(NULL, "<");
139+
sscanf(token, "%s", spc_data[i].name); /* Counter name */
140+
141+
fgets(line, 128, fptr); /* Counter value offset header */
142+
token = strtok(line, ">");
143+
token = strtok(NULL, "<");
144+
sscanf(token, "%d", &spc_data[i].offset); /* Counter offset */
145+
146+
fgets(line, 128, fptr); /* Counter rules offset header */
147+
token = strtok(line, ">");
148+
token = strtok(NULL, "<");
149+
sscanf(token, "%d", &spc_data[i].rules_offset); /* Counter rules offset */
150+
151+
fgets(line, 128, fptr); /* Counter bins offset header */
152+
token = strtok(line, ">");
153+
token = strtok(NULL, "<");
154+
sscanf(token, "%d", &spc_data[i].bins_offset); /* Counter bins offset */
155+
156+
fgets(line, 128, fptr); /* Counter end header */
157+
}
158+
159+
fclose(fptr);
160+
161+
/* The following communication pattern is intended to cause a certain
162+
* number of unexpected messages.
163+
*/
164+
if(rank==0) {
165+
for(i=num_messages; i > 0; i--) {
166+
MPI_Isend(buf, message_size, MPI_BYTE, 1, i, MPI_COMM_WORLD, &requests[i-1]);
167+
}
168+
MPI_Send(buf, message_size, MPI_BYTE, 1, 0, MPI_COMM_WORLD);
169+
MPI_Waitall(num_messages, requests, statuses);
170+
171+
MPI_Barrier(MPI_COMM_WORLD);
172+
173+
for(i = 0; i < num_counters; i++) {
174+
if((0 == strcmp(spc_data[i].name, "OMPI_SPC_MATCH_TIME")) || (0 == strcmp(spc_data[i].name, "OMPI_SPC_MATCH_QUEUE_TIME"))) {
175+
value = (*((long long*)(data_ptr+spc_data[i].offset))) / freq_mhz;
176+
} else {
177+
value = *((long long*)(data_ptr+spc_data[i].offset));
178+
}
179+
if(value > 0)
180+
printf("[%d] %s\t%lld\n", rank, spc_data[i].name, value );
181+
}
182+
MPI_Barrier(MPI_COMM_WORLD);
183+
} else {
184+
MPI_Recv(buf, message_size, MPI_BYTE, 0, 0, MPI_COMM_WORLD, &status);
185+
for(i=0; i < num_messages; i++) {
186+
MPI_Recv(buf, message_size, MPI_BYTE, 0, i+1, MPI_COMM_WORLD, &statuses[i]);
187+
}
188+
189+
MPI_Barrier(MPI_COMM_WORLD);
190+
MPI_Barrier(MPI_COMM_WORLD);
191+
for(i = 0; i < num_counters; i++) {
192+
/* These counters are stored in cycles, so we convert them to microseconds.
193+
*/
194+
if((0 == strcmp(spc_data[i].name, "OMPI_SPC_MATCH_TIME")) || (0 == strcmp(spc_data[i].name, "OMPI_SPC_MATCH_QUEUE_TIME"))) {
195+
value = (*((long long*)(data_ptr+spc_data[i].offset))) / freq_mhz;
196+
} else {
197+
value = *((long long*)(data_ptr+spc_data[i].offset));
198+
}
199+
if(value > 0) {
200+
printf("[%d] %s\t%lld\n", rank, spc_data[i].name, value );
201+
if(spc_data[i].rules_offset > 0) {
202+
int j, *rules = (int*)(data_ptr+spc_data[i].rules_offset);
203+
long long *bins = (long long*)(data_ptr+spc_data[i].bins_offset);
204+
205+
for(j = 0; j < rules[0]; j++) {
206+
if(j == rules[0]-1) {
207+
printf("\t> %d\t", rules[j]);
208+
}
209+
else {
210+
printf("\t<= %d\t", rules[j+1]);
211+
}
212+
printf("%lld\n", bins[j]);
213+
}
214+
}
215+
}
216+
}
217+
}
218+
219+
MPI_Finalize();
220+
221+
return 0;
222+
}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2018 The University of Tennessee and The University
2+
* Copyright (c) 2020 The University of Tennessee and The University
33
* of Tennessee Research Foundation. All rights
44
* reserved.
55
*
@@ -109,7 +109,7 @@ int main(int argc, char **argv)
109109
}
110110
MPI_Barrier(MPI_COMM_WORLD);
111111

112-
/* Determine the MPI_T pvar indices for the OMPI_BYTES_SENT/RECIEVED_USER SPCs */
112+
/* Determine the MPI_T pvar indices for the requested SPCs */
113113
index = xml_index = -1;
114114
MPI_T_pvar_get_num(&num);
115115
for(i = 0; i < num; i++) {
@@ -174,7 +174,7 @@ int main(int argc, char **argv)
174174
printf("[%d] XML Counter Value Read: %s\n", rank, xml_filename);
175175
}
176176
for(j = 0; j < count; j++) {
177-
printf("[%d] Counter Value Read: %lld\n", rank, values[j]);
177+
printf("[%d] %s Counter Value Read: %lld\n", rank, counter_names[rank], values[j]);
178178
}
179179
fflush(stdout);
180180
}

0 commit comments

Comments
 (0)