Skip to content

Commit cc804e9

Browse files
committed
feat(dashboard): Optimizing the Streamlit dashboard architecture
1 parent 54e5bbf commit cc804e9

File tree

7 files changed

+487
-511
lines changed

7 files changed

+487
-511
lines changed

dashboard/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
)
2525

2626
# Initialize session state
27-
if "ni" not in st.session_state:
27+
if "data_loader" not in st.session_state:
2828
st.session_state.data_loader = InfiniMetricsDataLoader()
2929
if "selected_accelerators" not in st.session_state:
3030
st.session_state.selected_accelerators = []

dashboard/pages/communication.py

Lines changed: 25 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -179,64 +179,31 @@ def main():
179179
fig = plot_comparison_matrix(selected_runs, "latency", y_log_scale)
180180
st.plotly_chart(fig, use_container_width=True)
181181

182-
if len(selected_runs) == 1:
183-
st.markdown("#### 📌 核心指标(最新)")
184-
run = selected_runs[0]
185-
186-
max_bw = None
187-
avg_lat = None
188-
duration = None
189-
190-
for metric in run.get("data", {}).get("metrics", []):
191-
metric_name = metric.get("name", "")
192-
193-
# bandwidth
194-
if (
195-
metric_name == "comm.bandwidth"
196-
and metric.get("data") is not None
197-
):
198-
df = metric["data"]
199-
if "bandwidth_gbs" in df.columns:
200-
max_bw = df["bandwidth_gbs"].max()
201-
202-
# latency
203-
elif (
204-
metric_name == "comm.latency" and metric.get("data") is not None
205-
):
206-
df = metric["data"]
207-
if "latency_us" in df.columns:
208-
avg_lat = df["latency_us"].mean()
209-
210-
# duration
211-
elif metric_name == "comm.duration":
212-
duration = metric.get("value")
213-
c1, c2, c3 = st.columns(3)
214-
215-
with c1:
216-
if max_bw is not None and max_bw > 0:
217-
st.metric("峰值带宽", f"{max_bw:.2f} GB/s")
218-
else:
219-
st.metric("峰值带宽", "-")
220-
221-
with c2:
222-
if avg_lat is not None and avg_lat > 0:
223-
st.metric("平均延迟", f"{avg_lat:.2f} μs")
224-
else:
225-
st.metric("平均延迟", "-")
226-
227-
with c3:
228-
if duration is not None and duration > 0:
229-
st.metric("测试耗时", f"{duration:.2f} ms")
230-
else:
231-
st.metric("测试耗时", "-")
232-
# Gauge charts for key metrics
233182
if len(selected_runs) == 1:
234183
st.markdown("#### 关键指标")
235184
run = selected_runs[0]
185+
core = extract_core_metrics(run)
186+
187+
# First row: numeric indicators
188+
cols = st.columns(3)
189+
cols[0].metric(
190+
"峰值带宽",
191+
f"{core['bandwidth_gbps']:.2f} GB/s"
192+
if core["bandwidth_gbps"]
193+
else "-",
194+
)
195+
cols[1].metric(
196+
"平均延迟",
197+
f"{core['latency_us']:.2f} μs" if core["latency_us"] else "-",
198+
)
199+
cols[2].metric(
200+
"测试耗时",
201+
f"{core['duration_ms']:.2f} ms" if core["duration_ms"] else "-",
202+
)
236203

237-
col1, col2, col3 = st.columns(3)
204+
cols = st.columns(3)
238205

239-
with col1:
206+
with cols[0]:
240207
# Find max bandwidth
241208
max_bw = 0
242209
for metric in run.get("data", {}).get("metrics", []):
@@ -249,15 +216,15 @@ def main():
249216
max_bw = df["bandwidth_gbs"].max()
250217
fig = create_gauge_chart(
251218
max_bw,
252-
300,
219+
300, # Theoretical max for A100 NVLink
253220
"峰值带宽",
254221
"blue",
255222
"GB/s",
256223
)
257224
st.plotly_chart(fig, use_container_width=True)
258225
break
259226

260-
with col2:
227+
with cols[1]:
261228
# Find average latency
262229
avg_lat = 0
263230
for metric in run.get("data", {}).get("metrics", []):
@@ -270,15 +237,15 @@ def main():
270237
avg_lat = df["latency_us"].mean()
271238
fig = create_gauge_chart(
272239
avg_lat,
273-
1000,
240+
1000, # Reference: 1000 µs
274241
"平均延迟",
275242
"red",
276243
"µs",
277244
)
278245
st.plotly_chart(fig, use_container_width=True)
279246
break
280247

281-
with col3:
248+
with cols[2]:
282249
# Extract duration
283250
duration = 0
284251
for metric in run.get("data", {}).get("metrics", []):
@@ -289,7 +256,7 @@ def main():
289256
if duration > 0:
290257
fig = create_gauge_chart(
291258
duration,
292-
duration * 2,
259+
duration * 2, # Reference value is 2x the measured duration, so the gauge always reads half-full
293260
"测试耗时",
294261
"green",
295262
"ms",

0 commit comments

Comments
 (0)