Skip to content

Commit 4061885

Browse files
Merge pull request #23 from InfiniTensor/feat/dashboard-streamlit
feat: add Streamlit dashboard for InfiniMetrics
2 parents 30f5b00 + 69fee5f commit 4061885

11 files changed

Lines changed: 1734 additions & 0 deletions

File tree

dashboard/__init__.py

Whitespace-only changes.

dashboard/app.py

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
#!/usr/bin/env python3
2+
"""Main Streamlit application for InfiniMetrics dashboard."""
3+
4+
import streamlit as st
5+
import pandas as pd
6+
from pathlib import Path
7+
import sys
8+
from datetime import datetime
9+
from infinimetrics.common.constants import AcceleratorType
10+
11+
# Add project root to path
12+
project_root = Path(__file__).parent
13+
sys.path.append(str(project_root))
14+
15+
from components.header import render_header
16+
from utils.data_loader import InfiniMetricsDataLoader, load_summary_file
17+
18+
# Page configuration
19+
_PAGE_SETTINGS = dict(
    page_title="InfiniMetrics Dashboard",
    page_icon="🏭",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_SETTINGS)

# Initialize session state: every entry is created only when it is absent,
# so values already stored in the session are left untouched.
for _state_key, _make_default in (
    ("data_loader", InfiniMetricsDataLoader),
    ("selected_accelerators", list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
31+
32+
33+
def main():
    """Render the header, the sidebar controls and the main dashboard.

    Sidebar widgets collect the results directory, the auto-refresh switch,
    the accelerator filter (stored in ``st.session_state.selected_accelerators``)
    and a run-id substring, which is forwarded to ``render_dashboard``.

    When auto-refresh is enabled the page is rendered first and the script is
    re-run after a short pause.  Calling ``st.rerun()`` straight away inside
    the sidebar (as was done before) restarts the script before anything
    below the toggle is drawn, on every run, so the page never shows up.
    """
    # Local import so this function does not rely on a file-level import.
    import time

    # Pause between two automatic refreshes. Widget interactions made while
    # the script sleeps are handled once the pause is over, so keep it short.
    refresh_interval_seconds = 5

    render_header()

    # =========================
    # Sidebar
    # =========================

    with st.sidebar:
        st.markdown("## ⚙️ 设置")

        results_dir = st.text_input(
            "测试结果目录", value="./test_output", help="包含 JSON/CSV 测试结果的目录"
        )

        # Rebuild the loader only when the directory text differs from the
        # one the current loader reports.
        if results_dir != str(st.session_state.data_loader.results_dir):
            st.session_state.data_loader = InfiniMetricsDataLoader(results_dir)

        # Only read the switch here; the actual rerun happens at the very end
        # of main() so the rest of the page gets rendered first.
        auto_refresh = st.toggle("自动刷新", value=False)

        st.markdown("---")
        st.markdown("## 🧠 筛选条件")

        # Base accelerator types from constants.py
        ACCELERATOR_OPTIONS = ["cpu"] + [a.value for a in AcceleratorType]

        # UI display names (only labels live here)
        ACCELERATOR_LABELS = {
            "cpu": "CPU",
            AcceleratorType.NVIDIA.value: "NVIDIA",
            AcceleratorType.AMD.value: "AMD",
            AcceleratorType.ASCEND.value: "昇腾 NPU",
            AcceleratorType.CAMBRICON.value: "寒武纪 MLU",
            AcceleratorType.GENERIC.value: "Generic",
        }

        selected_accs = st.multiselect(
            "加速卡类型",
            options=ACCELERATOR_OPTIONS,
            default=ACCELERATOR_OPTIONS,
            # Fall back to the raw value for types without a display label.
            format_func=lambda x: ACCELERATOR_LABELS.get(x, x),
        )
        st.session_state.selected_accelerators = selected_accs

        run_id_filter = st.text_input("Run ID 模糊搜索")
        # test_type / testcase filtering will be applied dynamically after runs are loaded

    render_dashboard(run_id_filter)

    if auto_refresh:
        time.sleep(refresh_interval_seconds)
        st.rerun()
82+
83+
84+
def _parse_run_time(raw):
    """Turn a run's ISO-8601 time string into a sortable naive datetime.

    Timezone-aware values are shifted to UTC and stripped of their tzinfo so
    they can be compared with naive values and with the ``datetime.min``
    fallback (Python refuses to order aware against naive datetimes).
    Naive values are kept as they are -- NOTE(review): this treats them as if
    they were UTC for ordering purposes; confirm against the result writer.
    Anything that cannot be parsed sorts as ``datetime.min``.
    """
    try:
        parsed = datetime.fromisoformat(raw)
        offset = parsed.utcoffset()
        if offset is not None:
            parsed = parsed.replace(tzinfo=None) - offset
        return parsed
    except (TypeError, ValueError, OverflowError):
        return datetime.min


def _success_rate_text(summary):
    """Format successful/total of one dispatcher summary as a percentage, or "-"."""
    total = summary.get("total_tests")
    passed = summary.get("successful_tests")
    if not total or passed is None:
        return "-"
    try:
        return f"{passed / total * 100:.1f}%"
    except TypeError:
        # Non-numeric counters: show a placeholder instead of failing the page.
        return "-"


def _render_latest_card(column, heading, empty_text, run):
    """Show testcase, time and status of ``run`` in ``column``, or ``empty_text`` if there is none."""
    with column:
        st.markdown(heading)
        if not run:
            st.info(empty_text)
            return
        st.write(f"- testcase: `{run.get('testcase', '')}`")
        st.write(f"- time: {run.get('time', '')}")
        st.write(f"- status: {'✅' if run.get('success') else '❌'}")


def render_dashboard(run_id_filter: str):
    """Render the overview page: intro, KPIs, latest results, tables, navigation.

    Runs are read from ``st.session_state.data_loader.list_test_runs()``,
    filtered by the accelerators stored in
    ``st.session_state.selected_accelerators`` and by ``run_id_filter``, then
    sorted newest first.

    Args:
        run_id_filter: Substring that a run's ``run_id`` must contain; an
            empty string disables this filter.

    Any exception raised while loading or rendering is reported with
    ``st.error`` instead of propagating.
    """
    st.markdown(
        """
        <h1 style="margin-bottom: 0.2em;">
        📊 综合仪表板
        </h1>
        """,
        unsafe_allow_html=True,
    )

    st.markdown(
        """
        <div style="
        margin-top: 0.5em;
        margin-bottom: 1.5em;
        max-width: 1100px;
        font-size: 1.05em;
        line-height: 1.6;
        ">
        <strong>InfiniMetrics Dashboard</strong> 用于统一展示
        <strong>通信(NCCL / 集合通信)</strong>、
        <strong>推理(Direct / Service)</strong>、
        <strong>算子(核心算子性能)</strong>
        等 AI 加速卡性能测试结果。
        <br/>
        测试框架输出 <code>JSON</code>(环境 / 配置 / 标量指标) +
        <code>CSV</code>(曲线 / 时序数据),
        Dashboard 自动加载并支持多次运行的对比分析与可视化。
        </div>
        """,
        unsafe_allow_html=True,
    )

    try:
        runs = st.session_state.data_loader.list_test_runs()

        # ========== Accelerator filtering ==========
        selected_accs = st.session_state.get("selected_accelerators", [])
        if selected_accs:
            wanted = set(selected_accs)
            # Keep a run when it shares at least one accelerator type with
            # the selection; a missing/None list counts as "no accelerators".
            runs = [
                r for r in runs if wanted & set(r.get("accelerator_types") or [])
            ]

        # ========== run_id filtering ==========
        if run_id_filter:
            runs = [r for r in runs if run_id_filter in (r.get("run_id") or "")]

        if not runs:
            st.warning("No test results match the current filters.")
            return

        # ========== Sort by time (latest first) ==========
        runs = sorted(
            runs, key=lambda r: _parse_run_time(r.get("time", "")), reverse=True
        )

        total = len(runs)
        success = sum(1 for r in runs if r.get("success"))
        fail = total - success

        # ========== Categorize runs ==========
        # A run may land in several buckets (e.g. operator and hardware), so
        # every check is independent.  Lists keep the newest-first order.
        comm_runs, infer_runs, ops_runs, hw_runs = [], [], [], []
        for r in runs:
            testcase = r.get("testcase") or ""
            if testcase.startswith("comm"):
                comm_runs.append(r)
            if testcase.startswith("infer"):
                infer_runs.append(r)

            p = str(r.get("path", "")).replace("\\", "/").lower()
            tc = testcase.lower()
            if "/operators/" in p or tc.startswith(("operator", "operators", "ops")):
                ops_runs.append(r)
            if "/hardware/" in p or tc.startswith("hardware"):
                hw_runs.append(r)

        # ========== KPI ==========
        c1, c2, c3, c4, c5, c6 = st.columns(6)
        c1.metric("总测试数", total)
        # total > 0 here: the empty case returned above.
        c2.metric("成功率", f"{(success/total*100):.1f}%")
        c3.metric("通信测试", len(comm_runs))
        c4.metric("推理测试", len(infer_runs))
        c5.metric("算子测试", len(ops_runs))
        c6.metric("硬件检测", len(hw_runs))

        st.caption(f"失败测试数:{fail}")
        st.caption(f"当前筛选:加速卡={','.join(selected_accs) or '全部'}")

        st.divider()

        # ========== Latest results ==========
        colA, colB, colC = st.columns(3)
        _render_latest_card(
            colA, "#### 🔗 通信(最新)", "暂无通信结果",
            comm_runs[0] if comm_runs else None,
        )
        _render_latest_card(
            colB, "#### 🚀 推理(最新)", "暂无推理结果",
            infer_runs[0] if infer_runs else None,
        )
        _render_latest_card(
            colC, "#### ⚡ 算子(最新)", "暂无算子结果",
            ops_runs[0] if ops_runs else None,
        )

        st.divider()

        # ========== Recent runs table ==========
        st.markdown("### 🕒 最近测试运行")
        recent_df = pd.DataFrame(
            [
                {
                    "类型": (
                        (r.get("testcase") or "").split(".")[0] or "UNKNOWN"
                    ).upper(),
                    "加速卡": ", ".join(r.get("accelerator_types") or []),
                    "时间": r.get("time", ""),
                    "状态": "✅" if r.get("success") else "❌",
                    "run_id": (r.get("run_id") or "")[:32],
                }
                for r in runs[:15]
            ]
        )
        st.dataframe(recent_df, use_container_width=True, hide_index=True)

        # ========== Dispatcher summary ==========
        summaries = load_summary_file()

        if summaries:
            st.markdown("### 🧾 Dispatcher 汇总记录")

            rows = [
                {
                    "时间": s.get("timestamp"),
                    "总测试数": s.get("total_tests"),
                    "成功": s.get("successful_tests"),
                    "失败": s.get("failed_tests"),
                    "成功率": _success_rate_text(s),
                    "文件": s.get("file"),
                }
                for s in summaries
            ]

            summary_df = pd.DataFrame(rows).sort_values("时间", ascending=False)

            st.dataframe(
                summary_df,
                use_container_width=True,
                hide_index=True,
            )
        else:
            # No early return: the navigation below must stay reachable even
            # when no dispatcher summary is available.
            st.info("No dispatcher_summary file found")

        # ========== Quick navigation ==========
        st.markdown("---")
        st.markdown("### 🚀 快速导航")

        col1, col2, col3 = st.columns(3)
        if col1.button("🔗 通信测试分析", use_container_width=True):
            st.switch_page("pages/communication.py")
        if col2.button("⚡ 算子测试分析", use_container_width=True):
            st.switch_page("pages/operator.py")
        if col3.button("🤖 推理测试分析", use_container_width=True):
            st.switch_page("pages/inference.py")

    except Exception as e:
        # UI boundary: surface the failure on the page rather than crashing.
        st.error(f"Dashboard 加载失败: {e}")
280+
281+
282+
# Entry point: build the page when this module is run as the main script.
if __name__ == "__main__":
    main()

dashboard/common.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/env python3
2+
"""Common utilities for dashboard pages."""
3+
4+
import streamlit as st
5+
import sys
6+
from pathlib import Path
7+
8+
9+
def init_page(page_title: str, page_icon: str):
    """
    Shared initialization for a dashboard page.

    Steps, in order:
    - append the project root (two directory levels above this file) to
      ``sys.path`` unless it is already listed
    - apply the Streamlit page configuration with a wide layout
    - create the ``data_loader`` session entry when the session has none

    Args:
        page_title: Title passed to ``st.set_page_config``.
        page_icon: Icon passed to ``st.set_page_config``.
    """
    # Make the project root importable
    root_entry = str(Path(__file__).parent.parent)
    if root_entry not in sys.path:
        sys.path.append(root_entry)

    # Page configuration
    st.set_page_config(page_title=page_title, page_icon=page_icon, layout="wide")

    # A loader already stored in the session is reused as-is
    if "data_loader" in st.session_state:
        return

    # Imported here, after the path setup above, and only when a new loader
    # actually has to be created
    from utils.data_loader import InfiniMetricsDataLoader

    st.session_state.data_loader = InfiniMetricsDataLoader()

dashboard/components/header.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env python3
2+
"""Header component for InfiniMetrics dashboard."""
3+
4+
import streamlit as st
5+
6+
7+
def render_header():
    """Emit the header styles followed by the title and subtitle banners."""
    header_css = """
        <style>
        .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1E3A8A;
        text-align: center;
        margin-bottom: 1rem;
        }
        .sub-header {
        font-size: 1.2rem;
        color: #6B7280;
        text-align: center;
        margin-bottom: 2rem;
        }
        </style>
        """
    st.markdown(header_css, unsafe_allow_html=True)

    # (CSS class, text) for each banner line, in display order.
    banner_lines = (
        ("main-header", "🏭 InfiniMetrics 测试结果展示平台"),
        ("sub-header", "AI加速卡通信、算力、推理性能一站式分析与可视化"),
    )
    for css_class, text in banner_lines:
        st.markdown(
            f'<div class="{css_class}">{text}</div>',
            unsafe_allow_html=True,
        )

0 commit comments

Comments
 (0)