@@ -1225,15 +1225,16 @@ <h3 class="asr-category-title">
12251225 < p class ="lang-en "> Fun-ASR achieves industry-leading performance on multiple public datasets and industrial test sets. The following are detailed performance comparison data.</ p >
12261226 < p class ="lang-zh "> Fun-ASR 在多个公开数据集和工业测试集上均达到业界领先水平,以下为详细性能对比数据。</ p >
12271227
1228- <!-- 开源数据集性能对比 -->
1228+ <!-- 开源数据集性能对比 -->
12291229 < div class ="performance-table-container ">
12301230 < h4 class ="table-title "> Open-Source Dataset Performance (WER %)</ h4 >
12311231 < div class ="table-wrapper ">
12321232 < table class ="performance-table ">
12331233 < thead >
12341234 < tr >
12351235 < th > Test Set</ th >
1236- < th > GLM-ASR-Nano</ th >
1236+ < th > GLM-ASR-nano</ th >
1237+ < th > GLM-ASR-nano*</ th >
12371238 < th > Whisper-large-v3</ th >
12381239 < th > Seed-ASR</ th >
12391240 < th > Seed-ASR*</ th >
@@ -1242,120 +1243,145 @@ <h4 class="table-title">Open-Source Dataset Performance (WER %)</h4>
12421243 < th > FireRed-ASR</ th >
12431244 < th > Fun-ASR-nano</ th >
12441245 < th > Fun-ASR</ th >
1245- < th > Fun-ASR (1126)</ th >
12461246 </ tr >
12471247 </ thead >
12481248 < tbody >
1249+ < tr >
1250+ < td > Model Size</ td >
1251+ < td > 1.5B</ td >
1252+ < td > 1.5B</ td >
1253+ < td > 1.6B</ td >
1254+ < td > -</ td >
1255+ < td > -</ td >
1256+ < td > -</ td >
1257+ < td > -</ td >
1258+ < td > 1.1B</ td >
1259+ < td > 0.8B</ td >
1260+ < td > 7.7B</ td >
1261+ </ tr >
1262+ < tr >
1263+ < td > Open Source</ td >
1264+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1265+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1266+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1267+ < td > < span style ="color: #dc3545; font-weight: bold; "> ✗</ span > </ td >
1268+ < td > < span style ="color: #dc3545; font-weight: bold; "> ✗</ span > </ td >
1269+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1270+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1271+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1272+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1273+ < td > < span style ="color: #dc3545; font-weight: bold; "> ✗</ span > </ td >
1274+ </ tr >
12491275 < tr >
12501276 < td > AIShell1</ td >
1251- < td > 3.47</ td >
1277+ < td > 1.81</ td >
1278+ < td > 2.17</ td >
12521279 < td > 4.72</ td >
12531280 < td > 0.68</ td >
12541281 < td > 1.63</ td >
12551282 < td > 0.71</ td >
12561283 < td > 0.63</ td >
12571284 < td > 0.54</ td >
1258- < td > 1.76 </ td >
1285+ < td > 1.80 </ td >
12591286 < td > 1.22</ td >
1260- < td > 1.28</ td >
12611287 </ tr >
12621288 < tr >
12631289 < td > AIShell2</ td >
1290+ < td > -</ td >
12641291 < td > 3.47</ td >
12651292 < td > 4.68</ td >
12661293 < td > 2.27</ td >
12671294 < td > 2.76</ td >
12681295 < td > 2.86</ td >
12691296 < td > 2.10</ td >
12701297 < td > 2.58</ td >
1271- < td > 2.80</ td >
1272- < td > 2.30</ td >
1273- < td > 2.35</ td >
1298+ < td > 2.75</ td >
1299+ < td > 2.39</ td >
12741300 </ tr >
12751301 < tr >
12761302 < td > Fleurs-zh</ td >
1303+ < td > -</ td >
12771304 < td > 3.65</ td >
12781305 < td > 5.18</ td >
12791306 < td > 3.43</ td >
12801307 < td > 3.23</ td >
12811308 < td > 3.11</ td >
12821309 < td > 2.68</ td >
12831310 < td > 4.81</ td >
1284- < td > 3.47</ td >
12851311 < td > 2.56</ td >
12861312 < td > 2.53</ td >
12871313 </ tr >
12881314 < tr >
12891315 < td > Fleurs-en</ td >
1316+ < td > 5.78</ td >
12901317 < td > 6.95</ td >
12911318 < td > 6.23</ td >
12921319 < td > 9.39</ td >
12931320 < td > 9.39</ td >
12941321 < td > 6.99</ td >
12951322 < td > 3.03</ td >
12961323 < td > 10.79</ td >
1297- < td > 7.95</ td >
12981324 < td > 5.96</ td >
12991325 < td > 4.74</ td >
13001326 </ tr >
13011327 < tr >
13021328 < td > Librispeech-clean</ td >
1329+ < td > 2.00</ td >
13031330 < td > 2.17</ td >
13041331 < td > 1.86</ td >
13051332 < td > 1.58</ td >
13061333 < td > 2.8</ td >
13071334 < td > 1.32</ td >
13081335 < td > 1.17</ td >
13091336 < td > 1.84</ td >
1310- < td > 1.75</ td >
1311- < td > 1.57</ td >
1337+ < td > 1.76</ td >
13121338 < td > 1.51</ td >
13131339 </ tr >
13141340 < tr >
13151341 < td > Librispeech-other</ td >
1342+ < td > 4.19</ td >
13161343 < td > 4.43</ td >
13171344 < td > 3.43</ td >
13181345 < td > 2.84</ td >
13191346 < td > 5.69</ td >
13201347 < td > 2.63</ td >
13211348 < td > 2.42</ td >
13221349 < td > 4.52</ td >
1323- < td > 4.37</ td >
1324- < td > 3.24</ td >
1325- < td > 3.13</ td >
1350+ < td > 4.33</ td >
1351+ < td > 3.03</ td >
13261352 </ tr >
13271353 < tr >
13281354 < td > WenetSpeech Meeting</ td >
1355+ < td > 6.73</ td >
13291356 < td > 8.21</ td >
13301357 < td > 18.39</ td >
13311358 < td > 5.69</ td >
13321359 < td > 7.07</ td >
13331360 < td > 6.24</ td >
13341361 < td > 4.75</ td >
13351362 < td > 4.95</ td >
1336- < td > 8.78</ td >
1337- < td > 6.49</ td >
1338- < td > 6.53</ td >
1363+ < td > 6.60</ td >
1364+ < td > 6.17</ td >
13391365 </ tr >
13401366 < tr >
13411367 < td > WenetSpeech Net</ td >
1368+ < td > -</ td >
13421369 < td > 6.33</ td >
13431370 < td > 11.89</ td >
13441371 < td > 4.66</ td >
13451372 < td > 4.84</ td >
13461373 < td > 6.45</ td >
13471374 < td > 4.67</ td >
13481375 < td > 4.94</ td >
1349- < td > 6.28 </ td >
1376+ < td > 6.01 </ td >
13501377 < td > 5.46</ td >
1351- < td > 5.50</ td >
13521378 </ tr >
13531379 </ tbody >
13541380 </ table >
13551381 </ div >
13561382 < p style ="margin-top: 1rem; font-size: 0.9rem; color: var(--text-secondary); font-style: italic; ">
1357- < span class ="lang-en "> note : Seed-ASR* results are evaluated using the official API on volcengine</ span >
1358- < span class ="lang-zh "> 注:Seed-ASR* 结果使用 volcengine 上的官方 API 评估</ span >
1383+ < span class ="lang-en "> Note: Seed-ASR* results are evaluated using the official API on volcengine; GLM-ASR-nano* results are evaluated using the open-source checkpoint. </ span >
1384+ < span class ="lang-zh "> 注:Seed-ASR* 结果使用 volcengine 上的官方 API 评估;GLM-ASR-nano* 结果使用开源 checkpoint 评估。 </ span >
13591385 </ p >
13601386 </ div >
13611387
@@ -1368,8 +1394,8 @@ <h4 class="table-title">Industry Dataset Performance (WER %)</h4>
13681394 < tr >
13691395 < th > Test Set</ th >
13701396 < th > GLM-ASR-Nano</ th >
1371- < th > Seed-ASR</ th >
13721397 < th > Whisper-large-v3</ th >
1398+ < th > Seed-ASR</ th >
13731399 < th > FireRed-ASR</ th >
13741400 < th > Kimi-Audio</ th >
13751401 < th > Paraformer v2</ th >
@@ -1378,34 +1404,55 @@ <h4 class="table-title">Industry Dataset Performance (WER %)</h4>
13781404 </ tr >
13791405 </ thead >
13801406 < tbody >
1407+ < tr >
1408+ < td > Model Size</ td >
1409+ < td > 1.5B</ td >
1410+ < td > 1.6B</ td >
1411+ < td > -</ td >
1412+ < td > 1.1B</ td >
1413+ < td > 8B</ td >
1414+ < td > 0.2B</ td >
1415+ < td > 0.8B</ td >
1416+ < td > 7.7B</ td >
1417+ </ tr >
1418+ < tr >
1419+ < td > Open Source</ td >
1420+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1421+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1422+ < td > < span style ="color: #dc3545; font-weight: bold; "> ✗</ span > </ td >
1423+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1424+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1425+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1426+ < td > < span style ="color: #28a745; font-weight: bold; "> ✓</ span > </ td >
1427+ < td > < span style ="color: #dc3545; font-weight: bold; "> ✗</ span > </ td >
1428+ </ tr >
13811429 < tr >
13821430 < td > Nearfield</ td >
13831431 < td > 16.95</ td >
1384- < td > 7.20</ td >
13851432 < td > 16.58</ td >
1433+ < td > 7.20</ td >
13861434 < td > 10.10</ td >
13871435 < td > 9.02</ td >
13881436 < td > 8.11</ td >
13891437 < td > 7.79</ td >
13901438 < td > 6.31</ td >
13911439 </ tr >
13921440 < tr >
1393- < td > Fairfield </ td >
1441+ < td > Farfield </ td >
13941442 < td > 9.44</ td >
1395- < td > 4.59</ td >
13961443 < td > 22.21</ td >
1444+ < td > 4.59</ td >
13971445 < td > 7.49</ td >
13981446 < td > 10.95</ td >
13991447 < td > 9.55</ td >
14001448 < td > 5.79</ td >
14011449 < td > 4.34</ td >
14021450 </ tr >
1403-
14041451 < tr >
14051452 < td > Complex Background</ td >
14061453 < td > 23.79</ td >
1407- < td > 12.90</ td >
14081454 < td > 32.57</ td >
1455+ < td > 12.90</ td >
14091456 < td > 15.56</ td >
14101457 < td > 15.56</ td >
14111458 < td > 15.19</ td >
@@ -1415,8 +1462,8 @@ <h4 class="table-title">Industry Dataset Performance (WER %)</h4>
14151462 < tr >
14161463 < td > English General</ td >
14171464 < td > 16.47</ td >
1418- < td > 15.65</ td >
14191465 < td > 18.56</ td >
1466+ < td > 15.65</ td >
14201467 < td > 21.62</ td >
14211468 < td > 18.12</ td >
14221469 < td > 19.48</ td >
@@ -1426,71 +1473,69 @@ <h4 class="table-title">Industry Dataset Performance (WER %)</h4>
14261473 < tr >
14271474 < td > Opensource</ td >
14281475 < td > 4.67</ td >
1429- < td > 3.83</ td >
14301476 < td > 7.05</ td >
1477+ < td > 3.83</ td >
14311478 < td > 5.31</ td >
14321479 < td > 3.79</ td >
14331480 < td > 6.23</ td >
14341481 < td > 4.22</ td >
1435- < td > 3.68 </ td >
1482+ < td > 3.38 </ td >
14361483 </ tr >
1437- <!-- add dialect accent lyrics hiphop,注意顺序 -->
1438- < tr >
1439- < td > Dialect</ td >
1440- < td > 54.21</ td >
1441- < td > 29.45</ td >
1442- < td > 66.14</ td >
1443- < td > 52.82</ td >
1444- < td > 71.94</ td >
1445- < td > 41.16</ td >
1446- < td > 28.18</ td >
1447- < td > 19.55</ td >
1484+ < tr >
1485+ < td > Dialect</ td >
1486+ < td > 54.21</ td >
1487+ < td > 66.14</ td >
1488+ < td > 29.45</ td >
1489+ < td > 52.82</ td >
1490+ < td > 71.94</ td >
1491+ < td > 41.16</ td >
1492+ < td > 28.18</ td >
1493+ < td > 15.21</ td >
14481494 </ tr >
14491495 < tr >
14501496 < td > Accent</ td >
14511497 < td > 19.78</ td >
1452- < td > 10.23</ td >
14531498 < td > 36.03</ td >
1499+ < td > 10.23</ td >
14541500 < td > 14.05</ td >
14551501 < td > 27.20</ td >
14561502 < td > 17.80</ td >
14571503 < td > 12.90</ td >
1458- < td > 10.01 </ td >
1504+ < td > 10.31 </ td >
14591505 </ tr >
14601506 < tr >
14611507 < td > Lyrics</ td >
14621508 < td > 46.56</ td >
1463- < td > 30.26</ td >
14641509 < td > 54.82</ td >
1510+ < td > 30.26</ td >
14651511 < td > 42.87</ td >
14661512 < td > 65.18</ td >
14671513 < td > 50.14</ td >
14681514 < td > 30.85</ td >
1469- < td > 21.23 </ td >
1515+ < td > 21.00 </ td >
14701516 </ tr >
14711517 < tr >
14721518 < td > Hiphop</ td >
14731519 < td > 43.32</ td >
1474- < td > 29.46</ td >
14751520 < td > 46.56</ td >
1521+ < td > 29.46</ td >
14761522 < td > 33.88</ td >
14771523 < td > 57.25</ td >
14781524 < td > 43.79</ td >
14791525 < td > 30.87</ td >
1480- < td > 24.86 </ td >
1526+ < td > 28.58 </ td >
14811527 </ tr >
14821528 < tr class ="average-row ">
14831529 < td > Average</ td >
14841530 < td > 26.13</ td >
1485- < td > 15.95</ td >
14861531 < td > 33.39</ td >
1532+ < td > 15.95</ td >
14871533 < td > 22.63</ td >
14881534 < td > 31.00</ td >
14891535 < td > 23.49</ td >
14901536 < td > 16.72</ td >
1491- < td > 12.80 </ td >
1537+ < td > 12.70 </ td >
14921538 </ tr >
1493- </ tbody >
14941539 </ tbody >
14951540 </ table >
14961541 </ div >
0 commit comments