Skip to content

Commit acd885a

Browse files
authored
fix safety problems (#437)
1 parent 4186f6c commit acd885a

7 files changed

Lines changed: 30 additions & 21 deletions

File tree

csrc/deepep/ops/op_host/dispatch_ffn_combine_tiling.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,14 +51,6 @@ constexpr uint64_t MB_SIZE = 1024 * 1024UL;
5151

5252
namespace optiling {
5353

54-
static int32_t CeilDev(int32_t num, int32_t div)
55-
{
56-
if (div == 0) {
57-
return 0;
58-
}
59-
return (num + div - 1) / div;
60-
}
61-
6254
static uint64_t GetMaxWindowSize()
6355
{
6456
uint16_t defaultWindowSize = 200;

csrc/deepep/ops/op_host/moe_distribute_dispatch_v2_tiling.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,6 @@ static ge::graphStatus CheckAndSetGroupInfo(const gert::TilingContext *context,
446446
auto tpWorldSizePtr = attrs->GetAttrPointer<int64_t>(ATTR_TP_WORLD_SIZE_INDEX);
447447
auto epRankIdPtr = attrs->GetAttrPointer<int64_t>(ATTR_EP_RANK_ID_INDEX);
448448
auto tpRankIdPtr = attrs->GetAttrPointer<int64_t>(ATTR_TP_RANK_ID_INDEX);
449-
int64_t epWorldSize = *epWorldSizePtr;
450449

451450
// 判空
452451
OP_TILING_CHECK((groupEpPtr == nullptr) || (strnlen(groupEpPtr, MAX_GROUP_NAME_LENGTH) == 0) ||
@@ -457,6 +456,7 @@ static ge::graphStatus CheckAndSetGroupInfo(const gert::TilingContext *context,
457456
OP_TILING_CHECK(tpWorldSizePtr == nullptr, OP_LOGE(nodeName, "tpWorldSizePtr is null."), return ge::GRAPH_FAILED);
458457
OP_TILING_CHECK(epRankIdPtr == nullptr, OP_LOGE(nodeName, "epRankIdPtr is null."), return ge::GRAPH_FAILED);
459458
OP_TILING_CHECK(tpRankIdPtr == nullptr, OP_LOGE(nodeName, "tpRankIdPtr is null."), return ge::GRAPH_FAILED);
459+
int64_t epWorldSize = *epWorldSizePtr;
460460

461461
// 判断是否有效
462462
OP_TILING_CHECK((epWorldSize < MIN_EP_WORLD_SIZE) || (epWorldSize > MAX_EP_WORLD_SIZE),
@@ -504,10 +504,8 @@ static ge::graphStatus CheckAndSetExpertInfo(const gert::TilingContext *context,
504504
auto moeExpertNumPtr = attrs->GetAttrPointer<int64_t>(ATTR_MOE_EXPERT_NUM_INDEX);
505505
auto quantModePtr = attrs->GetAttrPointer<int64_t>(ATTR_QUANT_MODE_INDEX);
506506
auto expertTokenNumsTypePtr = attrs->GetAttrPointer<int64_t>(static_cast<int>(ATTR_EXPERT_TOKEN_NUMS_TYPE_INDEX));
507-
int64_t moeExpertNum = *moeExpertNumPtr;
508-
int64_t epWorldSize = *epWorldSizePtr;
509-
int64_t sharedExpertRankNum = *sharedExpertRankNumPtr;
510507

508+
OP_TILING_CHECK(epWorldSizePtr == nullptr, OP_LOGE(nodeName, "epWorldSizePtr is null."), return ge::GRAPH_FAILED);
511509
OP_TILING_CHECK(expertShardPtr == nullptr, OP_LOGE(nodeName, "expertShardPtr is null."), return ge::GRAPH_FAILED);
512510
OP_TILING_CHECK(sharedExpertNumPtr == nullptr, OP_LOGE(nodeName, "sharedExpertNumPtr is null."),
513511
return ge::GRAPH_FAILED);
@@ -517,6 +515,9 @@ static ge::graphStatus CheckAndSetExpertInfo(const gert::TilingContext *context,
517515
OP_TILING_CHECK(quantModePtr == nullptr, OP_LOGE(nodeName, "quantModePtr is null."), return ge::GRAPH_FAILED);
518516
OP_TILING_CHECK(expertTokenNumsTypePtr == nullptr, OP_LOGE(nodeName, "expertTokenNumsTypePtr is null."),
519517
return ge::GRAPH_FAILED);
518+
int64_t sharedExpertRankNum = *sharedExpertRankNumPtr;
519+
int64_t moeExpertNum = *moeExpertNumPtr;
520+
int64_t epWorldSize = *epWorldSizePtr;
520521

521522
OP_TILING_CHECK(
522523
*expertShardPtr != 0,
@@ -568,11 +569,7 @@ static ge::graphStatus CheckAndSetSpecialExpertInfo(const gert::TilingContext *c
568569
auto zeroExpertNumPtr = attrs->GetAttrPointer<int64_t>(static_cast<int>(ATTR_ZERO_EXPERT_NUM_INDEX));
569570
auto copyExpertNumPtr = attrs->GetAttrPointer<int64_t>(static_cast<int>(ATTR_COPY_EXPERT_NUM_INDEX));
570571
auto constExpertNumPtr = attrs->GetAttrPointer<int64_t>(static_cast<int>(ATTR_CONST_EXPERT_NUM_INDEX));
571-
int64_t moeExpertNum = *(attrs->GetAttrPointer<int64_t>(ATTR_MOE_EXPERT_NUM_INDEX));
572-
int64_t zeroExpertNum = *zeroExpertNumPtr;
573-
int64_t copyExpertNum = *copyExpertNumPtr;
574-
int64_t constExpertNum = *constExpertNumPtr;
575-
int64_t zeroComputeExpertNum = zeroExpertNum + copyExpertNum + constExpertNum;
572+
auto moeExpertNumPtr = attrs->GetAttrPointer<int64_t>(ATTR_MOE_EXPERT_NUM_INDEX);
576573

577574
// 判空
578575
OP_TILING_CHECK(zeroExpertNumPtr == nullptr, OP_LOGE(nodeName, "zeroExpertNumPtr is null."),
@@ -581,7 +578,13 @@ static ge::graphStatus CheckAndSetSpecialExpertInfo(const gert::TilingContext *c
581578
return ge::GRAPH_FAILED);
582579
OP_TILING_CHECK(constExpertNumPtr == nullptr, OP_LOGE(nodeName, "constExpertNumPtr is null."),
583580
return ge::GRAPH_FAILED);
581+
OP_TILING_CHECK(moeExpertNumPtr == nullptr, OP_LOGE(nodeName, "moeExpertNumPtr is null."), return ge::GRAPH_FAILED);
584582
OP_TILING_CHECK(commAlgPtr == nullptr, OP_LOGE(nodeName, "commAlgPtr is nullptr."), return ge::GRAPH_FAILED);
583+
int64_t zeroExpertNum = *zeroExpertNumPtr;
584+
int64_t copyExpertNum = *copyExpertNumPtr;
585+
int64_t constExpertNum = *constExpertNumPtr;
586+
int64_t moeExpertNum = *moeExpertNumPtr;
587+
int64_t zeroComputeExpertNum = zeroExpertNum + copyExpertNum + constExpertNum;
585588

586589
// 判断是否有效
587590
OP_TILING_CHECK((zeroExpertNum < 0),

csrc/deepep/ops/op_kernel/check_winsize.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ __aicore__ inline void CheckWindowSize(uint64_t tilingWinSizeBytes, uint64_t rea
1515
AscendC::TBuf<AscendC::TPosition::VECCALC> exceptionBuf;
1616
tpipe_->InitBuffer(exceptionBuf, 1); // 初始化一个缓冲区
1717
AscendC::LocalTensor<int32_t> exceptionLocal = exceptionBuf.Get<int32_t>();
18-
AscendC::DataCopy(exceptionLocal[1], exceptionGlobal, 1); // 从全局地址复制数据到本地地址
18+
AscendC::DataCopy(exceptionLocal[0], exceptionGlobal, 1); // 从全局地址复制数据到本地地址
1919
}
2020
}
2121
#endif // CHECK_WINSIZE_H

csrc/deepep/ops/op_kernel/dispatch_ffn_combine_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include <type_traits>
4+
#include <limits>
35
#include "tiling_base.h"
46

57
namespace optiling {
@@ -51,8 +53,12 @@ inline static int64_t GetPerOrLastValue(int64_t x, int64_t y)
5153
}
5254

5355
template <class T>
54-
constexpr T CeilDiv(const T dividend, const T divisor)
56+
static T CeilDiv(const T dividend, const T divisor)
5557
{
58+
static_assert(std::is_arithmetic<T>::value, "T must be an arithmetic type");
59+
if (divisor == 0 || dividend + divisor - 1 < dividend) {
60+
return std::numeric_limits<T>::max();
61+
}
5662
return (dividend + divisor - 1) / divisor;
5763
}
5864

csrc/deepep/ops/utils/op_kernel/operator/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ constexpr int32_t BUFFER_NUM = 2;
3838
constexpr int32_t GATHER_SECOND_NUM = 2;
3939
constexpr uint32_t MAX_QUANT_ROW_ONCE = 8;
4040
constexpr uint32_t QUANT_SPACE_FACTOR = 176 * 1024 / 11; // 量化使用UB不超过176KB
41+
constexpr uint64_t CYCLE_TO_TIME = 50;
42+
constexpr uint64_t TIMEOUT_DETECTION_THRESHOLD = 5000000000UL;
4143
#define OPT_RANK_OFFSET 512
4244

4345
#define CEIL_UP(x) ((x + UB_ALIGN - 1) / UB_ALIGN * UB_ALIGN)
@@ -334,6 +336,9 @@ __aicore__ inline static void CheckSyncFlag(__gm__ uint8_t *flagAddr, uint8_t id
334336
AscendC::PipeBarrier<PIPE_ALL>();
335337
AscendC::GlobalTensor<uint8_t> global;
336338
global.SetGlobalBuffer(flagAddr + idx * SOFT_SYNC_SPACE_SIZE);
339+
340+
uint64_t timeoutCheckStart = static_cast<uint64_t>(AscendC::GetSystemCycle());
341+
uint64_t timeoutCheckEnd, timeoutCheckDuration;
337342
while (true) {
338343
__asm__ __volatile__("");
339344
AscendC::DataCacheCleanAndInvalid<uint8_t, AscendC::CacheLine::SINGLE_CACHE_LINE,
@@ -347,6 +352,9 @@ __aicore__ inline static void CheckSyncFlag(__gm__ uint8_t *flagAddr, uint8_t id
347352
__asm__ __volatile__("");
348353
break;
349354
}
355+
timeoutCheckEnd = static_cast<uint64_t>(AscendC::GetSystemCycle());
356+
timeoutCheckDuration = (timeoutCheckEnd - timeoutCheckStart) / CYCLE_TO_TIME;
357+
assert(timeoutCheckDuration < TIMEOUT_DETECTION_THRESHOLD);
350358
}
351359
AscendC::PipeBarrier<PIPE_ALL>();
352360
}

csrc/deepep/ops2/op_host/moe_distribute_combine_v2_tiling.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1428,7 +1428,7 @@ static ge::graphStatus MoeDistributeCombineV2TilingFunc(gert::TilingContext *con
14281428
if (socVersion == "Ascend910B") {
14291429
ret = MoeDistributeCombineA2TilingFuncImpl(context);
14301430
} else {
1431-
// ret = MoeDistributeCombineA3TilingFuncImpl(context);
1431+
OP_LOGE(nodeName, "socVersion is not support");
14321432
}
14331433

14341434
return ret;

csrc/deepep/ops2/op_kernel/check_winsize.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ __aicore__ inline void CheckWindowSize(uint64_t tilingWinSizeBytes, uint64_t rea
1515
AscendC::TBuf<AscendC::TPosition::VECCALC> exceptionBuf;
1616
tpipe_->InitBuffer(exceptionBuf, 1); // 初始化一个缓冲区
1717
AscendC::LocalTensor<int32_t> exceptionLocal = exceptionBuf.Get<int32_t>();
18-
AscendC::DataCopy(exceptionLocal[1], exceptionGlobal, 1); // 从全局地址复制数据到本地地址
18+
AscendC::DataCopy(exceptionLocal[0], exceptionGlobal, 1); // 从全局地址复制数据到本地地址
1919
}
2020
}
2121
#endif // CHECK_WINSIZE_H

0 commit comments

Comments
 (0)