You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
using EpilogueDispatchPolicy = cutlass::epilogue::IntelPVCEpilogue;
125
-
using EpilogueOp = cutlass::epilogue::fusion::LinearCombination<ElementAccumulator, ElementComputeEpilogue, ElementAccumulator, ElementAccumulator, cutlass::FloatRoundStyle::round_to_nearest>;
126
-
using FusionCallBacks = cutlass::epilogue::fusion::FusionCallbacks<EpilogueDispatchPolicy, EpilogueOp, TileShape, decltype(tile_shape(TiledMma()))>;
127
-
using SharedStorage = FusionCallBacks::SharedStorage;
128
-
#endif
129
-
130
122
// Design Scheduler
131
123
using TileScheduler_ = PersistentScheduler;
132
124
static_assert(cute::is_void_v<TileScheduler_> or cute::is_same_v<TileScheduler_, PersistentScheduler>, "Intel PVC does not support specializing the tile scheduler.");
@@ -135,23 +127,6 @@ using TileScheduler = typename cutlass::gemm::kernel::detail::TileSchedulerSelec
135
127
using TileSchedulerArguments = typename TileScheduler::Arguments;
136
128
using TileSchedulerParams = typename TileScheduler::Params;
137
129
138
-
#if 0
139
-
// Define Epilogue
140
-
using CollectiveEpilogue = cutlass::epilogue::collective::CollectiveEpilogue<
141
-
EpilogueDispatchPolicy,
142
-
TileShape,
143
-
ElementAccumulator,
144
-
cutlass::gemm::TagToStrideC_t<cutlass::layout::RowMajor>, // Convert CUTLASS 2.x to CUTLASS 3.x representation
145
-
ElementOutput,
146
-
cutlass::gemm::TagToStrideC_t<cutlass::layout::RowMajor>, // Convert CUTLASS 2.x to CUTLASS 3.x representation
147
-
FusionCallBacks,
148
-
XE_2D_U32x4x16_LD_N, // The copy atom used to load matrix C
149
-
void, void,
150
-
XE_2D_U32x4x16_ST_N, // The copy atom used to store matrix D
151
-
void, void>;
152
-
using EpilogueParams = typename CollectiveEpilogue::Params;
153
-
#endif
154
-
155
130
using ClusterShape = typename DispatchPolicy::ClusterShape;
156
131
157
132
// Define Copy
@@ -196,12 +171,6 @@ template <typename T, int BITS>
196
171
class gemm_4bit_cutlass_kernel {
197
172
public:
198
173
// Kernel level shared memory storage
199
-
#if 0
200
-
struct SharedStorage {
201
-
using EpilogueTensorStorage = typename CollectiveEpilogue::TensorStorage;
202
-
EpilogueTensorStorage epilogue;
203
-
};
204
-
#endif
205
174
struct Params {
206
175
int m, n, k, l;
207
176
T* A;
@@ -413,10 +382,12 @@ class gemm_4bit_cutlass_kernel {
413
382
auto blk_shape = TileShape{};
414
383
int m_coord, n_coord, l_coord;
415
384
if (params.scheduler.raster_order_ == TileScheduler::RasterOrder::AlongN) {
385
+
if(cute::thread0()) printf("log1 ....\n");
416
386
m_coord = BlockIdxY();
417
387
n_coord = BlockIdxX();
418
388
l_coord = BlockIdxZ();
419
389
} else {
390
+
if(cute::thread0()) printf("log2 ....\n");
420
391
m_coord = BlockIdxX();
421
392
n_coord = BlockIdxY();
422
393
l_coord = BlockIdxZ();
@@ -570,20 +541,6 @@ class gemm_4bit_cutlass_kernel {
0 commit comments