We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 709d447 commit e8c51e4 (Copy full SHA for e8c51e4)
1 file changed
infini_train/src/kernels/cuda/comm.cu
@@ -36,7 +36,7 @@ std::vector<std::shared_ptr<Tensor>> ReduceAddCoalesced(const std::vector<std::v
36
to_destination_grads[i].push_back(std::make_shared<Tensor>(grads[i][j]->To(destination)));
37
}
38
39
- // NOTE(zbl): To ensure Profiler works correctly, there should not be any other kernel calls
+ // NOTE(zbl): To ensure Profiler works correctly, there should not be any other kernel calls
40
// between GetKernel and kernel.Call, otherwise ProfileContext would be tainted
41
auto kernel = Dispatcher::Instance().GetKernel({destination.type(), "AccumulateGrad"});
42
for (int i = 0; i < grads.size(); ++i) {
0 commit comments