Skip to content

Commit dd39eab

Browse files
committed
cleanup code
Signed-off-by: Ceng23333 <441651826@qq.com>
1 parent ccb72b9 commit dd39eab

File tree

11 files changed

+21
-270
lines changed

11 files changed

+21
-270
lines changed

INFLLMV2_FLASHATTN_LOCAL_WINDOW_PATCH_NOTES.md

Lines changed: 0 additions & 67 deletions
This file was deleted.

include/infinicore/ops.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "ops/flash_attention.hpp"
2020
#include "ops/fmin.hpp"
2121
#include "ops/fmod.hpp"
22-
#include "ops/gla_attention.hpp"
2322
#include "ops/simple_gla_attention.hpp"
2423
#include "ops/simple_gla_decode_step.hpp"
2524
#include "ops/simple_gla_recurrent_state_append.hpp"

include/infinicore/ops/gla_attention.hpp

Lines changed: 0 additions & 23 deletions
This file was deleted.

python/infinicore/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
from infinicore.ops.floor_divide import floor_divide
7676
from infinicore.ops.fmin import fmin
7777
from infinicore.ops.fmod import fmod
78-
from infinicore.ops.gla_attention import gla_attention
7978
from infinicore.ops.hypot import hypot
8079
from infinicore.ops.index_add import index_add
8180
from infinicore.ops.index_copy import index_copy
@@ -186,7 +185,6 @@
186185
"addbmm",
187186
"floor",
188187
"attention",
189-
"gla_attention",
190188
"infllmv2_varlen",
191189
"infllmv2_kvcache",
192190
"simple_gla_attention",

python/infinicore/ops/gla_attention.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

src/infinicore/ops/gla_attention/gla_attention.cc

Lines changed: 0 additions & 61 deletions
This file was deleted.

src/infinicore/pybind11/ops.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
#include "ops/floor_divide.hpp"
3737
#include "ops/fmin.hpp"
3838
#include "ops/fmod.hpp"
39-
#include "ops/gla_attention.hpp"
4039
#include "ops/hardswish.hpp"
4140
#include "ops/hardtanh.hpp"
4241
#include "ops/huber_loss.hpp"
@@ -119,7 +118,6 @@ inline void bind(py::module &m) {
119118
bind_bilinear(m);
120119
bind_causal_softmax(m);
121120
bind_flash_attention(m);
122-
bind_gla_attention(m);
123121
bind_infllmv2_attention(m);
124122
bind_simple_gla_attention(m);
125123
bind_simple_gla_decode_step(m);

src/infinicore/pybind11/ops/gla_attention.hpp

Lines changed: 0 additions & 32 deletions
This file was deleted.

src/infiniop/ops/embedding/nvidia/embedding_nvidia.cu

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
#include "../../../tensor.h"
55
#include "../cuda/embedding_kernel.cuh"
66
#include "embedding_nvidia.cuh"
7-
#include <cstdio>
8-
#include <cstdlib>
97
#include <cuda_runtime.h>
108

119
template <typename T, typename IndexType>
@@ -59,14 +57,6 @@ INFINIOP_CUDA_KERNEL embeddingKernel(
5957
// Fallback to scalar copy with __ldg
6058
copyScalar<T, IndexType>(dst, src, embedding_dim);
6159
}
62-
} else {
63-
// Important for correctness debugging:
64-
// If indices are out of range, write zeros instead of leaving output
65-
// uninitialized (which can manifest as NaNs in later layers).
66-
T *dst = output + idx * embedding_dim;
67-
for (size_t i = 0; i < embedding_dim; ++i) {
68-
dst[i] = T(0);
69-
}
7060
}
7161
}
7262
}
@@ -225,22 +215,9 @@ infiniStatus_t Descriptor::calculate(
225215
// Check for kernel launch errors
226216
cudaError_t err = cudaGetLastError();
227217
if (err != cudaSuccess) {
228-
std::fprintf(stderr, "infiniopEmbedding launch failed: %s\n", cudaGetErrorString(err));
229-
std::fflush(stderr);
230218
return INFINI_STATUS_INTERNAL_ERROR;
231219
}
232220

233-
// Optional debug sync to surface runtime errors (misaligned, illegal access, etc.)
234-
const char *debug_env = std::getenv("INFINIOP_DEBUG_EMBEDDING");
235-
if (debug_env != nullptr && debug_env[0] != '\0' && debug_env[0] != '0') {
236-
err = cudaStreamSynchronize(cuda_stream);
237-
if (err != cudaSuccess) {
238-
std::fprintf(stderr, "infiniopEmbedding stream sync failed: %s\n", cudaGetErrorString(err));
239-
std::fflush(stderr);
240-
return INFINI_STATUS_INTERNAL_ERROR;
241-
}
242-
}
243-
244221
return INFINI_STATUS_SUCCESS;
245222
}
246223

xmake.lua

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ option("cutlass")
8080
set_description("Whether to compile cutlass for Nvidia GPU")
8181
option_end()
8282

83-
if has_config("cutlass") then
83+
if has_config("cutlass") then
8484
add_defines("ENABLE_CUTLASS_API")
8585
end
8686

@@ -512,11 +512,11 @@ target("infinicore_cpp_api")
512512
local TORCH_DIR = outdata
513513

514514
target:add(
515-
"includedirs",
516-
path.join(TORCH_DIR, "include"),
515+
"includedirs",
516+
path.join(TORCH_DIR, "include"),
517517
path.join(TORCH_DIR, "include/torch/csrc/api/include"),
518518
{ public = true })
519-
519+
520520
target:add(
521521
"linkdirs",
522522
path.join(TORCH_DIR, "lib"),
@@ -612,7 +612,7 @@ target("_infinicore")
612612
add_defines("BOOST_STACKTRACE_USE_NOOP")
613613
end
614614

615-
set_default(false)
615+
set_default(true)
616616
add_rules("python.library", {soabi = true})
617617
add_packages("pybind11")
618618
set_languages("cxx17")

0 commit comments

Comments (0)