Skip to content

Commit 97da993

Browse files
Merge pull request #934 from InfiniTensor/issue/811
issue/811 remove shortcut for cpu runtime
2 parents 180674d + 4a4b6e8 commit 97da993

File tree

5 files changed: 35 additions (+35) and 28 deletions (-28).

src/infinicore/context/context_impl.cc

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,16 @@ Runtime *ContextImpl::getCurrentRuntime() {
2929
return current_runtime_;
3030
}
3131

32-
Runtime *ContextImpl::getCpuRuntime() {
33-
return runtime_table_[int(Device::Type::CPU)][0].get();
34-
}
35-
3632
void ContextImpl::setDevice(Device device) {
3733
if (device == getCurrentRuntime()->device()) {
3834
// Do nothing if the device is already set.
3935
return;
4036
}
4137

42-
if (getCurrentRuntime()->isGraphRecording()) {
38+
thread_local bool warn_switch_runtime = false;
39+
if (getCurrentRuntime()->isGraphRecording() && !warn_switch_runtime) {
4340
spdlog::warn("Switching device runtime during graph recording may break the graph!");
41+
warn_switch_runtime = true;
4442
}
4543

4644
if (runtime_table_[int(device.getType())][device.getIndex()] == nullptr) {
@@ -104,11 +102,8 @@ infinirtStream_t getStream() {
104102
}
105103

106104
infiniopHandle_t getInfiniopHandle(Device device) {
107-
if (device.getType() == Device::Type::CPU) {
108-
return ContextImpl::singleton().getCpuRuntime()->infiniopHandle();
109-
}
110105
if (device != getDevice()) {
111-
throw std::runtime_error("Requested device doesn't match current runtime.");
106+
setDevice(device);
112107
}
113108
return ContextImpl::singleton().getCurrentRuntime()->infiniopHandle();
114109
}
@@ -127,7 +122,7 @@ std::shared_ptr<Memory> allocateMemory(size_t size) {
127122

128123
std::shared_ptr<Memory> allocateHostMemory(size_t size) {
129124
setDevice(Device::cpu());
130-
return ContextImpl::singleton().getCpuRuntime()->allocateMemory(size);
125+
return allocateMemory(size);
131126
}
132127

133128
std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size) {
@@ -147,7 +142,8 @@ void memcpyD2D(void *dst, const void *src, size_t size, bool async) {
147142
}
148143

149144
void memcpyH2H(void *dst, const void *src, size_t size) {
150-
return ContextImpl::singleton().getCpuRuntime()->memcpyD2D(dst, src, size);
145+
setDevice(Device::cpu());
146+
return ContextImpl::singleton().getCurrentRuntime()->memcpyD2D(dst, src, size);
151147
}
152148

153149
// Timing API implementations

src/infinicore/context/context_impl.hpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ class ContextImpl {
1919
public:
2020
Runtime *getCurrentRuntime();
2121

22-
Runtime *getCpuRuntime();
23-
2422
void setDevice(Device);
2523

2624
size_t getDeviceCount(Device::Type type);

src/infinicore/tensor/copy.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ Tensor TensorImpl::to(Device device) const {
1919

2020
void TensorImpl::copy_from(Tensor src) {
2121
if (src->shape() != this->shape()) {
22-
throw std::runtime_error("Cannot copy from tensor with different shape");
22+
throw std::runtime_error(
23+
"Cannot copy from tensor with different shape. Src: " + src->info() + " Dst: " + this->info());
2324
}
2425
if (this->device() == src->device()) {
2526
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), src);
@@ -31,11 +32,12 @@ void TensorImpl::copy_from(Tensor src) {
3132
// Use nbytes() to get the actual tensor size, not the full memory size
3233
size_t copy_size = std::min(this->nbytes(), src->nbytes());
3334
if (this->device().getType() == Device::Type::CPU) {
34-
context::setDevice(src->device());
3535
if (this->is_contiguous()) {
36+
context::setDevice(src->device());
3637
context::memcpyD2H(this->data(), src->data(), copy_size);
3738
} else {
3839
auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
40+
context::setDevice(src->device());
3941
context::memcpyD2H(local_src->data(), src->data(), this->data_.memory->size());
4042
op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
4143
}

src/infinicore/utils.hpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,17 @@ inline struct SpdlogInitializer {
2323
#define STRINGIZE_(x) #x
2424
#define STRINGIZE(x) STRINGIZE_(x)
2525

26-
#define INFINICORE_CHECK_ERROR(call) \
27-
do { \
28-
SPDLOG_DEBUG("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
29-
infiniStatus_t ret = (call); \
30-
SPDLOG_DEBUG("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
31-
if (ret != INFINI_STATUS_SUCCESS) { \
32-
throw std::runtime_error(#call " failed with error: " + std::string(infini_status_string(ret))); \
33-
} \
26+
#define INFINICORE_CHECK_ERROR(call) \
27+
do { \
28+
SPDLOG_DEBUG("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
29+
infiniStatus_t ret = (call); \
30+
SPDLOG_DEBUG("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
31+
if (ret != INFINI_STATUS_SUCCESS) { \
32+
throw std::runtime_error("`" #call "` failed with error: " + std::string(infini_status_string(ret)) \
33+
+ " from " + std::string(__func__) \
34+
+ " at " + std::string(__FILE__) \
35+
+ ":" + std::to_string(__LINE__) + "."); \
36+
} \
3437
} while (false)
3538

3639
#define INFINICORE_ASSERT_TENSORS_SAME_DEVICE(FIRST___, ...) \

src/infinirt/cuda/infinirt_cuda.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44

55
#define CHECK_CUDART(RT_API) CHECK_INTERNAL(RT_API, cudaSuccess)
66

7+
#define RUN_CUDART(RT_API) \
8+
do { \
9+
auto api_result_ = (RT_API); \
10+
if (api_result_ != (cudaSuccess)) { \
11+
{ return INFINI_STATUS_INTERNAL_ERROR; } \
12+
} \
13+
} while (0)
14+
715
// 根据宏定义选择命名空间并实现
816
#if defined(ENABLE_NVIDIA_API)
917
namespace infinirt::cuda {
@@ -40,7 +48,7 @@ infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) {
4048
}
4149

4250
infiniStatus_t streamDestroy(infinirtStream_t stream) {
43-
CHECK_CUDART(cudaStreamDestroy((cudaStream_t)stream));
51+
RUN_CUDART(cudaStreamDestroy((cudaStream_t)stream));
4452
return INFINI_STATUS_SUCCESS;
4553
}
4654

@@ -105,7 +113,7 @@ infiniStatus_t eventSynchronize(infinirtEvent_t event) {
105113
}
106114

107115
infiniStatus_t eventDestroy(infinirtEvent_t event) {
108-
CHECK_CUDART(cudaEventDestroy((cudaEvent_t)event));
116+
RUN_CUDART(cudaEventDestroy((cudaEvent_t)event));
109117
return INFINI_STATUS_SUCCESS;
110118
}
111119

@@ -125,12 +133,12 @@ infiniStatus_t mallocHost(void **p_ptr, size_t size) {
125133
}
126134

127135
infiniStatus_t freeDevice(void *ptr) {
128-
CHECK_CUDART(cudaFree(ptr));
136+
RUN_CUDART(cudaFree(ptr));
129137
return INFINI_STATUS_SUCCESS;
130138
}
131139

132140
infiniStatus_t freeHost(void *ptr) {
133-
CHECK_CUDART(cudaFreeHost(ptr));
141+
RUN_CUDART(cudaFreeHost(ptr));
134142
return INFINI_STATUS_SUCCESS;
135143
}
136144

@@ -165,7 +173,7 @@ infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
165173
}
166174

167175
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
168-
CHECK_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream));
176+
RUN_CUDART(cudaFreeAsync(ptr, (cudaStream_t)stream));
169177
return INFINI_STATUS_SUCCESS;
170178
}
171179
}

0 commit comments

Comments
 (0)