Skip to content

Commit a86e5b4

Browse files
Merge pull request #1122 from InfiniTensor/issue/1031_T1-1-41
【比赛2025秋】T1-1-41
2 parents d6ab557 + 2647cf9 commit a86e5b4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+4735
-0
lines changed

include/infiniop.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@
3636
#include "infiniop/ops/dot.h"
3737
#include "infiniop/ops/embedding.h"
3838
#include "infiniop/ops/equal.h"
39+
#include "infiniop/ops/erf.h"
40+
#include "infiniop/ops/erfc.h"
41+
#include "infiniop/ops/erfinv.h"
3942
#include "infiniop/ops/flash_attention.h"
4043
#include "infiniop/ops/flipud.h"
4144
#include "infiniop/ops/float_power.h"
@@ -70,13 +73,15 @@
7073
#include "infiniop/ops/logdet.h"
7174
#include "infiniop/ops/lp_norm.h"
7275
#include "infiniop/ops/masked_select.h"
76+
#include "infiniop/ops/matrix_power.h"
7377
#include "infiniop/ops/mul.h"
7478
#include "infiniop/ops/multi_margin_loss.h"
7579
#include "infiniop/ops/ones.h"
7680
#include "infiniop/ops/pad.h"
7781
#include "infiniop/ops/paged_attention.h"
7882
#include "infiniop/ops/paged_attention_prefill.h"
7983
#include "infiniop/ops/paged_caching.h"
84+
#include "infiniop/ops/pixel_shuffle.h"
8085
#include "infiniop/ops/quant/per_channel_quant_int8.h"
8186
#include "infiniop/ops/quant/per_tensor_quant_int8.h"
8287
#include "infiniop/ops/random_sample.h"

include/infiniop/ops/erf.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef __INFINIOP_ERF_API_H__
2+
#define __INFINIOP_ERF_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopErfDescriptor_t;
7+
8+
__INFINI_C __export infiniStatus_t infiniopCreateErfDescriptor(infiniopHandle_t handle,
9+
infiniopErfDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x);
12+
13+
__INFINI_C __export infiniStatus_t infiniopGetErfWorkspaceSize(infiniopErfDescriptor_t desc, size_t *size);
14+
15+
__INFINI_C __export infiniStatus_t infiniopErf(infiniopErfDescriptor_t desc,
16+
void *workspace,
17+
size_t workspace_size,
18+
void *y,
19+
const void *x,
20+
void *stream);
21+
22+
__INFINI_C __export infiniStatus_t infiniopDestroyErfDescriptor(infiniopErfDescriptor_t desc);
23+
24+
#endif

include/infiniop/ops/erfc.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef __INFINIOP_ERFC_API_H__
2+
#define __INFINIOP_ERFC_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopErfcDescriptor_t;
7+
8+
__INFINI_C __export infiniStatus_t infiniopCreateErfcDescriptor(infiniopHandle_t handle,
9+
infiniopErfcDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x);
12+
13+
__INFINI_C __export infiniStatus_t infiniopGetErfcWorkspaceSize(infiniopErfcDescriptor_t desc, size_t *size);
14+
15+
__INFINI_C __export infiniStatus_t infiniopErfc(infiniopErfcDescriptor_t desc,
16+
void *workspace,
17+
size_t workspace_size,
18+
void *y,
19+
const void *x,
20+
void *stream);
21+
22+
__INFINI_C __export infiniStatus_t infiniopDestroyErfcDescriptor(infiniopErfcDescriptor_t desc);
23+
24+
#endif

include/infiniop/ops/erfinv.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef __INFINIOP_ERFINV_API_H__
2+
#define __INFINIOP_ERFINV_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopErfinvDescriptor_t;
7+
8+
__INFINI_C __export infiniStatus_t infiniopCreateErfinvDescriptor(infiniopHandle_t handle,
9+
infiniopErfinvDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x);
12+
13+
__INFINI_C __export infiniStatus_t infiniopGetErfinvWorkspaceSize(infiniopErfinvDescriptor_t desc, size_t *size);
14+
15+
__INFINI_C __export infiniStatus_t infiniopErfinv(infiniopErfinvDescriptor_t desc,
16+
void *workspace,
17+
size_t workspace_size,
18+
void *y,
19+
const void *x,
20+
void *stream);
21+
22+
__INFINI_C __export infiniStatus_t infiniopDestroyErfinvDescriptor(infiniopErfinvDescriptor_t desc);
23+
24+
#endif
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#ifndef __INFINIOP_MATRIX_POWER_API_H__
2+
#define __INFINIOP_MATRIX_POWER_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopMatrixPowerDescriptor_t;
7+
8+
__INFINI_C __export infiniStatus_t infiniopCreateMatrixPowerDescriptor(infiniopHandle_t handle,
9+
infiniopMatrixPowerDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x,
12+
int n);
13+
14+
__INFINI_C __export infiniStatus_t infiniopGetMatrixPowerWorkspaceSize(infiniopMatrixPowerDescriptor_t desc, size_t *size);
15+
16+
__INFINI_C __export infiniStatus_t infiniopMatrixPower(infiniopMatrixPowerDescriptor_t desc,
17+
void *workspace,
18+
size_t workspace_size,
19+
void *y,
20+
const void *x,
21+
void *stream);
22+
23+
__INFINI_C __export infiniStatus_t infiniopDestroyMatrixPowerDescriptor(infiniopMatrixPowerDescriptor_t desc);
24+
25+
#endif
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#ifndef __INFINIOP_PIXEL_SHUFFLE_API_H__
2+
#define __INFINIOP_PIXEL_SHUFFLE_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopPixelShuffleDescriptor_t;
7+
8+
__INFINI_C __export infiniStatus_t infiniopCreatePixelShuffleDescriptor(infiniopHandle_t handle,
9+
infiniopPixelShuffleDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x,
12+
int upscale_factor);
13+
14+
__INFINI_C __export infiniStatus_t infiniopGetPixelShuffleWorkspaceSize(infiniopPixelShuffleDescriptor_t desc, size_t *size);
15+
16+
__INFINI_C __export infiniStatus_t infiniopPixelShuffle(infiniopPixelShuffleDescriptor_t desc,
17+
void *workspace,
18+
size_t workspace_size,
19+
void *y,
20+
const void *x,
21+
void *stream);
22+
23+
__INFINI_C __export infiniStatus_t infiniopDestroyPixelShuffleDescriptor(infiniopPixelShuffleDescriptor_t desc);
24+
25+
#endif
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#include "erf_cpu.h"
2+
3+
namespace op::erf::cpu {
4+
5+
Descriptor::~Descriptor() = default;
6+
7+
infiniStatus_t Descriptor::create(
8+
infiniopHandle_t handle_,
9+
Descriptor **desc_ptr,
10+
infiniopTensorDescriptor_t out_desc,
11+
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
12+
13+
auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
14+
auto dtype = out_desc->dtype();
15+
16+
const auto &input_desc = input_desc_vec.at(0);
17+
const auto &output_shape = out_desc->shape();
18+
const auto &input_shape = input_desc->shape();
19+
20+
CHECK_DTYPE(dtype, INFINI_DTYPE_BF16, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);
21+
22+
CHECK_SAME_SHAPE(output_shape, input_shape);
23+
24+
CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);
25+
26+
return INFINI_STATUS_SUCCESS;
27+
}
28+
29+
infiniStatus_t Descriptor::calculate(
30+
void *workspace,
31+
size_t workspace_size,
32+
void *output,
33+
std::vector<const void *> inputs,
34+
void *stream) const {
35+
36+
switch (_dtype) {
37+
case INFINI_DTYPE_BF16:
38+
return _device_info->calculate<ErfOp, bf16_t>(_info, output, inputs, stream);
39+
case INFINI_DTYPE_F16:
40+
return _device_info->calculate<ErfOp, fp16_t>(_info, output, inputs, stream);
41+
case INFINI_DTYPE_F32:
42+
return _device_info->calculate<ErfOp, float>(_info, output, inputs, stream);
43+
case INFINI_DTYPE_F64:
44+
return _device_info->calculate<ErfOp, double>(_info, output, inputs, stream);
45+
default:
46+
return INFINI_STATUS_BAD_TENSOR_DTYPE;
47+
}
48+
49+
return INFINI_STATUS_SUCCESS;
50+
}
51+
52+
} // namespace op::erf::cpu

src/infiniop/ops/erf/cpu/erf_cpu.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef __ERF_CPU_H__
2+
#define __ERF_CPU_H__
3+
4+
#include "../../../elementwise/cpu/elementwise_cpu.h"
5+
#include <cmath>
6+
7+
ELEMENTWISE_DESCRIPTOR(erf, cpu)
8+
9+
namespace op::erf::cpu {
10+
typedef struct ErfOp {
11+
public:
12+
static constexpr size_t num_inputs = 1;
13+
template <typename T>
14+
T operator()(const T &x) const {
15+
return std::erf(x);
16+
}
17+
} ErfOp;
18+
} // namespace op::erf::cpu
19+
20+
#endif // __ERF_CPU_H__
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#pragma once
2+
#include <cmath>
3+
#include <type_traits>
4+
5+
namespace op::cuda {
6+
7+
struct ErfOp {
8+
static constexpr size_t num_inputs = 1;
9+
10+
template <typename T>
11+
__device__ __forceinline__ T operator()(T x) const {
12+
if constexpr (std::is_same_v<T, float>) {
13+
return erff(x);
14+
} else if constexpr (std::is_same_v<T, double>) {
15+
return ::erf(x);
16+
} else {
17+
// For F16/BF16: promote to float, compute, then cast back
18+
float xf;
19+
if constexpr (std::is_same_v<T, half>) {
20+
xf = __half2float(x);
21+
return __float2half_rn(erff(xf));
22+
} else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
23+
xf = __bfloat162float(x);
24+
return __float2bfloat16_rn(erff(xf));
25+
} else {
26+
xf = static_cast<float>(x);
27+
return static_cast<T>(erff(xf));
28+
}
29+
}
30+
}
31+
};
32+
33+
} // namespace op::cuda

src/infiniop/ops/erf/erf.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef __ERF_H__
2+
#define __ERF_H__
3+
4+
#include "../../elementwise/elementwise.h"
5+
6+
#define DESCRIPTOR(NAMESPACE) ELEMENTWISE_DESCRIPTOR(erf, NAMESPACE)
7+
8+
#endif // __ERF_H__

0 commit comments

Comments
 (0)