Skip to content

Commit 6431be3

Browse files
LaiQuan-conquerPanZezhong1725
authored andcommitted
issue/1031 merge T1-1-41
1 parent 6e88052 commit 6431be3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+4140
-0
lines changed

include/infiniop.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
#include "infiniop/ops/dequantize_gptq.h"
3131
#include "infiniop/ops/embedding.h"
3232
#include "infiniop/ops/equal.h"
33+
#include "infiniop/ops/erf.h"
34+
#include "infiniop/ops/erfc.h"
35+
#include "infiniop/ops/erfinv.h"
3336
#include "infiniop/ops/flash_attention.h"
3437
#include "infiniop/ops/flipud.h"
3538
#include "infiniop/ops/float_power.h"
@@ -58,12 +61,14 @@
5861
#include "infiniop/ops/logcumsumexp.h"
5962
#include "infiniop/ops/lp_norm.h"
6063
#include "infiniop/ops/masked_select.h"
64+
#include "infiniop/ops/matrix_power.h"
6165
#include "infiniop/ops/mul.h"
6266
#include "infiniop/ops/multi_margin_loss.h"
6367
#include "infiniop/ops/ones.h"
6468
#include "infiniop/ops/paged_attention.h"
6569
#include "infiniop/ops/paged_attention_prefill.h"
6670
#include "infiniop/ops/paged_caching.h"
71+
#include "infiniop/ops/pixel_shuffle.h"
6772
#include "infiniop/ops/quant/per_channel_quant_int8.h"
6873
#include "infiniop/ops/quant/per_tensor_quant_int8.h"
6974
#include "infiniop/ops/random_sample.h"

include/infiniop/ops/erf.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef __INFINIOP_ERF_API_H__
2+
#define __INFINIOP_ERF_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopErfDescriptor_t;
7+
8+
__C __export infiniStatus_t infiniopCreateErfDescriptor(infiniopHandle_t handle,
9+
infiniopErfDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x);
12+
13+
__C __export infiniStatus_t infiniopGetErfWorkspaceSize(infiniopErfDescriptor_t desc, size_t *size);
14+
15+
__C __export infiniStatus_t infiniopErf(infiniopErfDescriptor_t desc,
16+
void *workspace,
17+
size_t workspace_size,
18+
void *y,
19+
const void *x,
20+
void *stream);
21+
22+
__C __export infiniStatus_t infiniopDestroyErfDescriptor(infiniopErfDescriptor_t desc);
23+
24+
#endif

include/infiniop/ops/erfc.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef __INFINIOP_ERFC_API_H__
2+
#define __INFINIOP_ERFC_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopErfcDescriptor_t;
7+
8+
__C __export infiniStatus_t infiniopCreateErfcDescriptor(infiniopHandle_t handle,
9+
infiniopErfcDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x);
12+
13+
__C __export infiniStatus_t infiniopGetErfcWorkspaceSize(infiniopErfcDescriptor_t desc, size_t *size);
14+
15+
__C __export infiniStatus_t infiniopErfc(infiniopErfcDescriptor_t desc,
16+
void *workspace,
17+
size_t workspace_size,
18+
void *y,
19+
const void *x,
20+
void *stream);
21+
22+
__C __export infiniStatus_t infiniopDestroyErfcDescriptor(infiniopErfcDescriptor_t desc);
23+
24+
#endif

include/infiniop/ops/erfinv.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#ifndef __INFINIOP_ERFINV_API_H__
2+
#define __INFINIOP_ERFINV_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopErfinvDescriptor_t;
7+
8+
__C __export infiniStatus_t infiniopCreateErfinvDescriptor(infiniopHandle_t handle,
9+
infiniopErfinvDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x);
12+
13+
__C __export infiniStatus_t infiniopGetErfinvWorkspaceSize(infiniopErfinvDescriptor_t desc, size_t *size);
14+
15+
__C __export infiniStatus_t infiniopErfinv(infiniopErfinvDescriptor_t desc,
16+
void *workspace,
17+
size_t workspace_size,
18+
void *y,
19+
const void *x,
20+
void *stream);
21+
22+
__C __export infiniStatus_t infiniopDestroyErfinvDescriptor(infiniopErfinvDescriptor_t desc);
23+
24+
#endif
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#ifndef __INFINIOP_MATRIX_POWER_API_H__
2+
#define __INFINIOP_MATRIX_POWER_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopMatrixPowerDescriptor_t;
7+
8+
__C __export infiniStatus_t infiniopCreateMatrixPowerDescriptor(infiniopHandle_t handle,
9+
infiniopMatrixPowerDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x,
12+
int n);
13+
14+
__C __export infiniStatus_t infiniopGetMatrixPowerWorkspaceSize(infiniopMatrixPowerDescriptor_t desc, size_t *size);
15+
16+
__C __export infiniStatus_t infiniopMatrixPower(infiniopMatrixPowerDescriptor_t desc,
17+
void *workspace,
18+
size_t workspace_size,
19+
void *y,
20+
const void *x,
21+
void *stream);
22+
23+
__C __export infiniStatus_t infiniopDestroyMatrixPowerDescriptor(infiniopMatrixPowerDescriptor_t desc);
24+
25+
#endif
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#ifndef __INFINIOP_PIXEL_SHUFFLE_API_H__
2+
#define __INFINIOP_PIXEL_SHUFFLE_API_H__
3+
4+
#include "../operator_descriptor.h"
5+
6+
typedef struct InfiniopDescriptor *infiniopPixelShuffleDescriptor_t;
7+
8+
__C __export infiniStatus_t infiniopCreatePixelShuffleDescriptor(infiniopHandle_t handle,
9+
infiniopPixelShuffleDescriptor_t *desc_ptr,
10+
infiniopTensorDescriptor_t y,
11+
infiniopTensorDescriptor_t x,
12+
int upscale_factor);
13+
14+
__C __export infiniStatus_t infiniopGetPixelShuffleWorkspaceSize(infiniopPixelShuffleDescriptor_t desc, size_t *size);
15+
16+
__C __export infiniStatus_t infiniopPixelShuffle(infiniopPixelShuffleDescriptor_t desc,
17+
void *workspace,
18+
size_t workspace_size,
19+
void *y,
20+
const void *x,
21+
void *stream);
22+
23+
__C __export infiniStatus_t infiniopDestroyPixelShuffleDescriptor(infiniopPixelShuffleDescriptor_t desc);
24+
25+
#endif
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#include "erf_cpu.h"
2+
3+
namespace op::erf::cpu {
4+
5+
Descriptor::~Descriptor() = default;
6+
7+
infiniStatus_t Descriptor::create(
8+
infiniopHandle_t handle_,
9+
Descriptor **desc_ptr,
10+
infiniopTensorDescriptor_t out_desc,
11+
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
12+
13+
auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
14+
auto dtype = out_desc->dtype();
15+
16+
const auto &input_desc = input_desc_vec.at(0);
17+
const auto &output_shape = out_desc->shape();
18+
const auto &input_shape = input_desc->shape();
19+
20+
CHECK_DTYPE(dtype, INFINI_DTYPE_BF16, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);
21+
22+
CHECK_SAME_SHAPE(output_shape, input_shape);
23+
24+
CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);
25+
26+
return INFINI_STATUS_SUCCESS;
27+
}
28+
29+
infiniStatus_t Descriptor::calculate(
30+
void *workspace,
31+
size_t workspace_size,
32+
void *output,
33+
std::vector<const void *> inputs,
34+
void *stream) const {
35+
36+
switch (_dtype) {
37+
case INFINI_DTYPE_BF16:
38+
return _device_info->calculate<ErfOp, bf16_t>(_info, output, inputs, stream);
39+
case INFINI_DTYPE_F16:
40+
return _device_info->calculate<ErfOp, fp16_t>(_info, output, inputs, stream);
41+
case INFINI_DTYPE_F32:
42+
return _device_info->calculate<ErfOp, float>(_info, output, inputs, stream);
43+
case INFINI_DTYPE_F64:
44+
return _device_info->calculate<ErfOp, double>(_info, output, inputs, stream);
45+
default:
46+
return INFINI_STATUS_BAD_TENSOR_DTYPE;
47+
}
48+
49+
return INFINI_STATUS_SUCCESS;
50+
}
51+
52+
} // namespace op::erf::cpu

src/infiniop/ops/erf/cpu/erf_cpu.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#ifndef __ERF_CPU_H__
2+
#define __ERF_CPU_H__
3+
4+
#include "../../../elementwise/cpu/elementwise_cpu.h"
5+
#include <cmath>
6+
7+
ELEMENTWISE_DESCRIPTOR(erf, cpu)
8+
9+
namespace op::erf::cpu {
10+
typedef struct ErfOp {
11+
public:
12+
static constexpr size_t num_inputs = 1;
13+
template <typename T>
14+
T operator()(const T &x) const {
15+
return std::erf(x);
16+
}
17+
} ErfOp;
18+
} // namespace op::erf::cpu
19+
20+
#endif // __ERF_CPU_H__
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#pragma once
2+
#include <cmath>
3+
#include <cuda_bf16.h>
4+
#include <cuda_fp16.h>
5+
#include <cuda_runtime.h>
6+
#include <type_traits>
7+
8+
namespace op::cuda {
9+
10+
struct ErfOp {
11+
static constexpr size_t num_inputs = 1;
12+
13+
template <typename T>
14+
__device__ __forceinline__ T operator()(T x) const {
15+
if constexpr (std::is_same_v<T, float>) {
16+
return erff(x);
17+
} else if constexpr (std::is_same_v<T, double>) {
18+
return erf(x);
19+
} else {
20+
// For F16/BF16: promote to float, compute, then cast back
21+
float xf;
22+
if constexpr (std::is_same_v<T, half>) {
23+
xf = __half2float(x);
24+
return __float2half_rn(erff(xf));
25+
} else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
26+
xf = __bfloat162float(x);
27+
return __float2bfloat16_rn(erff(xf));
28+
} else {
29+
xf = static_cast<float>(x);
30+
return static_cast<T>(erff(xf));
31+
}
32+
}
33+
}
34+
};
35+
36+
} // namespace op::cuda

src/infiniop/ops/erf/erf.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef __ERF_H__
2+
#define __ERF_H__
3+
4+
#include "../../elementwise/elementwise.h"
5+
6+
#define DESCRIPTOR(NAMESPACE) ELEMENTWISE_DESCRIPTOR(erf, NAMESPACE)
7+
8+
#endif // __ERF_H__

0 commit comments

Comments
 (0)