Skip to content

Commit 4afaaa7

Browse files
XiaoHou
authored and committed
fix all
1 parent 74fce19 commit 4afaaa7

8 files changed

Lines changed: 354 additions & 44 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.PHONY : build clean format install-python test-cpp test-onnx
22

3-
TYPE ?= Release
3+
TYPE ?= Debug
44
TEST ?= ON
55

66
CMAKE_OPT = -DCMAKE_BUILD_TYPE=$(TYPE)

report.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<testsuites tests="1" failures="1" disabled="0" errors="0" time="0." timestamp="2025-08-08T12:42:56.640" name="AllTests">
3+
<testsuite name="Concat" tests="1" failures="1" disabled="0" skipped="0" errors="0" time="0." timestamp="2025-08-08T12:42:56.640">
4+
<testcase name="NativeCpu" file="/amax/2020/hx2024/Cpp/TinyInfiniTensor/test/kernels/nativecpu/test_nativecpu_concat.cc" line="9" status="run" result="completed" time="0." timestamp="2025-08-08T12:42:56.640" classname="Concat">
5+
<failure message="unknown file&#x0A;C++ exception with description &quot;&quot; thrown in the test body.&#x0A;" type=""><![CDATA[unknown file
6+
C++ exception with description "" thrown in the test body.
7+
]]></failure>
8+
</testcase>
9+
</testsuite>
10+
</testsuites>

src/core/allocator.cc

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,41 @@ namespace infini
3434
// =================================== 作业 ===================================
3535
if (freeBlocks.empty())
3636
{
37-
used += size;
38-
peak = std::max(peak, used);
39-
freeBlocks.emplace(0, size); // Allocate from the start
40-
return 0;
37+
freeBlocks[0] = 4096; // Initially, all memory is free
4138
}
4239
for(auto it = freeBlocks.begin(); it != freeBlocks.end(); it ++){
4340
auto [addr, blockSize] = *it;
44-
if(blockSize >= size){
45-
size_t upper_addr = freeBlocks.upper_bound(addr)->first;
46-
size_t gap = upper_addr - (addr + blockSize);
47-
if(gap >= size){
48-
used += size;
49-
freeBlocks[addr + blockSize] = gap; // Update the free block after allocation
50-
return addr + blockSize;
41+
if(blockSize >= size){ //blockSize 是可用空间
42+
if(blockSize > size){
43+
// Split the block if it's larger than requested size
44+
freeBlocks[addr + size] = blockSize - size;
5145
}
46+
freeBlocks.erase(it);
47+
used += size;
48+
peak = std::max(peak, used);
49+
return it->first;
5250
}
5351
}
54-
used += size;
55-
peak = std::max(peak, used);
56-
size_t lastAddr = freeBlocks.rbegin()->first + freeBlocks.rbegin()->second;
57-
freeBlocks.emplace(lastAddr, size); // Allocate
58-
return lastAddr;
52+
53+
return 0;
54+
55+
56+
57+
// if (this->freeBlocks.empty())
58+
// this->freeBlocks[0] = 1024;
59+
// for (auto it = this->freeBlocks.begin(); it != this->freeBlocks.end(); ++it)
60+
// {
61+
// if (it->second >= size)
62+
// {
63+
// if (it->second > size)
64+
// this->freeBlocks[it->first + size] = it->second - size;
65+
// auto ans = it->first;
66+
// this->freeBlocks.erase(it);
67+
// this->used += size;
68+
// this->peak = (this->peak >= this->used) ? this->peak : this->used;
69+
// return ans;
70+
// }
71+
// }
5972
}
6073

6174
void Allocator::free(size_t addr, size_t size)
@@ -65,22 +78,21 @@ namespace infini
6578
// =================================== 作业 ===================================
6679
// TODO: 设计一个算法来回收内存
6780
// =================================== 作业 ===================================
68-
used -= size;
69-
auto next = freeBlocks.upper_bound(addr);
70-
if(next != freeBlocks.end() && addr + size == next -> first){ // 再次确保是否物理相邻
71-
// Merge with next block
72-
size += next->second;
73-
freeBlocks.erase(next);
81+
freeBlocks[addr] = size;
82+
auto it = freeBlocks.find(addr);
83+
auto nextIt = std::next(it);
84+
if (nextIt != freeBlocks.end() && it->first + it->second == nextIt->first)
85+
{
86+
it->second += nextIt->second;
87+
freeBlocks.erase(nextIt);
7488
}
75-
auto prev = freeBlocks.lower_bound(addr);
76-
if(prev != freeBlocks.begin() && prev -> first + prev->second == addr){ // 再次确保是否物理相邻
77-
// Merge with previous block
78-
size += prev->second;
79-
addr = prev->first; // Update address to the start of the merged block
80-
81-
freeBlocks.erase(prev);
89+
auto prevIt = std::prev(it);
90+
if (it != freeBlocks.begin() && prevIt->first + prevIt->second == it->first)
91+
{
92+
prevIt->second += it->second;
93+
freeBlocks.erase(it);
8294
}
83-
freeBlocks.emplace(addr, size); // Store the freed block
95+
used = used - size;
8496
}
8597

8698
void *Allocator::getPtr()

src/core/graph.cc

Lines changed: 190 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
#include <algorithm>
33
#include <numeric>
44
#include <queue>
5-
5+
#include "operators/matmul.h"
6+
#include "operators/transpose.h"
67
namespace infini
78
{
89

@@ -106,6 +107,163 @@ namespace infini
106107
// 1. 去除冗余的算子(例如,两个相邻的算子都是 transpose 算子,且做的是相反的操作,可以将其全部删除)
107108
// 2. 合并算子(例如,矩阵乘算子中含有属性transA、transB,如果其输入存在transpose,且对最后两个维度做交换,就可以将transpose融入到矩阵乘算子的属性中去)
108109
// =================================== 作业 ===================================
110+
111+
// rule1: 删除无用的transpose算子
112+
for (size_t i = 0; i < ops.size(); ++i)
113+
{
114+
Operator op = ops[i];
115+
if (op->getOpType() == OpType::Transpose)
116+
{
117+
Tensor tensor = op->getOutput();
118+
if (!tensor)
119+
continue;
120+
auto targets = tensor->getTargets();
121+
if (targets.empty())
122+
continue;
123+
Operator op_next = targets[0];
124+
if (op_next->getOpType() == OpType::Transpose)
125+
{
126+
TransposeObj *op1 = as<TransposeObj>(op).get();
127+
TransposeObj *op2 = as<TransposeObj>(op_next).get();
128+
auto op1_permute = op1->getPermute();
129+
auto op2_permute = op2->getPermute();
130+
if (op1_permute.size() != op2_permute.size())
131+
continue;
132+
bool flag = true;
133+
for (int j = 0; j < (int)op1_permute.size(); j++)
134+
{
135+
if (op1_permute[op2_permute[j]] != j)
136+
{
137+
flag = false;
138+
continue;
139+
}
140+
}
141+
if (!flag) //flag为false说明 无法合并
142+
continue;
143+
// 获取第一个转置算子的输入张量(原始输入数据)
144+
Tensor originalInput = op->getInputs()[0];
145+
146+
// 获取第一个转置算子的输出张量(第一次转置结果)
147+
Tensor firstTransposeOutput = op->getOutput();
148+
149+
// 获取第二个转置算子的输出张量(最终转置结果)
150+
Tensor secondTransposeOutput = op_next->getOutput();
151+
152+
// 获取使用最终结果的消费者算子(如矩阵乘法)
153+
Operator consumerOp = secondTransposeOutput->getTargets()[0];
154+
155+
// 保留消费者算子的其他输入(如矩阵乘法的右矩阵)
156+
Tensor consumerOtherInput = consumerOp->getInputs()[1];
157+
158+
// 重定向消费者算子的输入:跳过两个转置,直接使用原始输入
159+
consumerOp->replaceInput(consumerOp->getInputs()[0], originalInput);
160+
161+
// 更新原始输入的连接关系:
162+
originalInput->removeTarget(op); // 移除对第一个转置的引用
163+
originalInput->addTarget(consumerOp); // 添加对消费者算子的引用
164+
originalInput->setSource(nullptr); // 清除可能存在的生产者标记
165+
166+
// 清理冗余资源
167+
removeOperator(op); // 删除第一个转置算子
168+
removeOperator(op_next); // 删除第二个转置算子
169+
removeTensor(firstTransposeOutput); // 删除中间结果张量
170+
removeTensor(secondTransposeOutput); // 删除最终结果张量
171+
172+
// 更新算子间的拓扑依赖关系
173+
consumerOp->removePredecessors(op_next); // 移除与第二个转置的依赖
174+
175+
// 如果原始输入有生产者,建立新的依赖关系
176+
if (originalInput->getSource()) {
177+
consumerOp->addPredecessors(originalInput->getSource());
178+
originalInput->getSource()->addSuccessors(consumerOp);
179+
}
180+
}
181+
}
182+
}
183+
184+
// 遍历图中的所有算子,寻找可优化的矩阵乘法算子
185+
for (size_t opIndex = 0; opIndex < ops.size(); ++opIndex) {
186+
Operator currentOp = ops[opIndex];
187+
188+
// 只处理矩阵乘法算子
189+
if (currentOp->getOpType() == OpType::MatMul) {
190+
// 获取矩阵乘法的输入张量列表(左矩阵和右矩阵)
191+
TensorVec matmulInputs = currentOp->getInputs();
192+
int inputIndex = 0; // 用于标识当前是左输入(0)还是右输入(1)
193+
194+
// 检查每个输入张量
195+
for (Tensor inputTensor : matmulInputs) {
196+
inputIndex++;
197+
198+
// 检查输入张量是否有生产者算子
199+
if (inputTensor->getSource()) {
200+
Operator producerOp = inputTensor->getSource();
201+
202+
// 如果生产者是转置算子
203+
if (producerOp->getOpType() == OpType::Transpose) {
204+
TransposeObj *transposeOp = as<TransposeObj>(producerOp).get();
205+
Shape transposePerm = transposeOp->getPermute();
206+
bool isLastTwoDimsSwap = true;
207+
208+
/* 验证转置操作是否只交换最后两个维度:
209+
* 1. 前n-2个维度必须保持原顺序(即perm[j] == j)
210+
* 2. 最后两个维度必须交换(即perm[-2] == rank-1 且 perm[-1] == rank-2)
211+
*/
212+
for (int dim = 0; dim < (int)transposePerm.size() - 2; dim++) {
213+
if (transposePerm[dim] != dim) {
214+
isLastTwoDimsSwap = false;
215+
break;
216+
}
217+
}
218+
if (transposePerm[transposePerm.size() - 2] != (int)transposePerm.size() - 1 ||
219+
transposePerm[transposePerm.size() - 1] != (int)transposePerm.size() - 2) {
220+
isLastTwoDimsSwap = false;
221+
}
222+
223+
// 如果不满足条件则跳过优化
224+
if (!isLastTwoDimsSwap) continue;
225+
226+
// 获取矩阵乘法算子(用于修改转置属性)
227+
MatmulObj *matmulOp = as<MatmulObj>(currentOp).get();
228+
Tensor transposedTensor;
229+
230+
// 根据输入位置设置对应的转置标志
231+
if (inputIndex == 1) { // 左输入
232+
matmulOp->setTransA(true); // 启用左矩阵转置
233+
transposedTensor = matmulOp->getInputs(0);
234+
} else { // 右输入
235+
matmulOp->setTransB(true); // 启用右矩阵转置
236+
transposedTensor = matmulOp->getInputs(1);
237+
}
238+
239+
// 获取转置算子的输入(原始未转置的张量)
240+
Operator transposeOperator = transposedTensor->getSource();
241+
Tensor originalTensor = transposeOperator->getInputs()[0];
242+
243+
// 重定向矩阵乘法的输入:跳过转置算子,直接使用原始张量
244+
matmulOp->replaceInput(transposedTensor, originalTensor);
245+
246+
// 更新张量连接关系
247+
originalTensor->removeTarget(transposeOperator);
248+
originalTensor->addTarget(currentOp);
249+
250+
// 清理资源:删除转置算子和中间张量
251+
removeOperator(transposeOperator);
252+
removeTensor(transposedTensor);
253+
254+
// 更新拓扑关系:移除转置算子作为前驱
255+
currentOp->removePredecessors(transposeOperator);
256+
257+
// 如果原始张量有生产者,建立新的依赖关系
258+
if (originalTensor->getSource()) {
259+
currentOp->addPredecessors(originalTensor->getSource());
260+
originalTensor->getSource()->addSuccessors(currentOp);
261+
}
262+
}
263+
}
264+
}
265+
}
266+
}
109267
}
110268

111269
Tensor GraphObj::getTensor(int fuid) const
@@ -152,16 +310,41 @@ namespace infini
152310
// TODO:利用 allocator 给计算图分配内存
153311
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
154312
// =================================== 作业 ===================================
313+
// allocator.info();
314+
// void* allocatorPtr = allocator.getPtr();
315+
// for(auto it = tensors.begin(); it != tensors.end(); it++){
316+
// auto tensor = *it;
317+
// size_t size = tensor->getBytes();
318+
// size_t addr = allocator.alloc(size);
319+
// char * tmpPtr = reinterpret_cast<char*>(allocatorPtr) + addr;
320+
// Blob blob = make_ref<BlobObj>(runtime, (void *)tmpPtr);
321+
// tensor->setDataBlob(blob);
322+
// }
323+
// topological sorting first
324+
IT_ASSERT(topo_sort() == true);
155325

156-
allocator.info();
157-
for(auto it = tensors.begin(); it != tensors.end(); it++){
158-
auto tensor = *it;
326+
// =================================== 作业 ===================================
327+
// TODO:利用 allocator 给计算图分配内存
328+
// HINT: 获取分配好的内存指针后,可以调用 tensor 的 setDataBlob 函数给 tensor 绑定内存
329+
// =================================== 作业 ===================================
330+
vector<size_t> offsets;
331+
for (auto tensor : tensors)
332+
{
159333
size_t size = tensor->getBytes();
160-
size_t addr = allocator.alloc(size);
161-
IT_ASSERT(addr != 0);
162-
Blob blob = make_ref<BlobObj>(tensor->getRuntime(), (void *)(allocator.getPtr() + addr));
334+
size_t offset = allocator.alloc(size);
335+
offsets.push_back(offset);
336+
}
337+
auto it = offsets.begin();
338+
void *basePtr = allocator.getPtr();
339+
for (auto tensor : tensors)
340+
{
341+
char *charPtr = reinterpret_cast<char *>(basePtr) + *it;
342+
void *ptr = charPtr;
343+
Blob blob = make_ref<BlobObj>(runtime, ptr);
163344
tensor->setDataBlob(blob);
345+
it++;
164346
}
347+
allocator.info();
165348
}
166349

167350
Tensor GraphObj::addTensor(Shape dim, DataType dtype)

src/operators/concat.cc

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,30 @@ ConcatObj::ConcatObj(GraphObj *graph, TensorVec inputs, Tensor output, int _dim)
1010
}
1111

1212
optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
13-
Shape dims = inputs[0]->getDims();
13+
Shape dims = inputs[0]->getDims(); // 数组的 shape
1414
auto rank = inputs[0]->getRank();
15-
1615
// =================================== 作业 ===================================
1716
// TODO:修改 dims,返回正确的 concat 后的 shape
1817
// REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
1918
// =================================== 作业 ===================================
20-
return {{dims}};
19+
if(inputs.size() == 0) {
20+
return std::nullopt;
21+
}
22+
for(auto input: inputs){
23+
if(input->getDims().size() != rank)
24+
return std::nullopt;
25+
}
26+
vector<int> res(rank, 0);
27+
for(auto input: inputs){
28+
for(size_t i = 0; i < rank; i++){
29+
if(i == size_t(dim)){
30+
res[i] += input->getDims()[i];
31+
}else if (i != size_t(dim)){
32+
res[i] = input->getDims()[i];
33+
}
34+
}
35+
}
36+
return {{res}};
2137
}
2238

2339
std::string ConcatObj::toString() const {

0 commit comments

Comments (0)