Skip to content

Commit 5793e89

Browse files
stevesuzuki-arm authored and alexreinking committed
Move helpers for shuffle scalable vectors to CodeGen_ARM
Theoretically, these helpers are LLVM-common rather than ARM-specific, but for now they are kept ARM-only to avoid any effect on other targets.
1 parent 9772743 commit 5793e89

3 files changed

Lines changed: 173 additions & 173 deletions

File tree

src/CodeGen_ARM.cpp

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,19 @@ class CodeGen_ARM : public CodeGen_Posix {
202202
void visit(const Call *) override;
203203
void visit(const LT *) override;
204204
void visit(const LE *) override;
205+
206+
llvm::Type *get_vector_type_from_value(llvm::Value *vec_or_scalar, int n);
207+
Value *concat_vectors(const std::vector<llvm::Value *> &) override;
208+
Value *slice_vector(Value *vec, int start, int extent) override;
209+
210+
/** Extract a sub vector from a vector, all the elements in the sub vector must be in the src vector.
211+
* Specialized for scalable vector */
212+
Value *extract_scalable_vector(Value *vec, int start, int extract_size);
213+
214+
/** Insert a vector into the "start" position of a base vector.
215+
* Specialized for scalable vector */
216+
Value *insert_scalable_vector(Value *base_vec, Value *new_vec, int start);
217+
205218
Value *interleave_vectors(const std::vector<Value *> &) override;
206219
Value *shuffle_vectors(Value *a, Value *b, const std::vector<int> &indices) override;
207220
void codegen_vector_reduce(const VectorReduce *, const Expr &) override;
@@ -1897,6 +1910,166 @@ void CodeGen_ARM::visit(const Shuffle *op) {
18971910
}
18981911
}
18991912

1913+
// Build an LLVM vector type with n lanes whose element type matches the
// given value. The value may itself be a vector (its element type is
// used) or a scalar (its own type is used).
llvm::Type *CodeGen_ARM::get_vector_type_from_value(Value *vec_or_scalar, int n) {
    llvm::Type *value_type = vec_or_scalar->getType();
    if (value_type->isVectorTy()) {
        value_type = get_vector_element_type(value_type);
    }
    return CodeGen_Posix::get_vector_type(value_type, n);
}
1918+
1919+
// Concatenate a list of values into a single vector.
// Overridden only for scalable vectors, which includes the case where
// scalars are concatenated into a scalable vector; fixed-width vectors
// and trivial cases are delegated to the base-class implementation.
Value *CodeGen_ARM::concat_vectors(const vector<Value *> &vecs) {
    const bool use_base_impl =
        target_vscale() == 0 ||
        vecs.size() <= 1 ||
        isa<FixedVectorType>(vecs[0]->getType());
    if (use_base_impl) {
        return CodeGen_Posix::concat_vectors(vecs);
    }

    // The result's lane count is the sum over all inputs.
    int total_lanes = 0;
    for (Value *piece : vecs) {
        total_lanes += get_vector_num_elements(piece->getType());
    }

    llvm::Type *elt_type = get_vector_element_type(vecs[0]->getType());
    llvm::Type *result_type = get_vector_type(elt_type, total_lanes);

    // Insert each piece at its running lane offset.
    Value *result = UndefValue::get(result_type);
    int lane_offset = 0;
    for (Value *piece : vecs) {
        result = insert_scalable_vector(result, piece, lane_offset);
        lane_offset += get_vector_num_elements(piece->getType());
    }
    return result;
}
1942+
1943+
// Take a slice of slice_size lanes starting at lane `start` from `vec`.
// Overridden only for scalable vectors; everything else falls through
// to the base-class implementation.
Value *CodeGen_ARM::slice_vector(llvm::Value *vec, int start, int slice_size) {
    if (target_vscale() == 0 || !is_scalable_vector(vec)) {
        return CodeGen_Posix::slice_vector(vec, start, slice_size);
    }

    const int src_lanes = get_vector_num_elements(vec->getType());

    // A one-lane slice is just an element extraction.
    if (slice_size == 1) {
        return builder->CreateExtractElement(vec, ConstantInt::get(i64_t, start, true));
    }

    if (start == 0) {
        if (src_lanes == slice_size) {
            // Identity slice.
            return vec;
        }
        if (src_lanes < slice_size) {
            // The result is wider than the source: pad with undef lanes.
            Value *padded = UndefValue::get(get_vector_type_from_value(vec, slice_size));
            return insert_scalable_vector(padded, vec, 0);
        }
        // The result is narrower: extract the leading sub-vector.
        llvm::Type *result_type = get_vector_type_from_value(vec, slice_size);
        return builder->CreateExtractVector(result_type, vec, ConstantInt::get(i64_t, 0, true));
    }

    // General case: pull out as many lanes as the source can provide,
    // then pad with undef lanes if the slice runs off the end.
    const int avail = std::min(src_lanes - start, slice_size);
    Value *extracted = extract_scalable_vector(vec, start, avail);
    if (avail == slice_size) {
        return extracted;
    }
    Value *padded = UndefValue::get(get_vector_type_from_value(vec, slice_size));
    return insert_scalable_vector(padded, extracted, 0);
}
1975+
1976+
// Extract extract_size contiguous lanes of `vec` starting at lane `start`.
// Specialized for scalable vectors. The llvm.vector.extract intrinsic
// requires the index to be a constant multiple of the minimum lane count
// of the result type, so an arbitrary extraction is decomposed into
// aligned sub-extractions (degenerating to per-element extraction in the
// worst case), which are then concatenated.
Value *CodeGen_ARM::extract_scalable_vector(Value *vec, int start, int extract_size) {
    internal_assert(target_vscale() > 0 && is_scalable_vector(vec));
    internal_assert(start + extract_size <= get_vector_num_elements(vec->getType()));  // No overrun

    if (extract_size == 1) {
        return builder->CreateExtractElement(vec, ConstantInt::get(i64_t, start, true));
    }

    std::vector<Value *> pieces;
    int done = 0;
    while (done < extract_size) {
        const int pos = start + done;
        // Take the largest chunk whose size divides its starting position,
        // satisfying the intrinsic's alignment requirement. A chunk of one
        // lane always qualifies, so the loop always makes progress.
        for (int chunk = extract_size - done; chunk > 0; --chunk) {
            if (pos % chunk != 0) {
                continue;
            }
            internal_assert(pos % target_vscale() == 0);
            Value *piece;
            if (chunk == 1) {
                piece = builder->CreateExtractElement(vec, pos);
            } else {
                // For the vector intrinsic, the index is normalized by vscale.
                Value *idx = ConstantInt::get(i64_t, pos / target_vscale(), true);
                llvm::Type *piece_type = get_vector_type_from_value(vec, chunk);
                piece = builder->CreateExtractVector(piece_type, vec, idx);
            }
            pieces.push_back(piece);
            done += chunk;
            break;
        }
    }
    return concat_vectors(pieces);
}
2013+
2014+
// Insert new_vec (a vector or scalar) into base_vec so that its first
// lane lands at lane `start` of the base. Specialized for scalable
// vectors. The llvm.vector.insert intrinsic requires the index to be a
// constant multiple of the sub-vector's minimum lane count, so a
// misaligned insertion is decomposed into multiple aligned sub-slices.
Value *CodeGen_ARM::insert_scalable_vector(Value *base_vec, Value *new_vec, int start) {
    const int base_lanes = get_vector_num_elements(base_vec->getType());
    const int new_lanes = get_vector_num_elements(new_vec->getType());
    llvm::Type *elt_type = get_vector_element_type(base_vec->getType());

    internal_assert(start + new_lanes <= base_lanes);

    // Scalar into scalar: nothing to merge.
    if (base_lanes == 1 && new_lanes == 1) {
        return new_vec;
    }

    internal_assert(target_vscale() > 0 && is_scalable_vector(base_vec));

    if (!new_vec->getType()->isVectorTy()) {
        // Scalar source: a plain element insertion.
        return builder->CreateInsertElement(base_vec, new_vec, start);
    }
    if (start % new_lanes == 0) {
        // The common, aligned case: a single vector insertion.
        // For the vector intrinsic, the index is normalized by vscale.
        Value *idx = ConstantInt::get(i64_t, start / target_vscale(), true);
        return builder->CreateInsertVector(base_vec->getType(), base_vec, new_vec, idx);
    }

    // Misaligned: walk through new_vec, peeling off the largest sub-slice
    // whose size divides both its source and destination positions.
    Value *result = base_vec;
    int src_pos = 0;
    int dst_pos = start;
    int slice_lanes = std::min(start, new_lanes);

    while (src_pos < new_lanes) {
        const bool aligned =
            src_pos + slice_lanes <= new_lanes &&  // Condition to not overrun
            src_pos % slice_lanes == 0 &&          // Requirement of LLVM intrinsic
            dst_pos % slice_lanes == 0;            // Requirement of LLVM intrinsic
        if (!aligned) {
            // Move on to the next smaller candidate slice size.
            --slice_lanes;
            continue;
        }

        internal_assert(src_pos % target_vscale() == 0);
        internal_assert(dst_pos % target_vscale() == 0);

        if (slice_lanes == 1) {
            Value *piece = builder->CreateExtractElement(new_vec, src_pos);
            result = builder->CreateInsertElement(result, piece, dst_pos);
        } else {
            // For the vector intrinsics, indices are normalized by vscale.
            Value *src_idx = ConstantInt::get(i64_t, src_pos / target_vscale(), true);
            Value *dst_idx = ConstantInt::get(i64_t, dst_pos / target_vscale(), true);
            llvm::Type *slice_type = get_vector_type(elt_type, slice_lanes);
            Value *piece = builder->CreateExtractVector(slice_type, new_vec, src_idx);
            result = builder->CreateInsertVector(base_vec->getType(), result, piece, dst_idx);
        }
        dst_pos += slice_lanes;
        src_pos += slice_lanes;
    }
    return result;
}
2072+
19002073
Value *CodeGen_ARM::interleave_vectors(const std::vector<Value *> &vecs) {
19012074
if (simd_intrinsics_disabled() || target_vscale() == 0 ||
19022075
vecs.size() < 2 ||

src/CodeGen_LLVM.cpp

Lines changed: 0 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -4881,10 +4881,6 @@ Value *CodeGen_LLVM::call_intrin(const llvm::Type *result_type, int intrin_lanes
48814881
}
48824882

48834883
Value *CodeGen_LLVM::slice_vector(Value *vec, int start, int size) {
4884-
if (effective_vscale > 0 && is_scalable_vector(vec)) {
4885-
return slice_scalable_vector(vec, start, size);
4886-
}
4887-
48884884
// Force the arg to be an actual vector
48894885
if (!vec->getType()->isVectorTy()) {
48904886
vec = create_broadcast(vec, 1);
@@ -4948,10 +4944,6 @@ Value *CodeGen_LLVM::concat_vectors(const vector<Value *> &v) {
49484944

49494945
internal_assert(!v.empty());
49504946

4951-
if (effective_vscale > 0 && is_scalable_vector(v[0])) {
4952-
return concat_scalable_vectors(v);
4953-
}
4954-
49554947
vector<Value *> vecs = v;
49564948

49574949
// Force them all to be actual vectors
@@ -5011,147 +5003,6 @@ Value *CodeGen_LLVM::concat_vectors(const vector<Value *> &v) {
50115003
return vecs[0];
50125004
}
50135005

5014-
// Concatenate scalable vectors into one scalable vector whose lane count
// is the sum of the inputs' lane counts.
Value *CodeGen_LLVM::concat_scalable_vectors(const vector<Value *> &vecs) {
    internal_assert(effective_vscale > 0 && is_scalable_vector(vecs[0]));

    // The result's lane count is the sum over all inputs.
    int total_lanes = 0;
    for (Value *piece : vecs) {
        total_lanes += get_vector_num_elements(piece->getType());
    }

    llvm::Type *elt_type = get_vector_element_type(vecs[0]->getType());
    llvm::Type *result_type = get_vector_type(elt_type, total_lanes);

    // Insert each piece at its running lane offset.
    Value *result = UndefValue::get(result_type);
    int lane_offset = 0;
    for (Value *piece : vecs) {
        result = insert_scalable_vector(result, piece, lane_offset);
        lane_offset += get_vector_num_elements(piece->getType());
    }
    return result;
}
5030-
5031-
// Slice slice_size lanes starting at lane `start` out of a scalable vector.
Value *CodeGen_LLVM::slice_scalable_vector(llvm::Value *vec, int start, int slice_size) {
    const int src_lanes = get_vector_num_elements(vec->getType());

    // A one-lane slice is just an element extraction.
    if (slice_size == 1) {
        return builder->CreateExtractElement(vec, ConstantInt::get(i64_t, start, true));
    }

    if (start == 0) {
        if (src_lanes == slice_size) {
            // Identity slice.
            return vec;
        }
        if (src_lanes < slice_size) {
            // The result is wider than the source: pad with undef lanes.
            Value *padded = UndefValue::get(get_vector_type(vec, slice_size));
            return insert_scalable_vector(padded, vec, 0);
        }
        // The result is narrower: extract the leading sub-vector.
        llvm::Type *result_type = get_vector_type(vec, slice_size);
        return builder->CreateExtractVector(result_type, vec, ConstantInt::get(i64_t, 0, true));
    }

    // General case: pull out as many lanes as the source can provide,
    // then pad with undef lanes if the slice runs off the end.
    const int avail = std::min(src_lanes - start, slice_size);
    Value *extracted = extract_scalable_vector(vec, start, avail);
    if (avail == slice_size) {
        return extracted;
    }
    Value *padded = UndefValue::get(get_vector_type(vec, slice_size));
    return insert_scalable_vector(padded, extracted, 0);
}
5057-
5058-
// Extract extract_size contiguous lanes of `vec` starting at lane `start`.
// The llvm.experimental.vector.extract intrinsic requires the index to be
// a constant multiple of the minimum lane count of the result type, so an
// arbitrary extraction is decomposed into aligned sub-extractions
// (degenerating to per-element extraction in the worst case), which are
// then concatenated.
Value *CodeGen_LLVM::extract_scalable_vector(Value *vec, int start, int extract_size) {
    internal_assert(is_scalable_vector(vec) && effective_vscale);
    internal_assert(start + extract_size <= get_vector_num_elements(vec->getType()));  // No overrun

    if (extract_size == 1) {
        return builder->CreateExtractElement(vec, ConstantInt::get(i64_t, start, true));
    }

    std::vector<Value *> pieces;
    int done = 0;
    while (done < extract_size) {
        const int pos = start + done;
        // Take the largest chunk whose size divides its starting position,
        // satisfying the intrinsic's alignment requirement. A chunk of one
        // lane always qualifies, so the loop always makes progress.
        for (int chunk = extract_size - done; chunk > 0; --chunk) {
            if (pos % chunk != 0) {
                continue;
            }
            internal_assert(pos % effective_vscale == 0);
            Value *piece;
            if (chunk == 1) {
                piece = builder->CreateExtractElement(vec, pos);
            } else {
                // For the vector intrinsic, the index is normalized by vscale.
                Value *idx = ConstantInt::get(i64_t, pos / effective_vscale, true);
                llvm::Type *piece_type = get_vector_type(vec, chunk);
                piece = builder->CreateExtractVector(piece_type, vec, idx);
            }
            pieces.push_back(piece);
            done += chunk;
            break;
        }
    }
    return concat_vectors(pieces);
}
5095-
5096-
// Insert new_vec (a vector or scalar) into base_vec so that its first
// lane lands at lane `start` of the base. The
// llvm.experimental.vector.insert intrinsic requires the index to be a
// constant multiple of the sub-vector's minimum lane count, so a
// misaligned insertion is decomposed into multiple aligned sub-slices.
Value *CodeGen_LLVM::insert_scalable_vector(Value *base_vec, Value *new_vec, int start) {
    const int base_lanes = get_vector_num_elements(base_vec->getType());
    const int new_lanes = get_vector_num_elements(new_vec->getType());
    llvm::Type *elt_type = get_vector_element_type(base_vec->getType());

    internal_assert(start + new_lanes <= base_lanes);

    // Scalar into scalar: nothing to merge.
    if (base_lanes == 1 && new_lanes == 1) {
        return new_vec;
    }

    internal_assert(is_scalable_vector(base_vec) && effective_vscale);

    if (!new_vec->getType()->isVectorTy()) {
        // Scalar source: a plain element insertion.
        return builder->CreateInsertElement(base_vec, new_vec, start);
    }
    if (start % new_lanes == 0) {
        // The common, aligned case: a single vector insertion.
        // For the vector intrinsic, the index is normalized by vscale.
        Value *idx = ConstantInt::get(i64_t, start / effective_vscale, true);
        return builder->CreateInsertVector(base_vec->getType(), base_vec, new_vec, idx);
    }

    // Misaligned: walk through new_vec, peeling off the largest sub-slice
    // whose size divides both its source and destination positions.
    Value *result = base_vec;
    int src_pos = 0;
    int dst_pos = start;
    int slice_lanes = std::min(start, new_lanes);

    while (src_pos < new_lanes) {
        const bool aligned =
            src_pos + slice_lanes <= new_lanes &&  // Condition to not overrun
            src_pos % slice_lanes == 0 &&          // Requirement of LLVM intrinsic
            dst_pos % slice_lanes == 0;            // Requirement of LLVM intrinsic
        if (!aligned) {
            // Move on to the next smaller candidate slice size.
            --slice_lanes;
            continue;
        }

        internal_assert(src_pos % effective_vscale == 0);
        internal_assert(dst_pos % effective_vscale == 0);

        if (slice_lanes == 1) {
            Value *piece = builder->CreateExtractElement(new_vec, src_pos);
            result = builder->CreateInsertElement(result, piece, dst_pos);
        } else {
            // For the vector intrinsics, indices are normalized by vscale.
            Value *src_idx = ConstantInt::get(i64_t, src_pos / effective_vscale, true);
            Value *dst_idx = ConstantInt::get(i64_t, dst_pos / effective_vscale, true);
            llvm::Type *slice_type = get_vector_type(elt_type, slice_lanes);
            Value *piece = builder->CreateExtractVector(slice_type, new_vec, src_idx);
            result = builder->CreateInsertVector(base_vec->getType(), result, piece, dst_idx);
        }
        dst_pos += slice_lanes;
        src_pos += slice_lanes;
    }
    return result;
}
5154-
51555006
Value *CodeGen_LLVM::reverse_vector(llvm::Value *vec) {
51565007
if (effective_vscale > 0) {
51575008
return builder->CreateVectorReverse(vec);
@@ -5558,13 +5409,6 @@ llvm::Type *CodeGen_LLVM::get_vector_type(llvm::Type *t, int n,
55585409
return VectorType::get(t, n, scalable);
55595410
}
55605411

5561-
// Build an LLVM vector type with n lanes whose element type matches the
// given value. The value may itself be a vector (its element type is
// used) or a scalar (its own type is used).
llvm::Type *CodeGen_LLVM::get_vector_type(llvm::Value *vec_or_scalar, int n,
                                          VectorTypeConstraint type_constraint) const {
    llvm::Type *value_type = vec_or_scalar->getType();
    if (value_type->isVectorTy()) {
        value_type = get_vector_element_type(value_type);
    }
    return get_vector_type(value_type, n, type_constraint);
}
5567-
55685412
llvm::Constant *CodeGen_LLVM::get_splat(int lanes, llvm::Constant *value,
55695413
VectorTypeConstraint type_constraint) const {
55705414
bool scalable = false;

0 commit comments

Comments
 (0)