@@ -202,6 +202,19 @@ class CodeGen_ARM : public CodeGen_Posix {
202202 void visit (const Call *) override ;
203203 void visit (const LT *) override ;
204204 void visit (const LE *) override ;
205+
206+ llvm::Type *get_vector_type_from_value (llvm::Value *vec_or_scalar, int n);
207+ Value *concat_vectors (const std::vector<llvm::Value *> &) override ;
208+ Value *slice_vector (Value *vec, int start, int extent) override ;
209+
210+ /** Extract a sub-vector from a vector. All elements of the sub-vector must lie within the source vector.
211+ * Specialized for scalable vectors. */
212+ Value *extract_scalable_vector (Value *vec, int start, int extract_size);
213+
214+ /** Insert a vector at the "start" position of a base vector.
215+ * Specialized for scalable vectors. */
216+ Value *insert_scalable_vector (Value *base_vec, Value *new_vec, int start);
217+
205218 Value *interleave_vectors (const std::vector<Value *> &) override ;
206219 Value *shuffle_vectors (Value *a, Value *b, const std::vector<int > &indices) override ;
207220 void codegen_vector_reduce (const VectorReduce *, const Expr &) override ;
@@ -1897,6 +1910,166 @@ void CodeGen_ARM::visit(const Shuffle *op) {
18971910 }
18981911}
18991912
1913+ llvm::Type *CodeGen_ARM::get_vector_type_from_value (Value *vec_or_scalar, int n) {
1914+ llvm::Type *t = vec_or_scalar->getType ();
1915+ llvm::Type *elt = t->isVectorTy () ? get_vector_element_type (t) : t;
1916+ return CodeGen_Posix::get_vector_type (elt, n);
1917+ }
1918+
1919+ Value *CodeGen_ARM::concat_vectors (const vector<Value *> &vecs) {
1920+ // Override only for scalable vector which includes
1921+ // the case where scalars are concatenated into scalable vector.
1922+ if (target_vscale () == 0 ||
1923+ vecs.size () <= 1 ||
1924+ isa<FixedVectorType>(vecs[0 ]->getType ())) {
1925+ return CodeGen_Posix::concat_vectors (vecs);
1926+ }
1927+
1928+ int total_lanes = 0 ;
1929+ for (auto *v : vecs) {
1930+ total_lanes += get_vector_num_elements (v->getType ());
1931+ }
1932+
1933+ llvm::Type *concat_type = get_vector_type (get_vector_element_type (vecs[0 ]->getType ()), total_lanes);
1934+ Value *ret = UndefValue::get (concat_type);
1935+ int insert_index = 0 ;
1936+ for (auto *v : vecs) {
1937+ ret = insert_scalable_vector (ret, v, insert_index);
1938+ insert_index += get_vector_num_elements (v->getType ());
1939+ }
1940+ return ret;
1941+ }
1942+
1943+ Value *CodeGen_ARM::slice_vector (llvm::Value *vec, int start, int slice_size) {
1944+ // Override only for scalable vector
1945+ if (target_vscale () == 0 ||
1946+ !is_scalable_vector (vec)) {
1947+ return CodeGen_Posix::slice_vector (vec, start, slice_size);
1948+ }
1949+
1950+ const int vec_lanes = get_vector_num_elements (vec->getType ());
1951+ if (slice_size == 1 ) {
1952+ return builder->CreateExtractElement (vec, ConstantInt::get (i64_t , start, true ));
1953+ } else if (start == 0 ) {
1954+ if (vec_lanes == slice_size) {
1955+ return vec;
1956+ } else if (vec_lanes < slice_size) {
1957+ return insert_scalable_vector (UndefValue::get (get_vector_type_from_value (vec, slice_size)), vec, 0 );
1958+ } else {
1959+ auto *dst_type = get_vector_type_from_value (vec, slice_size);
1960+ Value *val_index = ConstantInt::get (i64_t , 0 , true );
1961+ return builder->CreateExtractVector (dst_type, vec, val_index);
1962+ }
1963+ } else {
1964+ const int extract_size = std::min (vec_lanes - start, slice_size);
1965+ Value *extracted = extract_scalable_vector (vec, start, extract_size);
1966+ if (slice_size == extract_size) {
1967+ return extracted;
1968+ } else {
1969+ Value *sliced = UndefValue::get (get_vector_type_from_value (vec, slice_size));
1970+ sliced = insert_scalable_vector (sliced, extracted, 0 );
1971+ return sliced;
1972+ }
1973+ }
1974+ }
1975+
1976+ Value *CodeGen_ARM::extract_scalable_vector (Value *vec, int start, int extract_size) {
1977+ internal_assert (target_vscale () > 0 && is_scalable_vector (vec));
1978+ internal_assert (start + extract_size <= get_vector_num_elements (vec->getType ())); // No overrun
1979+
1980+ if (extract_size == 1 ) {
1981+ return builder->CreateExtractElement (vec, ConstantInt::get (i64_t , start, true ));
1982+ } else {
1983+ // To follow the requirement of ‘llvm.vector.extract’ intrinsic that
1984+ // idx must be a constant multiple of the known-minimum vector length of the result type,
1985+ // the extraction is performed as multiple sub-extraction, where the worst case is extraction of scalar.
1986+ std::vector<Value *> sub_slices;
1987+ int i = 0 ;
1988+ while (i < extract_size) {
1989+ int sub_extract_pos = start + i;
1990+ for (int sub_extract_size = extract_size - i; sub_extract_size > 0 ; --sub_extract_size) {
1991+ if (sub_extract_pos % sub_extract_size == 0 ) {
1992+ internal_assert (sub_extract_pos % target_vscale () == 0 );
1993+ Value *sub_extracted;
1994+ if (sub_extract_size == 1 ) {
1995+ sub_extracted = builder->CreateExtractElement (vec, sub_extract_pos);
1996+ } else {
1997+ // In vector operation, index needs to be normalized by vscale
1998+ Value *idx_val = ConstantInt::get (i64_t , sub_extract_pos / target_vscale (), true );
1999+ llvm::Type *sub_extract_type = get_vector_type_from_value (vec, sub_extract_size);
2000+ sub_extracted = builder->CreateExtractVector (sub_extract_type, vec, idx_val);
2001+ }
2002+ sub_slices.push_back (sub_extracted);
2003+
2004+ i += sub_extract_size;
2005+ break ;
2006+ }
2007+ }
2008+ }
2009+ Value *extracted = concat_vectors (sub_slices);
2010+ return extracted;
2011+ }
2012+ }
2013+
2014+ Value *CodeGen_ARM::insert_scalable_vector (Value *base_vec, Value *new_vec, int start) {
2015+ const int base_lanes = get_vector_num_elements (base_vec->getType ());
2016+ const int new_vec_lanes = get_vector_num_elements (new_vec->getType ());
2017+ llvm::Type *element_type = get_vector_element_type (base_vec->getType ());
2018+
2019+ internal_assert (start + new_vec_lanes <= base_lanes);
2020+
2021+ if (base_lanes == 1 && new_vec_lanes == 1 ) {
2022+ return new_vec;
2023+ }
2024+
2025+ internal_assert (target_vscale () > 0 && is_scalable_vector (base_vec));
2026+
2027+ if (!new_vec->getType ()->isVectorTy ()) {
2028+ return builder->CreateInsertElement (base_vec, new_vec, start);
2029+ } else if (start % new_vec_lanes == 0 ) {
2030+ // Most of the ordinal use cases are this pattern
2031+ // In vector operation, index needs to be normalized by vscale
2032+ Value *val_start_index = ConstantInt::get (i64_t , start / target_vscale (), true );
2033+ return builder->CreateInsertVector (base_vec->getType (), base_vec, new_vec, val_start_index);
2034+ }
2035+
2036+ // To follow the requirement of ‘llvm.vector.insert’ intrinsic that
2037+ // idx must be a constant multiple of subvec’s known minimum vector length,
2038+ // insertion is performed in multiple sub slices.
2039+ Value *ret = base_vec;
2040+ int extract_index = 0 ;
2041+ int insert_index = start;
2042+ int sub_slice_size = std::min (start, new_vec_lanes);
2043+
2044+ while (extract_index < new_vec_lanes) {
2045+ if (extract_index + sub_slice_size <= new_vec_lanes && // Condition to not overrun
2046+ extract_index % sub_slice_size == 0 && // Requirement of LLVM intrinsic
2047+ insert_index % sub_slice_size == 0 ) { // Requirement of LLVM intrinsic
2048+
2049+ internal_assert (extract_index % target_vscale () == 0 );
2050+ internal_assert (insert_index % target_vscale () == 0 );
2051+
2052+ if (sub_slice_size == 1 ) {
2053+ Value *sub_slice = builder->CreateExtractElement (new_vec, extract_index);
2054+ ret = builder->CreateInsertElement (ret, sub_slice, insert_index);
2055+ } else {
2056+ // In vector operation, index needs to be normalized by vscale
2057+ Value *val_extract_index = ConstantInt::get (i64_t , extract_index / target_vscale (), true );
2058+ Value *val_insert_index = ConstantInt::get (i64_t , insert_index / target_vscale (), true );
2059+ llvm::Type *sub_sliced_type = get_vector_type (element_type, sub_slice_size);
2060+ Value *sub_slice = builder->CreateExtractVector (sub_sliced_type, new_vec, val_extract_index);
2061+ ret = builder->CreateInsertVector (base_vec->getType (), ret, sub_slice, val_insert_index);
2062+ }
2063+ insert_index += sub_slice_size;
2064+ extract_index += sub_slice_size;
2065+ } else {
2066+ // move on to next candidate
2067+ --sub_slice_size;
2068+ }
2069+ }
2070+ return ret;
2071+ }
2072+
19002073Value *CodeGen_ARM::interleave_vectors (const std::vector<Value *> &vecs) {
19012074 if (simd_intrinsics_disabled () || target_vscale () == 0 ||
19022075 vecs.size () < 2 ||
0 commit comments