
Commit a8ae8ac

[AIROCMLIR-552] Added Broadcasting Linalg Lowering Path (#2270)
* [AIROCMLIR-552] Added broadcasting
* Address comments
1 parent 839eb35 commit a8ae8ac

3 files changed: 338 additions & 13 deletions


mlir/lib/Conversion/MIGraphXToLinalg/MIGraphXToLinalg.cpp

Lines changed: 250 additions & 1 deletion
@@ -376,6 +376,253 @@ ClipConverter::matchAndRewrite(migraphx::ClipOp op, OpAdaptor adaptor,
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Tensor views and shape manipulation
+//===----------------------------------------------------------------------===//
+namespace {
+struct BroadcastConverter final
+    : public OpConversionPattern<migraphx::BroadcastOp> {
+  using OpConversionPattern<migraphx::BroadcastOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(migraphx::BroadcastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final;
+};
+
+struct MultiBroadcastConverter final
+    : public OpConversionPattern<migraphx::MultiBroadcastOp> {
+  using OpConversionPattern<migraphx::MultiBroadcastOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(migraphx::MultiBroadcastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final;
+};
+} // namespace
+
+/// Reshape the input Value into a new RankedTensorType with newShape.
+/// The input must have type RankedTensorType.
+static Value reshapeValue(ConversionPatternRewriter &rewriter, Value input,
+                          ArrayRef<int64_t> newShape) {
+  // Although there is a tensor.reshape op, we use tensor.collapse_shape
+  // and tensor.expand_shape since the rock-view-to-transform pass doesn't
+  // support tensor.reshape.
+  RankedTensorType currentType = cast<RankedTensorType>(input.getType());
+  Location loc = input.getLoc();
+  int64_t inputRank = currentType.getRank();
+  int64_t outputRank = static_cast<int64_t>(newShape.size());
+
+  if (currentType.getShape() == newShape) {
+    return input;
+  }
+
+  SmallVector<ReassociationIndices> collapseReassociation(1);
+  SmallVector<ReassociationIndices> expandReassociation(1);
+  collapseReassociation[0].resize(inputRank);
+  expandReassociation[0].resize(outputRank);
+  std::iota(collapseReassociation[0].begin(), collapseReassociation[0].end(),
+            0);
+  std::iota(expandReassociation[0].begin(), expandReassociation[0].end(), 0);
+  input = tensor::CollapseShapeOp::create(rewriter, loc, input,
+                                          collapseReassociation);
+  if (cast<RankedTensorType>(input.getType()).getShape() == newShape) {
+    return input;
+  }
+  RankedTensorType resultType =
+      RankedTensorType::get(newShape, currentType.getElementType());
+  input = tensor::ExpandShapeOp::create(rewriter, loc, resultType, input,
+                                        expandReassociation);
+  return input;
+}
+
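For the 1x768x2304 bias in the tests below, this helper collapses all dimensions to 1-D and then expands to the unit-free target shape. A minimal sketch of the emitted pair, where %bias is a placeholder name for the converted input (the exact tensor.expand_shape assembly, e.g. its output_shape clause, varies across MLIR versions):

```mlir
// Collapse the rank-3 input to 1-D, then expand to the rank-2 target shape.
%collapsed = tensor.collapse_shape %bias [[0, 1, 2]]
    : tensor<1x768x2304xf16> into tensor<1769472xf16>
%reshaped = tensor.expand_shape %collapsed [[0, 1]] output_shape [768, 2304]
    : tensor<1769472xf16> into tensor<768x2304xf16>
```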
+LogicalResult
+BroadcastConverter::matchAndRewrite(migraphx::BroadcastOp op, OpAdaptor adaptor,
+                                    ConversionPatternRewriter &rewriter) const {
+  Location loc = op.getLoc();
+  migraphx::MIXRShapedType input = op.getInput().getType();
+  migraphx::MIXRShapedType output = op.getOutput().getType();
+
+  RankedTensorType outputType =
+      dyn_cast<RankedTensorType>(getTypeConverter()->convertType(output));
+  if (!outputType) {
+    return op.emitError("cannot convert output type to ranked tensor type");
+  }
+
+  uint64_t axis = op.getAxis();
+  uint64_t outputRank = output.getRank();
+
+  uint64_t inputRank = input.getRank();
+  SmallVector<int64_t, 4> dimensionAttr;
+  llvm::transform(llvm::seq<int64_t>(0, axis),
+                  std::back_inserter(dimensionAttr),
+                  [](int64_t val) { return val; });
+  for (auto [index, dim] : llvm::enumerate(input.getShape())) {
+    // A size-1 dimension in the input can also be broadcast.
+    if (dim == 1) {
+      dimensionAttr.push_back(index + axis);
+    }
+  }
+  llvm::transform(llvm::seq<int64_t>(axis + inputRank, outputRank),
+                  std::back_inserter(dimensionAttr),
+                  [](int64_t val) { return val; });
+
+  // We have to drop the size-1 dimensions because we may be broadcasting
+  // them to a different size.
+  auto reshaped =
+      reshapeValue(rewriter, adaptor.getInput(),
+                   llvm::filter_to_vector(
+                       input.getShape(), [](int64_t val) { return val != 1; }));
+  auto init = tensor::EmptyOp::create(rewriter, loc, outputType.getShape(),
+                                      outputType.getElementType());
+  auto result =
+      linalg::BroadcastOp::create(rewriter, loc, reshaped, init, dimensionAttr);
+  rewriter.replaceOp(op, result);
+
+  return success();
+}
+
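For example, with axis = 0 and a <1x768x2304xf16> input broadcast to out_lens = [64, 768, 2304] (the @matmul_broadcast_op test below), only the unit dimension's position lands in dimensionAttr, so after the unit dimension is reshaped away the pattern produces roughly this IR (a sketch; %reshaped names the unit-free input):

```mlir
// Broadcast the rank-2 input into dimension 0 of the rank-3 output.
%init = tensor.empty() : tensor<64x768x2304xf16>
%bcast = linalg.broadcast ins(%reshaped : tensor<768x2304xf16>)
    outs(%init : tensor<64x768x2304xf16>) dimensions = [0]
```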
+LogicalResult MultiBroadcastConverter::matchAndRewrite(
+    migraphx::MultiBroadcastOp op, OpAdaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+  Location loc = op->getLoc();
+  migraphx::MIXRShapedType outMIXRType = op.getOutput().getType();
+  RankedTensorType outType =
+      cast<RankedTensorType>(getTypeConverter()->convertType(outMIXRType));
+  ArrayRef<int64_t> outShape = outType.getShape();
+  ArrayRef<int64_t> outStrides = outMIXRType.getStrides();
+  uint32_t inRank =
+      cast<RankedTensorType>(adaptor.getInput().getType()).getRank();
+  uint32_t outRank = outType.getRank();
+  Type elemType = outType.getElementType();
+
+  assert(outRank >= inRank && "MultiBroadcastOp shouldn't reduce rank. This "
+                              "should be an invariant of this operation");
+
+  // If the input is a splat constant, broadcast it trivially.
+  if (auto constOp = adaptor.getInput().getDefiningOp<arith::ConstantOp>()) {
+    if (auto denseAttr = dyn_cast<DenseElementsAttr>(constOp.getValue())) {
+      if (denseAttr.isSplat()) {
+        auto bcastConstAttr = DenseElementsAttr::get(
+            outType, denseAttr.getSplatValue<Attribute>());
+        rewriter.replaceOpWithNewOp<arith::ConstantOp>(op, outType,
+                                                       bcastConstAttr);
+        return success();
+      }
+    }
+  }
+
+  // Determine the broadcast dimensions (stride == 0) and the non-broadcast
+  // shape.
+  SmallVector<int64_t, 4> broadcastDimensions;
+  SmallVector<int64_t, 4> nonBroadcastShape;
+  for (auto [i, stride, shape] : llvm::enumerate(outStrides, outShape)) {
+    if (stride == 0) {
+      broadcastDimensions.push_back(i);
+    } else {
+      nonBroadcastShape.push_back(shape);
+    }
+  }
+
+  // If no dimensions need broadcasting, just reshape to match the output shape.
+  if (broadcastDimensions.empty()) {
+    Value result = reshapeValue(rewriter, adaptor.getInput(), outShape);
+    rewriter.replaceOp(op, result);
+    return success();
+  }
+
+  // Reshape the input to match the non-broadcast dimensions of the output.
+  Value input = reshapeValue(rewriter, adaptor.getInput(), nonBroadcastShape);
+
+  auto init = tensor::EmptyOp::create(rewriter, loc, outShape, elemType);
+  auto result = linalg::BroadcastOp::create(rewriter, loc, input, init,
+                                            broadcastDimensions);
+  rewriter.replaceOp(op, result);
+  return success();
+}
+
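In the @mbcast_add test below, the output strides 0x1x0x0 mark dimensions 0, 2, and 3 as broadcast, leaving <64> as the non-broadcast shape, so the lowering comes out roughly as (a sketch; %collapsed names the 1x64x1x1 input reshaped to the non-broadcast shape):

```mlir
// Fill dimensions 0, 2, and 3 of the output from the rank-1 input.
%init = tensor.empty() : tensor<1x64x112x112xf32>
%bcast = linalg.broadcast ins(%collapsed : tensor<64xf32>)
    outs(%init : tensor<1x64x112x112xf32>) dimensions = [0, 2, 3]
```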
+//===----------------------------------------------------------------------===//
+// Misc. ops
+//===----------------------------------------------------------------------===//
+namespace {
+struct LiteralConverter final
+    : public OpConversionPattern<migraphx::LiteralOp> {
+  using OpConversionPattern<migraphx::LiteralOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(migraphx::LiteralOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const final;
+};
+} // namespace
+
+LogicalResult
+LiteralConverter::matchAndRewrite(migraphx::LiteralOp op, OpAdaptor adaptor,
+                                  ConversionPatternRewriter &rewriter) const {
+  migraphx::MIXRShapedType type = op.getResult().getType();
+  RankedTensorType newType =
+      dyn_cast<RankedTensorType>(getTypeConverter()->convertType(type));
+  if (!newType) {
+    return op.emitError("expected RankedTensorType as output");
+  }
+
+  ElementsAttr value = op.getValue();
+  if (value.getType() != newType) {
+    if (value.isSplat()) {
+      // Get the original splat value (for example, an SI8 value).
+      Attribute splatValue = value.getSplatValue<Attribute>();
+
+      // Reinterpret the splat value under the new type (for example,
+      // SI8 -> I8), preserving the bytes.
+      Attribute newSplatValue;
+      if (auto intAttr = dyn_cast<IntegerAttr>(splatValue))
+        newSplatValue =
+            IntegerAttr::get(newType.getElementType(), intAttr.getValue());
+      else if (auto floatAttr = dyn_cast<FloatAttr>(splatValue))
+        newSplatValue =
+            FloatAttr::get(newType.getElementType(), floatAttr.getValue());
+      else if (auto boolAttr = dyn_cast<BoolAttr>(splatValue))
+        // Convert BoolAttr into IntegerAttr so we don't run target
+        // materialization for type conversion. Match the result type of
+        // the TypeConverter.
+        newSplatValue =
+            IntegerAttr::get(newType.getElementType(), boolAttr.getValue());
+      else
+        return failure();
+
+      // Create the new SplatElementsAttr (for example, of I8 type) with the
+      // value bytes preserved.
+      value = SplatElementsAttr::get(newType, newSplatValue);
+    } else {
+      // For non-splat attributes, we need to convert each element to the new
+      // type.
+      SmallVector<Attribute> convertedElements;
+      convertedElements.reserve(value.getNumElements());
+
+      for (auto it : value.getValues<Attribute>()) {
+        Attribute convertedElement;
+        if (auto intAttr = dyn_cast<IntegerAttr>(it))
+          convertedElement =
+              IntegerAttr::get(newType.getElementType(), intAttr.getValue());
+        else if (auto floatAttr = dyn_cast<FloatAttr>(it))
+          convertedElement =
+              FloatAttr::get(newType.getElementType(), floatAttr.getValue());
+        else if (auto boolAttr = dyn_cast<BoolAttr>(it))
+          // Convert BoolAttr into IntegerAttr so we don't run target
+          // materialization for type conversion. Match the result type of
+          // the TypeConverter.
+          convertedElement =
+              IntegerAttr::get(newType.getElementType(), boolAttr.getValue());
+        else
+          return failure();
+
+        convertedElements.push_back(convertedElement);
+      }
+
+      value = DenseElementsAttr::get(newType, convertedElements);
+    }
+  }
+
+  rewriter.replaceOpWithNewOp<arith::ConstantOp>(op, newType, value);
+  return success();
+}
+
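The non-splat path covers signedness-only changes such as si32 -> i32 by rebuilding the attribute element by element on the signless type; the @literal_dense_si32 test below exercises exactly this. In sketch form:

```mlir
// Before: a signed-integer MIXR literal.
%0 = migraphx.literal (dense<[[0, 1], [2, 3]]> : tensor<2x2xsi32>) : <2x2xsi32, 2x1>
// After: the same values as a signless arith.constant.
%cst = arith.constant dense<[[0, 1], [2, 3]]> : tensor<2x2xi32>
```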
 //===----------------------------------------------------------------------===//
 // populateMIGraphXToLinalg* method
 //===----------------------------------------------------------------------===//
@@ -396,7 +643,9 @@ void mlir::migraphx::populateMIGraphXToLinalgConversionPatterns(
       ElementwiseConverter<migraphx::SqrtOp, linalg::SqrtOp>,
       ElementwiseConverter<migraphx::TanhOp, linalg::TanhOp>,
       ElementwiseConverter<migraphx::RecipOp, linalg::ReciprocalOp>,
-      ReluConverter, ClipConverter>(converter, patterns.getContext());
+      ReluConverter, ClipConverter, BroadcastConverter,
+      MultiBroadcastConverter, LiteralConverter>(converter,
+                                                 patterns.getContext());
 }
 
 void mlir::migraphx::populateMIGraphXFuncBoundaryToLinalgConversionPatterns(

mlir/test/Conversion/MIGraphXToLinalg/migraphx-to-linalg-not-implemented.mlir

Lines changed: 0 additions & 12 deletions
@@ -109,18 +109,6 @@ func.func @func_slice(%arg0: !migraphx.shaped<1x1xf32, 1x1>, %arg1: !migraphx.sh
   func.return
 }
 
-func.func @func_broadcast(%arg0: !migraphx.shaped<1x1xf32, 1x1>, %arg1: !migraphx.shaped<1x1xf32, 1x1>) {
-  // expected-error @+1{{failed to legalize operation 'migraphx.broadcast'}}
-  migraphx.broadcast %arg0 {axis = 0 : i64, out_lens = [1, 1]}: <1x1xf32, 1x1> -> <1x1xf32, 1x1>
-  func.return
-}
-
-func.func @func_multibroadcast(%arg0: !migraphx.shaped<1x1xi8, 1x1>, %arg1: !migraphx.shaped<1x1xf32, 1x1>) {
-  // expected-error @+1{{failed to legalize operation 'migraphx.multibroadcast'}}
-  migraphx.multibroadcast %arg0 {out_lens = [1, 1]}: <1x1xi8, 1x1> -> <1x1xi8, 1x1>
-  func.return
-}
-
 func.func @func_quant_dot(%arg0: !migraphx.shaped<1x1xf8E4M3FN, 1x1>, %arg1: !migraphx.shaped<1x1xf8E4M3FN, 1x1>) {
   // expected-error @+1{{failed to legalize operation 'migraphx.quant_dot'}}
   migraphx.quant_dot %arg0, %arg1: <1x1xf8E4M3FN, 1x1>, <1x1xf8E4M3FN, 1x1> -> <1x1xf32, 1x1>

mlir/test/Conversion/MIGraphXToLinalg/mixr-to-linalg-ops.mlir

Lines changed: 88 additions & 0 deletions
@@ -151,3 +151,91 @@ func.func @clip_i32(%arg0: !migraphx.shaped<64x64xi32, 64x1>, %arg1: !migraphx.s
   %0 = migraphx.clip %arg0, %arg1, %arg2 : <64x64xi32, 64x1>, <64x64xi32, 64x1>, <64x64xi32, 64x1> -> <64x64xi32, 64x1>
   return %0 : !migraphx.shaped<64x64xi32, 64x1>
 }
+
+// Literal/Broadcasting tests
+
+// CHECK-LABEL: @matmul_broadcast_op(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor{{.*}}, %[[arg2:.*]]: tensor{{.*}})
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg0]] {{.*}} into tensor<64x64x2304xf16>
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg1]] {{.*}} into tensor<64x64x768xf16>
+// CHECK-DAG: %[[expanded_1:.*]] = tensor.expand_shape %[[arg2]] {{.*}} into tensor<1x768x2304xf16>
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[expanded_1]] {{.*}} into tensor<1769472xf16>
+// CHECK-DAG: %[[expanded_2:.*]] = tensor.expand_shape %[[collapsed]] {{.*}} into tensor<768x2304xf16>
+// CHECK-DAG: %[[broadcasted:.*]] = linalg.broadcast ins(%[[expanded_2]] : tensor<768x2304xf16>) outs({{.*}} : tensor<64x768x2304xf16>) dimensions = [0]
+// CHECK-DAG: %[[cst:.*]] = arith.constant dense<0.000000e+00> : tensor<64x64x2304xf16>
+// CHECK-DAG: %[[matmul:.*]] = linalg.batch_matmul ins(%[[expanded_0]], %[[broadcasted]] : {{.*}}) outs(%[[cst]] : {{.*}})
+// CHECK-DAG: %[[add:.*]] = linalg.add ins(%[[matmul]], %[[expanded]] : {{.*}}) outs({{.*}})
+// CHECK-DAG: %[[collapsed_3:.*]] = tensor.collapse_shape %[[add]]
+// CHECK-DAG: return %[[collapsed_3]]
+func.func @matmul_broadcast_op(%arg0: !migraphx.shaped<64x64x2304xf16, 147456x2304x1>, %arg1: !migraphx.shaped<64x64x768xf16, 49152x768x1>, %arg2: !migraphx.shaped<1x768x2304xf16, 1769472x2304x1>) -> !migraphx.shaped<64x64x2304xf16, 147456x2304x1> {
+  %0 = migraphx.broadcast %arg2 {axis = 0, out_lens = [64, 768, 2304]} : <1x768x2304xf16, 1769472x2304x1> -> <64x768x2304xf16, 0x2304x1>
+  %1 = migraphx.dot %arg1, %0 : <64x64x768xf16, 49152x768x1>, <64x768x2304xf16, 0x2304x1> -> <64x64x2304xf16, 147456x2304x1>
+  %2 = migraphx.add %1, %arg0 : <64x64x2304xf16, 147456x2304x1>, <64x64x2304xf16, 147456x2304x1> -> <64x64x2304xf16, 147456x2304x1>
+  return %2 : !migraphx.shaped<64x64x2304xf16, 147456x2304x1>
+}
+
+// CHECK-LABEL: @mbcast_add(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}}, %[[arg1:.*]]: tensor{{.*}})
+// CHECK-DAG: %[[expanded:.*]] = tensor.expand_shape %[[arg0]] {{.*}} into tensor<1x64x112x112xf32>
+// CHECK-DAG: %[[expanded_0:.*]] = tensor.expand_shape %[[arg1]] {{.*}} into tensor<1x64x1x1xf32>
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[expanded_0]] {{.*}} into tensor<64xf32>
+// CHECK-DAG: %[[broadcasted:.*]] = linalg.broadcast ins(%[[collapsed]] : tensor<64xf32>) outs({{.*}} : tensor<1x64x112x112xf32>) dimensions = [0, 2, 3]
+// CHECK-DAG: %[[add:.*]] = linalg.add ins(%[[expanded]], %[[broadcasted]] : {{.*}}) outs({{.*}})
+// CHECK-DAG: %[[collapsed_2:.*]] = tensor.collapse_shape %[[add]]
+// CHECK-DAG: return %[[collapsed_2]]
+func.func @mbcast_add(
+    %arg0: !migraphx.shaped<1x64x112x112xf32, 802816x12544x112x1>,
+    %arg1: !migraphx.shaped<1x64x1x1xf32, 64x1x1x1>
+) -> !migraphx.shaped<1x64x112x112xf32, 802816x12544x112x1> {
+  %0 = migraphx.multibroadcast %arg1 {out_lens = [1, 64, 112, 112]} : <1x64x1x1xf32, 64x1x1x1> -> <1x64x112x112xf32, 0x1x0x0>
+  %1 = migraphx.add %arg0, %0 : <1x64x112x112xf32, 802816x12544x112x1>, <1x64x112x112xf32, 0x1x0x0> -> <1x64x112x112xf32, 802816x12544x112x1>
+  return %1 : !migraphx.shaped<1x64x112x112xf32, 802816x12544x112x1>
+}
+
+// CHECK-LABEL: @literal_splat_f32()
+// CHECK-DAG: %[[cst:.*]] = arith.constant dense<0.000000e+00> : tensor<4x3xf32>
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[cst]]
+// CHECK-DAG: return %[[collapsed]]
+func.func @literal_splat_f32() -> !migraphx.shaped<4x3xf32, 3x1> {
+  %0 = migraphx.literal (dense<0.0> : tensor<4x3xf32>) : <4x3xf32, 3x1>
+  return %0 : !migraphx.shaped<4x3xf32, 3x1>
+}
+
+// CHECK-LABEL: @literal(
+// CHECK-SAME: %[[arg0:.*]]: tensor{{.*}})
+// CHECK-DAG: %[[cst:.*]] = arith.constant dense<1.000000e+00> : tensor<16xf32>
+// CHECK-DAG: %[[collapsed:.*]] = tensor.collapse_shape %[[cst]]
+// CHECK-DAG: return %[[collapsed]]
+func.func @literal(%arg0: !migraphx.shaped<16xf32, 1>) -> !migraphx.shaped<16xf32, 1> {
+  %cst = migraphx.literal (dense<1.0> : tensor<16xf32>) : <16xf32, 1>
+  return %cst : !migraphx.shaped<16xf32, 1>
+}
+
+// CHECK-LABEL: @literal_dense_si32
+// CHECK-DAG: %[[cst:.*]] = arith.constant dense<{{.*}}> : tensor<2x2xi32>
+func.func @literal_dense_si32() -> !migraphx.shaped<2x2xsi32, 2x1> {
+  %0 = migraphx.literal (dense<[[0, 1], [2, 3]]> : tensor<2x2xsi32>) : <2x2xsi32, 2x1>
+  return %0 : !migraphx.shaped<2x2xsi32, 2x1>
+}
+
+// CHECK-LABEL: @scalar_multibroadcast_test
+// CHECK-DAG: %[[cst_0:.*]] = arith.constant dense<{{.*}}> : tensor<2x2xf32>
+// CHECK-DAG: %[[zero:.*]] = tensor.empty
+// CHECK-DAG: %[[one:.*]] = linalg.add ins(%[[cst_0]], %[[cst_0]] : {{.*}}) outs(%[[zero]] : {{.*}})
+func.func @scalar_multibroadcast_test() -> !migraphx.shaped<2x2xf32, 2x1> {
+  %test = migraphx.literal (dense<0.0> : tensor<f32>) : <f32>
+  %result = migraphx.multibroadcast %test {out_dyn_dims = [], out_lens = [2, 2]} : <f32> -> <2x2xf32, 0x0>
+  %sum = migraphx.add %result, %result : <2x2xf32, 0x0>, <2x2xf32, 0x0> -> <2x2xf32, 2x1>
+  return %sum : !migraphx.shaped<2x2xf32, 2x1>
+}
+
+// CHECK-LABEL: @scalar_broadcast_test
+// CHECK-DAG: %[[cst:.*]] = arith.constant dense<0.000000e+00> : tensor<f32>
+// CHECK-DAG: %[[zero:.*]] = tensor.empty()
+// CHECK-DAG: %[[broadcasted:.*]] = linalg.broadcast ins(%[[cst]] : {{.*}}) outs(%[[zero]] : {{.*}}) dimensions = [0, 1]
+func.func @scalar_broadcast_test() -> !migraphx.shaped<2x2xf32, 2x1> {
+  %test = migraphx.literal (dense<0.0> : tensor<f32>) : <f32>
+  %result = migraphx.broadcast %test {axis = 1 : i64, out_lens = [2, 2]} : <f32> -> <2x2xf32, 0x0>
+  %sum = migraphx.add %result, %result : <2x2xf32, 0x0>, <2x2xf32, 0x0> -> <2x2xf32, 2x1>
+  return %sum : !migraphx.shaped<2x2xf32, 2x1>
+}
