[MLIR][XeGPU] Add distribution pattern for convertLayoutOp (#184826)

This commit is contained in:
Nishant Patel
2026-03-12 20:55:38 -07:00
committed by GitHub
parent 1cf130d3f1
commit e51e9afe68
2 changed files with 35 additions and 2 deletions

View File

@@ -828,6 +828,26 @@ struct SgToWiStoreScatter : public OpConversionPattern<xegpu::StoreScatterOp> {
}
};
/// Distributes a subgroup-level xegpu.convert_layout to work-item level.
/// When the input and target layouts already agree at lane granularity the
/// op is a no-op for each individual lane, so it folds to its (converted)
/// source value; anything else is left for a future lowering.
struct SgToWiConvertLayout
    : public OpConversionPattern<xegpu::ConvertLayoutOp> {
  using OpConversionPattern<xegpu::ConvertLayoutOp>::OpConversionPattern;
  LogicalResult
  matchAndRewrite(xegpu::ConvertLayoutOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto srcLayout = op.getInputLayoutAttr();
    auto dstLayout = op.getTargetLayoutAttr();
    // Lane-compatible layouts mean every lane already holds the right data:
    // drop the op and forward the adaptor's (type-converted) source.
    if (srcLayout.isCompatibleWith(dstLayout, xegpu::LayoutKind::Lane)) {
      rewriter.replaceOp(op, adaptor.getSource());
      return success();
    }
    return rewriter.notifyMatchFailure(
        op, "lowering incompatible convert_layout not yet supported");
  }
};
struct XeGPUSgToWiDistributeExperimentalPass
: public xegpu::impl::XeGPUSgToWiDistributeExperimentalBase<
XeGPUSgToWiDistributeExperimentalPass> {
@@ -1033,6 +1053,6 @@ void xegpu::populateXeGPUSgToWiDistributeTypeConversionAndLegality(
patterns.add<SgToWiCreateNdDesc, SgToWiLoadNd, SgToWiStoreNd, SgToWiDpas,
SgToWiElementWise, SgToWiArithConstant, SgToWiPrefetchNd,
SgToWiLoadGather, SgToWiStoreScatter, SgToWiVectorReduction,
SgToWiMultiDimReduction, SgToWiLoadMatrix, SgToWiStoreMatrix>(
typeConverter, patterns.getContext());
SgToWiMultiDimReduction, SgToWiLoadMatrix, SgToWiStoreMatrix,
SgToWiConvertLayout>(typeConverter, patterns.getContext());
}

View File

@@ -460,6 +460,19 @@ gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction(%laneid: index)
[1] : vector<16x12xf32> to vector<16xf32>
gpu.return
}
// Verifies that a convert_layout whose input and target layouts are
// lane-compatible is folded away entirely by SG-to-WI distribution:
// the target is a dims=[0] slice of a [1, 16] lane layout, which matches
// the [16] input lane layout, so no convert_layout may survive.
// CHECK-LABEL: gpu.func @convert_layout_removed_when_compatible
// CHECK-NOT: xegpu.convert_layout
gpu.func @convert_layout_removed_when_compatible() {
%0 = "some_op"()
{layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
: () -> vector<16xf32>
%1 = xegpu.convert_layout %0
<{input_layout = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
target_layout = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>}>
: vector<16xf32>
gpu.return
}
}
// -----