[MLIR][XeGPU] Add distribution pattern for convertLayoutOp (#184826)

This commit is contained in:
Nishant Patel
2026-03-12 20:55:38 -07:00
committed by GitHub
parent 1cf130d3f1
commit e51e9afe68
2 changed files with 35 additions and 2 deletions

View File

@@ -828,6 +828,26 @@ struct SgToWiStoreScatter : public OpConversionPattern<xegpu::StoreScatterOp> {
}
};
/// Distributes a subgroup-level xegpu.convert_layout to work-item level.
/// When the input and target layouts already agree at lane granularity the
/// op is a no-op for each individual lane, so it folds to its (converted)
/// source value; anything else is left for a future lowering.
struct SgToWiConvertLayout
    : public OpConversionPattern<xegpu::ConvertLayoutOp> {
  using OpConversionPattern<xegpu::ConvertLayoutOp>::OpConversionPattern;
  LogicalResult
  matchAndRewrite(xegpu::ConvertLayoutOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    auto srcLayout = op.getInputLayoutAttr();
    auto dstLayout = op.getTargetLayoutAttr();
    // Lane-compatible layouts mean every lane already holds the right data:
    // drop the op and forward the adaptor's (type-converted) source.
    if (srcLayout.isCompatibleWith(dstLayout, xegpu::LayoutKind::Lane)) {
      rewriter.replaceOp(op, adaptor.getSource());
      return success();
    }
    return rewriter.notifyMatchFailure(
        op, "lowering incompatible convert_layout not yet supported");
  }
};
struct XeGPUSgToWiDistributeExperimentalPass
: public xegpu::impl::XeGPUSgToWiDistributeExperimentalBase<
XeGPUSgToWiDistributeExperimentalPass> {
@@ -1033,6 +1053,6 @@ void xegpu::populateXeGPUSgToWiDistributeTypeConversionAndLegality(
patterns.add<SgToWiCreateNdDesc, SgToWiLoadNd, SgToWiStoreNd, SgToWiDpas,
SgToWiElementWise, SgToWiArithConstant, SgToWiPrefetchNd,
SgToWiLoadGather, SgToWiStoreScatter, SgToWiVectorReduction,
SgToWiMultiDimReduction, SgToWiLoadMatrix, SgToWiStoreMatrix>(
typeConverter, patterns.getContext());
SgToWiMultiDimReduction, SgToWiLoadMatrix, SgToWiStoreMatrix,
SgToWiConvertLayout>(typeConverter, patterns.getContext());
}

View File

@@ -460,6 +460,19 @@ gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction(%laneid: index)
[1] : vector<16x12xf32> to vector<16xf32>
gpu.return
}
// Verifies that a convert_layout whose input and target layouts are
// lane-compatible is folded away entirely by SG-to-WI distribution:
// the target is a dims=[0] slice of a [1, 16] lane layout, which matches
// the [16] input lane layout, so no convert_layout may survive.
// CHECK-LABEL: gpu.func @convert_layout_removed_when_compatible
// CHECK-NOT: xegpu.convert_layout
gpu.func @convert_layout_removed_when_compatible() {
%0 = "some_op"()
{layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
: () -> vector<16xf32>
%1 = xegpu.convert_layout %0
<{input_layout = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
target_layout = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>}>
: vector<16xf32>
gpu.return
}
}
// -----