diff --git a/examples/xegpu_matmul/matmul.py b/examples/xegpu_matmul/matmul.py
index d86478e..277f9c5 100644
--- a/examples/xegpu_matmul/matmul.py
+++ b/examples/xegpu_matmul/matmul.py
@@ -324,8 +324,6 @@ def parse_cli():
             "bufferized",
             "xegpu-initial",
             "xegpu-wg",
-            "xegpu-sg",
-            "xegpu-inst",
             "final",
         ],
         help="Dump kernel IR at different stages of lowering.",
diff --git a/examples/xegpu_matmul/schedule.py b/examples/xegpu_matmul/schedule.py
index 5a7133b..1dac3d9 100644
--- a/examples/xegpu_matmul/schedule.py
+++ b/examples/xegpu_matmul/schedule.py
@@ -381,12 +381,7 @@ def convert_layout(value, input, target):
 
 def bundle_xegpu_to_binary(mod, stop_at_stage: str = "") -> ir.Module:
     """Schedule for lowering xegpu wg level to binary."""
-    # This schedule corresponds to upstream MLIR XeVM lowering pipeline
-    # and is payload independent.
-
-    # This pipeline causes performance regression with the existing
-    # xegpu transform ops.
-    # FIXME Use anchor layouts in transform ops.
+    # upstream xegpu/xevm pipeline is payload independent.
     mod = apply_registered_pass(
         mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
     )