diff --git a/examples/xegpu_matmul/matmul.py b/examples/xegpu_matmul/matmul.py
index d86478e..277f9c5 100644
--- a/examples/xegpu_matmul/matmul.py
+++ b/examples/xegpu_matmul/matmul.py
@@ -324,8 +324,6 @@ def parse_cli():
             "bufferized",
             "xegpu-initial",
             "xegpu-wg",
-            "xegpu-sg",
-            "xegpu-inst",
             "final",
         ],
         help="Dump kernel IR at different stages of lowering.",
diff --git a/examples/xegpu_matmul/schedule.py b/examples/xegpu_matmul/schedule.py
index 5a7133b..1dac3d9 100644
--- a/examples/xegpu_matmul/schedule.py
+++ b/examples/xegpu_matmul/schedule.py
@@ -381,12 +381,7 @@ def convert_layout(value, input, target):
 
 def bundle_xegpu_to_binary(mod, stop_at_stage: str = "") -> ir.Module:
     """Schedule for lowering xegpu wg level to binary."""
-    # This schedule corresponds to upstream MLIR XeVM lowering pipeline
-    # and is payload independent.
-
-    # This pipeline causes performance regression with the existing
-    # xegpu transform ops.
-    # FIXME Use anchor layouts in transform ops.
+    # upstream xegpu/xevm pipeline is payload independent.
     mod = apply_registered_pass(
         mod, "gpu-lower-to-xevm-pipeline", options={"xegpu-op-level": "workgroup"}
     )