From 87ef6e83193d4f843b465da442e2086927de0d4d Mon Sep 17 00:00:00 2001 From: Tuomas Karna Date: Mon, 29 Dec 2025 19:24:20 +0200 Subject: [PATCH 1/4] use anchor layouts + propagation in schedule, do not set desc layout --- examples/xegpu_matmul/schedule.py | 62 ++++++++++++++++--------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/examples/xegpu_matmul/schedule.py b/examples/xegpu_matmul/schedule.py index 1dac3d9..535e2c7 100644 --- a/examples/xegpu_matmul/schedule.py +++ b/examples/xegpu_matmul/schedule.py @@ -271,22 +271,27 @@ def convert_layout(value, input, target): tile_a, nb_prefetch=nb_prefetch, ) - xegpu.set_desc_layout( - desc_prefetch_a, - sg_layout=prefetch_layout_a, - sg_data=prefetch_tile_a, - inst_data=prefetch_inst_data, - ) + layout_prefetch_a = { + "sg_layout": prefetch_layout_a, + "sg_data": prefetch_tile_a, + "inst_data": prefetch_inst_data, + } + pf_ops = transform.get_consumers_of_result(anytype, desc_prefetch_a, 0) + for pf in transform.split_handle((anytype,) * (nb_prefetch + 1), pf_ops): + xegpu.set_op_layout_attr(pf, **layout_prefetch_a) + desc_prefetch_b = xegpu.insert_prefetch( tile_b, nb_prefetch=nb_prefetch, ) - xegpu.set_desc_layout( - desc_prefetch_b, - sg_layout=prefetch_layout_b, - sg_data=prefetch_tile_b, - inst_data=prefetch_inst_data, - ) + layout_prefetch_b = { + "sg_layout": prefetch_layout_b, + "sg_data": prefetch_tile_b, + "inst_data": prefetch_inst_data, + } + pf_ops = transform.get_consumers_of_result(anytype, desc_prefetch_b, 0) + for pf in transform.split_handle((anytype,) * (nb_prefetch + 1), pf_ops): + xegpu.set_op_layout_attr(pf, **layout_prefetch_b) # A tile load layout layout_load_a = { @@ -295,10 +300,9 @@ def convert_layout(value, input, target): "inst_data": load_tile_a, } desc_op_a = xegpu.get_desc_op(tile_a) - desc_op_a = xegpu.set_desc_layout( - target=desc_op_a, - **layout_load_a, - ) + # A tile load op anchor layout + load_op_a = transform.get_consumers_of_result(anytype, desc_op_a, 0) + xegpu.set_op_layout_attr(load_op_a, **layout_load_a) # A tile dpas layout layout_dpas_a = layout_load_a.copy() layout_dpas_a["inst_data"] = dpas_shape_a @@ -311,10 +315,9 @@ def convert_layout(value, input, target): "inst_data": load_tile_b, } desc_op_b = xegpu.get_desc_op(tile_b) - desc_op_b = xegpu.set_desc_layout( - target=desc_op_b, - **layout_load_b, - ) + # B tile load op anchor layout + load_op_b = transform.get_consumers_of_result(anytype, desc_op_b, 0) + xegpu.set_op_layout_attr(load_op_b, **layout_load_b) # B tile dpas layout layout_dpas_b = layout_load_b.copy() layout_dpas_b["inst_data"] = dpas_shape_b @@ -327,17 +330,15 @@ def convert_layout(value, input, target): "inst_data": dpas_shape_c, } desc_op_c = xegpu.get_desc_op(tile_c) - desc_op_c = xegpu.set_desc_layout(desc_op_c, **output_layout) - # C tile dpas layout - xegpu.set_op_layout_attr(dpas_op, result=True, index=0, **output_layout) + # C tile load/store op anchor layout + desc_c_users = transform.get_consumers_of_result(anytype, desc_op_c, 0) + load_op_c, store_op_c = transform.split_handle((anytype, anytype), desc_c_users) + xegpu.set_op_layout_attr(load_op_c, **output_layout) + # C tile dpas anchor layout + xegpu.set_op_layout_attr(dpas_op, index=0, **layout_dpas_a) + xegpu.set_op_layout_attr(dpas_op, index=1, **layout_dpas_b) + xegpu.set_op_layout_attr(dpas_op, index=2, **output_layout) - if has_relu: - # for post ops we need to add C layout manually - max_op = match(gpu_func, ops={"arith.maximumf"}) - xegpu.set_op_layout_attr(max_op, result=True, index=0, **output_layout) - # find zero constant buffer and annotate it - const_buffer = transform.get_producer_of_operand(anytype, max_op, 1) - xegpu.set_op_layout_attr(const_buffer, result=True, index=0, **output_layout) if has_bias: # for post ops we need to add C layout manually add_op = match(gpu_func, ops={"arith.addf"}) @@ -364,6 +365,7 @@ def convert_layout(value, input, target): mask = transform.get_producer_of_operand(anytype, bcast_load, 2) xegpu.set_op_layout_attr(mask, result=True, index=0, **output_layout_dim1) raise NotImplementedError("Bias layout propagation is not supported.") + transform.apply_cse(gpu_func) canonicalize(gpu_func) From 91cd14206a673e460da86c77f5a9d1d9c6e15808 Mon Sep 17 00:00:00 2001 From: Tuomas Karna Date: Fri, 13 Feb 2026 15:16:42 +0200 Subject: [PATCH 2/4] update example readme --- examples/xegpu_matmul/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/xegpu_matmul/README.md b/examples/xegpu_matmul/README.md index 9de0271..07ff181 100644 --- a/examples/xegpu_matmul/README.md +++ b/examples/xegpu_matmul/README.md @@ -20,7 +20,7 @@ Set `LLVM_INSTALL_DIR` and use the below script to checkout and compile LLVM loc ```bash export LLVM_INSTALL_DIR=<...> -export LLVM_VERSION=83765f435d1c +export LLVM_VERSION=48566b21a485 git clone https://github.com/llvm/llvm-project.git cd llvm-project @@ -34,7 +34,6 @@ cmake ../llvm -G Ninja \ -DLLVM_BUILD_EXAMPLES=OFF \ -DLLVM_TARGETS_TO_BUILD="host" \ -DLLVM_ENABLE_ASSERTIONS=ON \ - -DLLVM_ENABLE_RTTI=ON \ -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="SPIRV" \ -DLLVM_INSTALL_GTEST=ON \ -DMLIR_ENABLE_LEVELZERO_RUNNER=1 \ From 61abcce2ac3f4aed4367fcc6daf2078f0bdbd3d8 Mon Sep 17 00:00:00 2001 From: Tuomas Karna Date: Fri, 13 Feb 2026 17:17:19 +0200 Subject: [PATCH 3/4] simplify bias layout annotation --- examples/xegpu_matmul/schedule.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/examples/xegpu_matmul/schedule.py b/examples/xegpu_matmul/schedule.py index 535e2c7..b5827be 100644 --- a/examples/xegpu_matmul/schedule.py +++ b/examples/xegpu_matmul/schedule.py @@ -340,32 +340,14 @@ def convert_layout(value, input, target): xegpu.set_op_layout_attr(dpas_op, index=2, **output_layout) if has_bias: - # for post ops we need to add C layout manually + # annotate the 1d load of the broadcast op with a slice layout add_op = match(gpu_func, ops={"arith.addf"}) - xegpu.set_op_layout_attr(add_op, result=True, index=0, **output_layout) - - # annotate broadcast op operands bcast_op = transform.get_producer_of_operand(anytype, add_op, 0) - xegpu.set_op_layout_attr(bcast_op, result=True, index=0, **output_layout) bcast_load = transform.get_producer_of_operand(anytype, bcast_op, 0) xegpu.set_op_layout_attr( bcast_load, result=True, index=0, **output_layout, slice_dims=[0] ) - output_layout_dim1 = { - "sg_layout": [sg_layout[1]], - "sg_data": [sg_tile[1]], - "inst_data": [dpas_shape_c[1]], - } - offset = transform.get_producer_of_operand(anytype, bcast_load, 1) - xegpu.set_op_layout_attr(offset, result=True, index=0, **output_layout_dim1) - aux1 = transform.get_producer_of_operand(anytype, offset, 0) - xegpu.set_op_layout_attr(aux1, result=True, index=0, **output_layout_dim1) - aux2 = transform.get_producer_of_operand(anytype, offset, 1) - xegpu.set_op_layout_attr(aux2, result=True, index=0, **output_layout_dim1) - mask = transform.get_producer_of_operand(anytype, bcast_load, 2) - xegpu.set_op_layout_attr(mask, result=True, index=0, **output_layout_dim1) raise NotImplementedError("Bias layout propagation is not supported.") - transform.apply_cse(gpu_func) canonicalize(gpu_func) From 0fc99c25ad9135490d61b013c10a644ebd5ee54c Mon Sep 17 00:00:00 2001 From: Tuomas Karna Date: Sun, 15 Feb 2026 22:41:14 +0200 Subject: [PATCH 4/4] bump llvm version --- examples/xegpu_matmul/README.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/xegpu_matmul/README.md b/examples/xegpu_matmul/README.md index 07ff181..3acf9ad 100644 --- a/examples/xegpu_matmul/README.md +++ b/examples/xegpu_matmul/README.md @@ -20,7 +20,7 @@ Set `LLVM_INSTALL_DIR` and use the below script to checkout and compile LLVM loc ```bash export LLVM_INSTALL_DIR=<...> -export LLVM_VERSION=48566b21a485 +export LLVM_VERSION=45bee6efe9d6 git clone https://github.com/llvm/llvm-project.git cd llvm-project diff --git a/pyproject.toml b/pyproject.toml index d81e33a..9814bee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "lighthouse" dynamic = ["version"] requires-python = ">=3.10,<3.13" # Bounds are due to torch-mlir's packaging dependencies = [ - "mlir-python-bindings==20260211+f932646bf" + "mlir-python-bindings==20260215+45bee6efe" ] [dependency-groups]