From b80b9f847e5899904872f424098905ca0bdafc00 Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 16:23:06 +0200 Subject: [PATCH 01/15] perf: better wknn calculation --- src/cellflow/preprocessing/_wknn.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 222a9dcf..e5ce0172 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -134,10 +134,12 @@ def transfer_labels( ) wknn = ref_adata.uns[wknn_key] + clusters_onehot = pd.get_dummies(ref_adata.obs["Clusters"]) + clusters_mat = sparse.csr_matrix(clusters_onehot > 0).astype(int) scores = pd.DataFrame( - wknn @ pd.get_dummies(ref_adata.obs[label_key]), - columns=pd.get_dummies(ref_adata.obs[label_key]).columns, + wknn @ clusters_mat, + columns=clusters_onehot.columns, index=query_adata.obs_names, ) @@ -162,13 +164,11 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame( - { - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - } - ) + df = pd.DataFrame({ + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + }) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From c0adf3da2dfa8c8c949e0eecc5e3ca55842d26b3 Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 16:35:10 +0200 Subject: [PATCH 02/15] perf: keep matrix sparse --- src/cellflow/preprocessing/_wknn.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index e5ce0172..09673b9a 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -137,13 +137,9 @@ def transfer_labels( clusters_onehot = pd.get_dummies(ref_adata.obs["Clusters"]) clusters_mat = 
sparse.csr_matrix(clusters_onehot > 0).astype(int) - scores = pd.DataFrame( - wknn @ clusters_mat, - columns=clusters_onehot.columns, - index=query_adata.obs_names, - ) + scores = wknn @ clusters_mat - query_adata.obs[f"{label_key}_transfer"] = scores.idxmax(1) + query_adata.obs[f"{label_key}_transfer"] = scores.argmax(1) query_adata.obs[f"{label_key}_transfer_score"] = scores.max(1) if copy: From 813f3170564042bf2ca8e43e5674496c92e2e7a0 Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 16:43:52 +0200 Subject: [PATCH 03/15] fix: idx to label --- src/cellflow/preprocessing/_wknn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 09673b9a..368a348d 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -134,12 +134,13 @@ def transfer_labels( ) wknn = ref_adata.uns[wknn_key] - clusters_onehot = pd.get_dummies(ref_adata.obs["Clusters"]) + clusters_onehot = pd.get_dummies(ref_adata.obs["Clusters"].astype(str)) clusters_mat = sparse.csr_matrix(clusters_onehot > 0).astype(int) scores = wknn @ clusters_mat + label_indices = np.array((scores).argmax(1)).flatten() - query_adata.obs[f"{label_key}_transfer"] = scores.argmax(1) + query_adata.obs[f"{label_key}_transfer"] = clusters_onehot.columns[label_indices] query_adata.obs[f"{label_key}_transfer_score"] = scores.max(1) if copy: From 2f48a9e35637cb1c6d8ccba801e1ac3006a9ef10 Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 16:44:39 +0200 Subject: [PATCH 04/15] fix: `label_key` --- src/cellflow/preprocessing/_wknn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 368a348d..98f81c1c 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -134,7 +134,7 @@ def transfer_labels( ) wknn = ref_adata.uns[wknn_key] - clusters_onehot = 
pd.get_dummies(ref_adata.obs["Clusters"].astype(str)) + clusters_onehot = pd.get_dummies(ref_adata.obs[label_key].astype(str)) clusters_mat = sparse.csr_matrix(clusters_onehot > 0).astype(int) scores = wknn @ clusters_mat From f7ef2cd4e8458f84f805d17ea548b914baa2dfbb Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 16:45:26 +0200 Subject: [PATCH 05/15] fix: int conversion before sparse --- src/cellflow/preprocessing/_wknn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 98f81c1c..b1df68f5 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -135,7 +135,7 @@ def transfer_labels( wknn = ref_adata.uns[wknn_key] clusters_onehot = pd.get_dummies(ref_adata.obs[label_key].astype(str)) - clusters_mat = sparse.csr_matrix(clusters_onehot > 0).astype(int) + clusters_mat = sparse.csr_matrix((clusters_onehot > 0).astype(int)) scores = wknn @ clusters_mat label_indices = np.array((scores).argmax(1)).flatten() From 93fd1413ee4011f9cf1cf6cd3e5443e5e2c99793 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 14:57:18 +0000 Subject: [PATCH 06/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/cellflow/preprocessing/_wknn.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index b1df68f5..38721825 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -161,11 +161,13 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame({ - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - }) + df = pd.DataFrame( + { + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), +
"j": indices.flatten(), + "x": distances.flatten(), + } + ) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From ab9a605e1abdefce4a7c7c3177b8f06c38d20d42 Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 17:03:39 +0200 Subject: [PATCH 07/15] style: rename vars --- src/cellflow/preprocessing/_wknn.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 38721825..df95acf9 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -134,13 +134,13 @@ def transfer_labels( ) wknn = ref_adata.uns[wknn_key] - clusters_onehot = pd.get_dummies(ref_adata.obs[label_key].astype(str)) - clusters_mat = sparse.csr_matrix((clusters_onehot > 0).astype(int)) + labels_onehot = pd.get_dummies(ref_adata.obs[label_key].astype(str)) + labels_mat = sparse.csr_matrix((labels_onehot > 0).astype(int)) - scores = wknn @ clusters_mat + scores = wknn @ labels_mat label_indices = np.array((scores).argmax(1)).flatten() - query_adata.obs[f"{label_key}_transfer"] = clusters_onehot.columns[label_indices] + query_adata.obs[f"{label_key}_transfer"] = labels_onehot.columns[label_indices] query_adata.obs[f"{label_key}_transfer_score"] = scores.max(1) if copy: @@ -161,13 +161,11 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame( - { - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - } - ) + df = pd.DataFrame({ + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + }) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From a534a6dce31f77314f3b6ccd8f2d796fdfa27758 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 
15:03:49 +0000 Subject: [PATCH 08/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/cellflow/preprocessing/_wknn.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index df95acf9..d5eb2a08 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -161,11 +161,13 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame({ - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - }) + df = pd.DataFrame( + { + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + } + ) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From 06dbd5ea275cb637030cc53b081ebd1ceedbdcde Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 17:05:10 +0200 Subject: [PATCH 09/15] fix: `max_scores` --- src/cellflow/preprocessing/_wknn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index df95acf9..3518a618 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -139,9 +139,10 @@ def transfer_labels( scores = wknn @ labels_mat label_indices = np.array((scores).argmax(1)).flatten() + max_scores = np.array(scores.max(1)).flatten() query_adata.obs[f"{label_key}_transfer"] = labels_onehot.columns[label_indices] - query_adata.obs[f"{label_key}_transfer_score"] = scores.max(1) + query_adata.obs[f"{label_key}_transfer_score"] = max_scores if copy: return query_adata From 55f9277b4ebd9c4b1bc62e1ae239a580d3dad581 Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 17:07:17 +0200 Subject: [PATCH 10/15] style: remove `()` --- src/cellflow/preprocessing/_wknn.py | 14 
++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 25bf680f..a2aafad3 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -138,7 +138,7 @@ def transfer_labels( labels_mat = sparse.csr_matrix((labels_onehot > 0).astype(int)) scores = wknn @ labels_mat - label_indices = np.array((scores).argmax(1)).flatten() + label_indices = np.array(scores.argmax(1)).flatten() max_scores = np.array(scores.max(1)).flatten() query_adata.obs[f"{label_key}_transfer"] = labels_onehot.columns[label_indices] @@ -162,13 +162,11 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame( - { - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - } - ) + df = pd.DataFrame({ + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + }) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From 651c718aaefda45729cce688a6f4a4aa10128bae Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 15:07:27 +0000 Subject: [PATCH 11/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/cellflow/preprocessing/_wknn.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index a2aafad3..ce9dec14 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -162,11 +162,13 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame({ - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - }) + df = pd.DataFrame( + { + "i": 
np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + } + ) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From 4895bdc6c664e0b71039fd7e7452ce7bc846301e Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 17:34:15 +0200 Subject: [PATCH 12/15] fix: dense conversion of scores --- src/cellflow/preprocessing/_wknn.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index ce9dec14..41b45485 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -139,7 +139,7 @@ def transfer_labels( scores = wknn @ labels_mat label_indices = np.array(scores.argmax(1)).flatten() - max_scores = np.array(scores.max(1)).flatten() + max_scores = scores.max(1).toarray().flatten() query_adata.obs[f"{label_key}_transfer"] = labels_onehot.columns[label_indices] query_adata.obs[f"{label_key}_transfer_score"] = max_scores @@ -162,13 +162,11 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame( - { - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - } - ) + df = pd.DataFrame({ + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + }) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From a48520e07ff26dc7680aaa9f94c42fe9724ac874 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 15:35:44 +0000 Subject: [PATCH 13/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/cellflow/preprocessing/_wknn.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git 
a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 41b45485..0863e1d1 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -162,11 +162,13 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame({ - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - }) + df = pd.DataFrame( + { + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + } + ) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From b80193fc7f5138fa37b245df4a642bb5f65f624b Mon Sep 17 00:00:00 2001 From: joschif Date: Fri, 29 Aug 2025 18:14:40 +0200 Subject: [PATCH 14/15] fix: convert to category --- src/cellflow/preprocessing/_wknn.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 0863e1d1..012aa91b 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -142,6 +142,7 @@ def transfer_labels( max_scores = scores.max(1).toarray().flatten() query_adata.obs[f"{label_key}_transfer"] = labels_onehot.columns[label_indices] + query_adata.obs[f"{label_key}_transfer"] = query_adata.obs[f"{label_key}_transfer"].astype("category") query_adata.obs[f"{label_key}_transfer_score"] = max_scores if copy: @@ -162,13 +163,11 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame( - { - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - } - ) + df = pd.DataFrame({ + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + }) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj From 
e9261dc5728dc36de4e4866c2b3950115607757d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 16:14:51 +0000 Subject: [PATCH 15/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/cellflow/preprocessing/_wknn.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/cellflow/preprocessing/_wknn.py b/src/cellflow/preprocessing/_wknn.py index 012aa91b..4cede888 100644 --- a/src/cellflow/preprocessing/_wknn.py +++ b/src/cellflow/preprocessing/_wknn.py @@ -163,11 +163,13 @@ def _nn2adj( if n2 is None: n2 = np.max(indices.flatten()) - df = pd.DataFrame({ - "i": np.repeat(range(indices.shape[0]), indices.shape[1]), - "j": indices.flatten(), - "x": distances.flatten(), - }) + df = pd.DataFrame( + { + "i": np.repeat(range(indices.shape[0]), indices.shape[1]), + "j": indices.flatten(), + "x": distances.flatten(), + } + ) adj = sparse.csr_matrix((np.repeat(1, df.shape[0]), (df["i"], df["j"])), shape=(n1, n2)) return adj