diff --git a/configs/results/npm1.yaml b/configs/results/npm1.yaml
index 87b28c4..0a4ad9f 100644
--- a/configs/results/npm1.yaml
+++ b/configs/results/npm1.yaml
@@ -26,10 +26,4 @@ data_paths:
     "/data/npm1/pc.yaml",
   ]
 classification_label: ["STR_connectivity_cc_thresh"]
-regression_label:
-  [
-    "mean_surface_area",
-    "mean_volume",
-    "avg_dists",
-    "std_dists",
-  ]
+regression_label: ["mean_surface_area", "mean_volume", "avg_dists", "std_dists"]
diff --git a/configs/results/npm1_64_res.yaml b/configs/results/npm1_64_res.yaml
index f0bb9f7..a78a156 100644
--- a/configs/results/npm1_64_res.yaml
+++ b/configs/results/npm1_64_res.yaml
@@ -41,10 +41,4 @@ data_paths:
     "/data/npm1_64_res/pc.yaml",
   ]
 classification_label: ["STR_connectivity_cc_thresh"]
-regression_label:
-  [
-    "mean_surface_area",
-    "mean_volume",
-    "avg_dists",
-    "std_dists",
-  ]
+regression_label: ["mean_surface_area", "mean_volume", "avg_dists", "std_dists"]
diff --git a/docs/PREPROCESSING.md b/docs/PREPROCESSING.md
index e529c11..dfdb0ae 100644
--- a/docs/PREPROCESSING.md
+++ b/docs/PREPROCESSING.md
@@ -52,14 +52,14 @@ src
 
 # Polymorphic structures: Generate SDFs
 
-Edit the data paths in the following files to match the location of your copy of the data, then run both.
-
+Use the segmentation data for polymorphic structures as input to the SDF generation step.
 ```
 src
 └── br
    └── data
       └── preprocessing
          └── sdf_preprocessing
-            ├── image_sdfs.py <- Create 32**3 resolution SDF images
-            └── pc_sdfs.py <- Sample point clouds from 32**3 resolution SDF images
+            ├── image_sdfs.py <- Create scaled meshes, and 32**3 resolution SDF and seg images
+            ├── get_max_bounding_box.py <- Get bounds of the largest scaled mesh
+            └── pc_sdfs.py <- Sample point clouds from scaled meshes
 ```
diff --git a/src/br/data/preprocessing/pc_preprocessing/pcna.py b/src/br/data/preprocessing/pc_preprocessing/pcna.py
index 5e1f080..7e3c15d 100644
--- a/src/br/data/preprocessing/pc_preprocessing/pcna.py
+++ b/src/br/data/preprocessing/pc_preprocessing/pcna.py
@@ -131,7 +131,7 @@ def main(args):
     main(args)
 
     """
-    Example run:
-
-    python src/br/data/preprocessing/pc_preprocessing/pcna --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_pcna/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
+    Example run:
+
+    python src/br/data/preprocessing/pc_preprocessing/pcna.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_pcna/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
     """
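Reviewer note on the two config diffs above: the switch from block style to flow style for `regression_label` is purely cosmetic, since YAML parses both layouts to the same list. A minimal sanity check, assuming PyYAML is available:

```python
import yaml  # PyYAML, assumed installed

flow = 'regression_label: ["mean_surface_area", "mean_volume", "avg_dists", "std_dists"]'
block = """\
regression_label:
  [
    "mean_surface_area",
    "mean_volume",
    "avg_dists",
    "std_dists",
  ]
"""

# Both layouts load to the same dict, so the config change is behavior-preserving.
assert yaml.safe_load(flow) == yaml.safe_load(block)
```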
diff --git a/src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py b/src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py
index d1a97ae..ed56792 100644
--- a/src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py
+++ b/src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py
@@ -163,7 +163,7 @@ def main(args):
     main(args)
 
     """
-    Example run:
+    Example run:
 
-    python src/br/data/preprocessing/pc_preprocessing/punctate_cyto.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
+    python punctate_cyto.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
     """
diff --git a/src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py b/src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py
index 3c551c0..be25390 100644
--- a/src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py
+++ b/src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py
@@ -135,7 +135,7 @@ def main(args):
     main(args)
 
     """
-    Example run:
-
+    Example run:
+
     python src/br/data/preprocessing/pc_preprocessing/punctate_nuc.py --save_path "./make_pcs_test" --preprocessed_manifest "./subpackages/image_preprocessing/tmp_output_variance/processed/manifest.parquet" --global_path "./subpackages/image_preprocessing/"
     """
diff --git a/src/br/data/preprocessing/sdf_preprocessing/get_max_bounding_box.py b/src/br/data/preprocessing/sdf_preprocessing/get_max_bounding_box.py
new file mode 100644
index 0000000..69a4754
--- /dev/null
+++ b/src/br/data/preprocessing/sdf_preprocessing/get_max_bounding_box.py
@@ -0,0 +1,94 @@
+import argparse
+from multiprocessing import Pool
+from pathlib import Path
+
+import pandas as pd
+from aicsimageio import AICSImage
+from monai.transforms import FillHoles
+from tqdm import tqdm
+
+from br.data.utils import get_mesh_from_image
+
+
+def get_bounds(r):
+    return_df = {"x_delta": [], "y_delta": [], "z_delta": [], "cell_id": [], "max_delta": []}
+    cellid = r["CellId"]
+
+    hole_fill_transform = FillHoles()
+    seg = AICSImage(r["crop_seg_masked"]).data.squeeze()
+    seg = hole_fill_transform(seg).numpy()
+
+    mesh, _, _ = get_mesh_from_image(seg, sigma=0, lcc=False, denoise=False)
+
+    bounds = mesh.GetBounds()
+    x_delta = bounds[1] - bounds[0]
+    y_delta = bounds[3] - bounds[2]
+    z_delta = bounds[5] - bounds[4]
+    max_delta = max([x_delta, y_delta, z_delta])
+    return_df["x_delta"].append(x_delta)
+    return_df["y_delta"].append(y_delta)
+    return_df["z_delta"].append(z_delta)
+    return_df["cell_id"].append(cellid)
+    return_df["max_delta"].append(max_delta)
+
+    return return_df
+
+
+def main(args):
+    # make save path directory
+    Path(args.save_path).mkdir(parents=True, exist_ok=True)
+
+    df = pd.read_csv(args.manifest)
+
+    if args.global_path:
+        df["crop_seg_masked"] = df["crop_seg_masked"].apply(lambda x: args.global_path + x)
+
+    all_rows = []
+    for _, row in df.iterrows():
+        all_rows.append(row)
+
+    with Pool(1) as p:
+        jobs = tuple(
+            tqdm(
+                p.imap_unordered(
+                    get_bounds,
+                    all_rows,
+                ),
+                total=len(all_rows),
+                desc="get_bounds",
+            )
+        )
+    jobs = [i for i in jobs if i is not None]
+    return_df = pd.DataFrame(jobs).reset_index(drop=True)
+
+    return_df.to_csv(Path(args.save_path) / Path("bounds.csv"))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Script for getting global mesh bounds for polymorphic structures from the WTC-11 hiPSC single-cell image dataset"
+    )
+    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
+    parser.add_argument(
+        "--manifest",
+        type=str,
+        required=True,
+        help="Path to preprocessed single cell image manifest.",
+    )
+    parser.add_argument(
+        "--global_path",
+        type=str,
+        default=None,
+        required=False,
+        help="Path to append to relative paths in the preprocessed manifest.",
+    )
+
+    args = parser.parse_args()
+    main(args)
+
+    """
+    Example run:
+    python get_max_bounding_box.py --save_path "./test_img/" --manifest "../../../../../morphology_appropriate_representation_learning/preprocessed_data/npm1/manifest.csv" --global_path "../../../../../"
+    """
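Context for `get_bounds` above: `mesh.GetBounds()` follows the VTK convention of a flat `(xmin, xmax, ymin, ymax, zmin, zmax)` tuple, which is why the deltas index into pairs. A minimal sketch of the same computation on a stand-in shape (the sphere is a placeholder, not repo data; pyvista's `.bounds` uses the same ordering):

```python
import pyvista as pv

# Stand-in for a cell mesh: a sphere of radius 2 centered at the origin.
mesh = pv.Sphere(radius=2.0)

xmin, xmax, ymin, ymax, zmin, zmax = mesh.bounds  # vtk GetBounds() ordering
max_delta = max(xmax - xmin, ymax - ymin, zmax - zmin)
print(max_delta)  # ~4.0; the per-cell value that get_bounds writes into bounds.csv
```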
diff --git a/src/br/data/preprocessing/sdf_preprocessing/image_sdfs.py b/src/br/data/preprocessing/sdf_preprocessing/image_sdfs.py
index 7f252c7..45ff0c0 100644
--- a/src/br/data/preprocessing/sdf_preprocessing/image_sdfs.py
+++ b/src/br/data/preprocessing/sdf_preprocessing/image_sdfs.py
@@ -1,46 +1,136 @@
+import argparse
+from multiprocessing import Pool
+from pathlib import Path
+
 import numpy as np
 import pandas as pd
 import pyvista as pv
 from aicsimageio import AICSImage
 from monai.transforms import FillHoles
+from tqdm import tqdm
 
+from br.analysis.analysis_utils import str2bool
 from br.data.utils import (
     get_mesh_from_image,
-    get_scaled_seg_from_mesh,
-    get_sdf_from_mesh,
+    get_scaled_mesh,
+    get_sdf_from_mesh_vtk,
+    voxelize_scaled_mesh,
 )
 
-path = PATH_TO_SINGLE_CELL_DATA
-
-parent_out_dir = "..."
-path = ""
-out_dir_scaled_seg = "./"
-out_dir_scaled_sdf = "./"
-out_dir_mesh = "./"
-df = pd.read_csv(path)
-hole_fill_transform = FillHoles()
-
-for i, r in df.iterrows():
+
+def process(r):
     cellid = r["CellId"]
+    scale_factor = r["scale_factor"]
+    vox_resolution = r["vox_resolution"]
     out_path_sdf = f"{out_dir_scaled_sdf}/{cellid}"
     out_path_seg = f"{out_dir_scaled_seg}/{cellid}"
     out_path_mesh = f"{out_dir_mesh}/{cellid}.stl"
-
-    seg = AICSImage(r["crop_seg"]).data.squeeze()
+    hole_fill_transform = FillHoles()
+    seg = AICSImage(r["crop_seg_masked"]).data.squeeze()
     seg = hole_fill_transform(seg).numpy()
 
     mesh, _, _ = get_mesh_from_image(seg, sigma=0, lcc=False, denoise=False)
     pv.wrap(mesh).save(out_path_mesh)
 
-    sdf, scale_factor = get_sdf_from_mesh(
-        path=None, vox_resolution=32, scale_factor=None, vpolydata=mesh
+    sdf, scale_factor = get_sdf_from_mesh_vtk(
+        None, vox_resolution=vox_resolution, scale_factor=scale_factor, vpolydata=mesh
     )
     np.save(out_path_sdf, sdf)
 
+    vox_shape = (vox_resolution, vox_resolution, vox_resolution)
+    scaled_mesh, _ = get_scaled_mesh(None, int(vox_resolution), scale_factor, mesh, True)
+
+    scaled_seg = voxelize_scaled_mesh(scaled_mesh)
+    com = scaled_seg.shape
+    pad = []
+    for i, j in zip(vox_shape, com):
+        pad.append((int(i - j) // 2, int(i - j) // 2))
+    scaled_seg = np.pad(scaled_seg, pad)
 
-    scaled_seg, _ = get_scaled_seg_from_mesh(
-        path=None, vox_resolution=32, scale_factor=scale_factor, vpolydata=mesh
-    )
     np.save(out_path_seg, scaled_seg)
+
+
+def main(args):
+    # make save path directory
+    Path(args.save_path).mkdir(parents=True, exist_ok=True)
+
+    global out_dir_scaled_seg
+    global out_dir_scaled_sdf
+    global out_dir_mesh
+
+    out_dir_scaled_seg = Path(args.save_path) / Path("outputs_seg")
+    out_dir_scaled_sdf = Path(args.save_path) / Path("outputs_sdf")
+    out_dir_mesh = Path(args.save_path) / Path("outputs_mesh")
+
+    Path(out_dir_scaled_seg).mkdir(parents=True, exist_ok=True)
+    Path(out_dir_scaled_sdf).mkdir(parents=True, exist_ok=True)
+    Path(out_dir_mesh).mkdir(parents=True, exist_ok=True)
+
+    scale_factor_path = Path(args.manifest).parent / Path("scale_factor.npz")
+
+    sc_factor_data = np.load(scale_factor_path, allow_pickle=True)
+    scale_factor_dict = dict(zip(sc_factor_data["keys"], sc_factor_data["values"]))
+
+    df = pd.read_csv(args.manifest)
+
+    if args.global_path:
+        df["crop_seg_masked"] = df["crop_seg_masked"].apply(lambda x: args.global_path + x)
+
+    if args.debug:
+        df = df.sample(n=5).reset_index(drop=True)
+
+    all_rows = []
+    for _, row in tqdm(df.iterrows(), total=len(df)):
+        row["scale_factor"] = scale_factor_dict[row["CellId"]]
+        row["vox_resolution"] = args.vox_resolution
+        all_rows.append(row)
+
+    with Pool(40) as p:
+        _ = tuple(
+            tqdm(
+                p.imap_unordered(
+                    process,
+                    all_rows,
+                ),
+                total=len(all_rows),
+                desc="compute_everything",
+            )
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Script for computing scaled segmentations and SDFs for polymorphic structures from the WTC-11 hiPSC single-cell image dataset"
+    )
+    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
+    parser.add_argument(
+        "--manifest",
+        type=str,
+        required=True,
+        help="Path to preprocessed single cell image manifest.",
+    )
+    parser.add_argument(
+        "--vox_resolution",
+        type=int,
+        required=True,
+        help="Resolution to voxelize images to.",
+    )
+    parser.add_argument("--debug", type=str2bool, default=False, help="Enable debug mode.")
+    parser.add_argument(
+        "--global_path",
+        type=str,
+        default=None,
+        required=False,
+        help="Path to append to relative paths in the preprocessed manifest.",
+    )
+
+    args = parser.parse_args()
+    main(args)
+
+    """
+    Example run:
+
+    python image_sdfs.py --save_path "./test_img/" --manifest "../../../../../morphology_appropriate_representation_learning/preprocessed_data/npm1/manifest.csv" --vox_resolution 32 --debug True --global_path "../../../../../"
+    """
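One detail of `process()` above that may be worth a second look: the padding floor-divides the size difference on both sides, so any axis where `vox_resolution - dim` is odd ends up one voxel short of the target cube. A toy numpy reproduction of just that padding step, with made-up shapes:

```python
import numpy as np

vox_resolution = 32
scaled_seg = np.ones((28, 30, 27))  # toy voxelized mesh; 27 gives an odd difference

# Mirrors the pad computation in process(): the same floor-divided width on each side.
pad = [((vox_resolution - dim) // 2,) * 2 for dim in scaled_seg.shape]
padded = np.pad(scaled_seg, pad)
print(padded.shape)  # (32, 32, 31): the odd axis (32 - 27 = 5) loses one voxel
```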
diff --git a/src/br/data/preprocessing/sdf_preprocessing/pc_sdfs.py b/src/br/data/preprocessing/sdf_preprocessing/pc_sdfs.py
index b1b471b..2e0a10c 100644
--- a/src/br/data/preprocessing/sdf_preprocessing/pc_sdfs.py
+++ b/src/br/data/preprocessing/sdf_preprocessing/pc_sdfs.py
@@ -1,9 +1,12 @@
+import argparse
 import os
+from multiprocessing import Pool
+from pathlib import Path
 
 import mesh_to_sdf
 import numpy as np
-import pandas as pd
 import trimesh
+from tqdm import tqdm
 
 os.environ["PYOPENGL_PLATFORM"] = "egl"
 
@@ -35,15 +38,14 @@ def sample_iou_points_and_sdf_vals(mesh, n_iou_points, cube_dim=32, padding=0, p
     return out_dict, sdf_vals
 
 
-parent_out_dir = "..."
-path = ""
-out_dir = "./"
-df = pd.read_csv(path)
+def process_mesh(cellid):
+    in_mesh_path = Path(mesh_path) / Path(cellid + ".stl")
 
-for i, row in df.iterrows():
-    in_mesh_path = row["mesh_path_noalign"]
-    cellid = row["CellId"]
-    # out_dir = f"{parent_out_dir}/{cellid}"
+    this_out_dir = out_dir / Path(cellid)
+    out_dir_points = Path(this_out_dir) / Path("points")
+    out_dir_pointcloud = Path(this_out_dir) / Path("pointcloud")
+
+    Path(this_out_dir).mkdir(parents=True, exist_ok=True)
 
     mesh = trimesh.load(in_mesh_path)
     bbox = mesh.bounding_box.bounds
@@ -53,16 +55,72 @@ def sample_iou_points_and_sdf_vals(mesh, n_iou_points, cube_dim=32, padding=0, p
     prep_mesh = mesh.apply_translation(-loc)
     prep_mesh = prep_mesh.apply_scale(1 / scale_factor)
 
-    surface_data_dict = sample_points(prep_mesh, int(32**3))
+    surface_data_dict = sample_points(prep_mesh, int(vox_resolution**3))
     surface_data_dict["loc"] = loc
     surface_data_dict["scale"] = scale_factor
 
     volume_data_dict, sdf_vals = sample_iou_points_and_sdf_vals(
-        prep_mesh, int(32**3), cube_dim=1
+        prep_mesh, int(vox_resolution**3), cube_dim=1
     )
     volume_data_dict["loc"] = loc
     volume_data_dict["scale"] = scale_factor
 
-    np.savez(f"{out_dir}/points", **volume_data_dict)
-    np.savez(f"{out_dir}/pointcloud", **surface_data_dict)
-    np.save(f"{out_dir}/df.npy", sdf_vals)
+    np.savez(out_dir_points, **volume_data_dict)
+    np.savez(out_dir_pointcloud, **surface_data_dict)
+    np.save(f"{this_out_dir}/df.npy", sdf_vals)
+
+
+def main(args):
+    # make save path directory
+    Path(args.save_path).mkdir(parents=True, exist_ok=True)
+
+    global out_dir
+    global vox_resolution
+    global mesh_path
+
+    mesh_path = args.scaled_mesh_path
+
+    vox_resolution = args.vox_resolution
+
+    out_dir = Path(args.save_path)
+
+    cell_ids_to_process = [i.split(".")[0] for i in os.listdir(mesh_path)]
+
+    with Pool(1) as p:
+        _ = tuple(
+            tqdm(
+                p.imap_unordered(
+                    process_mesh,
+                    cell_ids_to_process,
+                ),
+                total=len(cell_ids_to_process),
+                desc="compute_everything",
+            )
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Script for computing SDF pointclouds for polymorphic structures from the WTC-11 hiPSC single-cell image dataset"
+    )
+    parser.add_argument("--save_path", type=str, required=True, help="Path to save results.")
+    parser.add_argument(
+        "--scaled_mesh_path",
+        type=str,
+        required=True,
+        help="Path to folder containing scaled meshes.",
+    )
+    parser.add_argument(
+        "--vox_resolution",
+        type=int,
+        required=True,
+        help="Resolution to voxelize images to.",
+    )
+
+    args = parser.parse_args()
+    main(args)
+
+    """
+    Example run:
+    python pc_sdfs.py --save_path "./test_pcs/" --scaled_mesh_path "./test_img/outputs_mesh/" --vox_resolution 32
+    """
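For local validation of the new scripts, one way to inspect a single cell's `pc_sdfs.py` output is sketched below. The cell ID is a hypothetical placeholder; `loc` and `scale` are the only keys `process_mesh` is shown setting, and the remaining keys depend on `sample_points` and `sample_iou_points_and_sdf_vals`, which this diff only partially shows:

```python
import numpy as np

# Hypothetical cell ID; pc_sdfs.py writes one folder per mesh under --save_path.
cell_dir = "./test_pcs/660323"

surface = np.load(f"{cell_dir}/pointcloud.npz")  # np.savez appends the .npz suffix
volume = np.load(f"{cell_dir}/points.npz")
sdf_vals = np.load(f"{cell_dir}/df.npy")

# loc/scale record the translation and scale used to normalize the mesh, so
# downstream code can map sampled points back into the original mesh frame.
print(surface["loc"], surface["scale"], sorted(volume.keys()), sdf_vals.shape)
```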