diff --git a/.gitignore b/.gitignore index 77e510b..0d7d982 100644 --- a/.gitignore +++ b/.gitignore @@ -1,23 +1,9 @@ -Neural-Scene-Flow-Fields/kid-running -Neural-Scene-Flow-Fields/nsff_scripts/models/*.pt -Neural-Scene-Flow-Fields/nsff_scripts/models/*.pth -Neural-Scene-Flow-Fields/nsff_scripts/*.pt -Neural-Scene-Flow-Fields/nsff_scripts/midas* -Neural-Scene-Flow-Fields/nsff_scripts/viz_* -Neural-Scene-Flow-Fields/nsff_exp/logs +data/* +slurms/*.out +Neural-Scene-Flow-Fields/nsff_exp/logs Neural-Scene-Flow-Fields/nsff_exp/*.png Neural-Scene-Flow-Fields/nsff_exp/*.mp4 -Neural-Scene-Flow-Fields/nsff_exp/*.npy +Neural-Scene-Flow-Fields/nsff_exp/*.pcd Neural-Scene-Flow-Fields/nsff_exp/*.ply -Neural-Scene-Flow-Fields/nsff_scripts/*.zip -data/* -slurms/* -*.zip -Neural-Scene-Flow-Fields/nsff_scripts/__pycache__/* -Neural-Scene-Flow-Fields/nsff_exp/__pycache__/* -dino_utils/__pycache__/* -benchmarks/*.png -benchmarks/__pycache__/* -dino_utils/*.png -dino_utils/*.pt -webpage/resource/* \ No newline at end of file +Neural-Scene-Flow-Fields/nsff_scripts/model* +Neural-Scene-Flow-Fields/nsff_scripts/viz_* diff --git a/Neural-Scene-Flow-Fields/nsff_exp/cluster_dino_per_scene.py b/Neural-Scene-Flow-Fields/nsff_exp/cluster_dino_per_scene.py new file mode 100644 index 0000000..361a67c --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/cluster_dino_per_scene.py @@ -0,0 +1,224 @@ +import sys +sys.path.append("../../dino_utils") +from cosegmentation import * +from pca import * +from pyramid import * +import torch +import os +import cv2 +import faiss +from tqdm import tqdm +import pickle +from run_nerf_helpers import d3_41_colors_rgb +import imageio +import torch.nn.functional as F +import copy + +from torchvision.utils import make_grid + + + +def preprocess_feats(feats, sample_interval, skip_norm=False): + all_descriptors = torch.cat(feats, dim=0).contiguous() + normalized_all_descriptors = all_descriptors.float().cpu().numpy() + #print(np.unique(normalized_all_descriptors)) + if not skip_norm: + faiss.normalize_L2(normalized_all_descriptors) + #print(np.unique(normalized_all_descriptors)) + sampled_descriptors_list = [x[::sample_interval, :] for x in feats] + all_sampled_descriptors_list = torch.cat(sampled_descriptors_list, dim=0).contiguous() + normalized_all_sampled_descriptors = all_sampled_descriptors_list.float().cpu().numpy() + if not skip_norm: + faiss.normalize_L2(normalized_all_sampled_descriptors) + return normalized_all_descriptors, normalized_all_sampled_descriptors + +@torch.no_grad() +def cluster_feats(root_dir, out_dir, load_size, stride, model_type, facet, layer, bin, num_components=64, sample_interval=5, n_cluster=25, elbow=0.975, similarity_thresh=0.5, thresh=0.07, votes_percentage=70): + + device='cuda' if torch.cuda.is_available() else 'cpu' + extractor = ViTExtractor(model_type, stride, device=device) + saliency_extractor = extractor + + + os.makedirs(out_dir, exist_ok=True) + feats = None + sals = None + H = None + W = None + num_samples_per_image = [] + #img_dirs = [] + ##tmp_idx = 0 + #while f'{tmp_idx}.png' in os.listdir(root_dir): + # img_dirs.append(f'{tmp_idx}.png') + # tmp_idx += 1 + # #sorted(os.listdir(os.path.join(root_dir, name))) + # #assert False, img_dirs + img_dirs = [img for img in os.listdir(root_dir)] + img_dirs = sorted(img_dirs) + num_img = len(img_dirs) + #assert False, img_dirs + for img in img_dirs: + #print(img) + if not img.endswith('.png'): + continue + if H is None: + tmp = cv2.imread(os.path.join(root_dir, img)) + H = tmp.shape[0] + W = tmp.shape[1] + 
#assert False, [H, W] + batch, _ = extractor.preprocess(os.path.join(root_dir, img), load_size) + + feat_raw = extractor.extract_descriptors(batch.to(device), layer, facet, bin) + feat_raw = feat_raw.view(batch.shape[0], extractor.num_patches[0], extractor.num_patches[1], -1) + sal_raw = saliency_extractor.extract_saliency_maps(batch.to(device)) + sal_raw = sal_raw.view(batch.shape[0], extractor.num_patches[0], extractor.num_patches[1], -1) + + if feats is None: + feats = feat_raw + sals = sal_raw + else: + feats = torch.cat([feats, feat_raw], dim=0) + sals = torch.cat([sals, sal_raw], dim=0) + num_samples_per_image.append(H*W) + feats = torch.nn.functional.normalize(feats, p=2.0, dim=-1, eps=1e-12, out=None) + old_shape = feats.shape + feats = feats.view(-1, feats.shape[-1]) + pca = PCA(n_components=num_components).fit(feats.cpu()) + pca_feats = pca.transform(feats.cpu()) + feats = pca_feats.reshape((old_shape[0], old_shape[1], old_shape[2], -1)) + feats = torch.nn.functional.interpolate(torch.from_numpy(feats).permute(0, 3, 1, 2), (H, W), mode="nearest").permute(0, 2, 3, 1) + pca_color = PCA(n_components=3).fit(feats.view(-1, feats.shape[-1]).cpu().numpy()) + #print("I am done") + pca_feats = pca_color.transform(feats.view(-1, feats.shape[-1]).cpu().numpy()) + #print("I am done") + pca_feats = pca_feats.reshape((-1, H, W, pca_feats.shape[-1])) + for comp_idx in range(3): + comp = pca_feats[..., comp_idx] + comp_min = comp.min(axis=(0, 1)) + comp_max = comp.max(axis=(0, 1)) + comp_img = (comp - comp_min) / (comp_max - comp_min) + pca_feats[..., comp_idx] = comp_img + + feats = torch.nn.functional.normalize(feats, p=2.0, dim=-1, eps=1e-12, out=None).numpy() + sals = torch.nn.functional.interpolate(sals.permute(0, 3, 1, 2), (H, W), mode="nearest").permute(0, 2, 3, 1).view(sals.shape[0], -1) + + + for save_id in range(len(pca_feats)): + cv2.imwrite(os.path.join(out_dir, f"feat_{save_id}.png"), pca_feats[save_id] * 255.) + cv2.imwrite(os.path.join(out_dir, f"sal_{save_id}.png"), sals.view(-1, H, W).cpu().numpy()[save_id] * 255.) 
+ #assert False, "Pause and modify below" + feature = feats.reshape((-1, num_components)).astype(np.float32) + sampled_feature = np.ascontiguousarray(feature[::sample_interval]) + sum_of_squared_dists = [] + n_cluster_range = list(range(1, n_cluster)) + for n_clu in tqdm(n_cluster_range): + algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_clu, gpu=False, niter=300, nredo=10, seed=1234, verbose=False) + algorithm.train(sampled_feature) + squared_distances, labels = algorithm.index.search(feature, 1) + objective = squared_distances.sum() + sum_of_squared_dists.append(objective / feature.shape[0]) + if (len(sum_of_squared_dists) > 1 and sum_of_squared_dists[-1] > elbow * sum_of_squared_dists[-2]): + break + faiss.write_index(algorithm.index, os.path.join(out_dir, "large.index")) + num_labels = np.max(n_clu) + 1 + labels_per_image_no_merge_no_salient = np.split(labels, np.cumsum(num_samples_per_image)) + + centroids = algorithm.centroids + sims = -np.ones((len(centroids), len(centroids))) + #assert samples["dinos"].shape[-1] == 64 + for c1 in range(len(centroids)): + item_1 = centroids[c1][:64] + for c2 in range(c1+1, len(centroids)): + item_2 = centroids[c2][:64] + sims[c1, c2] = np.dot(item_1, item_2) / (np.linalg.norm(item_1) * np.linalg.norm(item_2)) + print(c1, c2, sims[c1, c2]) + label_mapper = {} + for c2 in range(len(centroids)): + for c1 in range(c2): + if sims[c1, c2] > similarity_thresh: + if c1 in label_mapper: + label_mapper[c2] = label_mapper[c1] + else: + label_mapper[c2] = c1 + break + pickle.dump(label_mapper, open(os.path.join(out_dir, "label_mapper.pkl"), 'wb')) + for key in label_mapper: + print(key, label_mapper[key]) + for c1 in range(len(centroids)): + key = len(centroids) - c1 - 1 + if key in label_mapper: + labels[labels == key] = label_mapper[key] + labels_per_image_no_salient = np.split(labels, np.cumsum(num_samples_per_image)) + + votes = np.zeros(num_labels) + for image_labels, saliency_map in zip(labels_per_image_no_salient, sals): + #assert False, [saliency_map.shape, (image_labels[:, 0] == 0).shape] + for label in range(num_labels): + label_saliency = saliency_map[image_labels[:, 0] == label].mean() + if label_saliency > thresh: + votes[label] += 1 + print(votes) + salient_labels = np.where(votes >= np.ceil(num_img * votes_percentage / 100)) + with open(os.path.join(out_dir, "salient.npy"), "wb") as f: + np.save(f, salient_labels) + + + labels[~np.isin(labels, salient_labels)] = -1 + labels_per_image = np.split(labels, np.cumsum(num_samples_per_image)) + #assert False, labels_per_image[0].shape + #os.makedirs(os.path.join(out_dir, "train"), exist_ok=True) + for idx, (image_labels_no_merge_no_salient, image_labels_no_salient, final_labels) in enumerate(zip(labels_per_image_no_merge_no_salient, labels_per_image_no_salient, labels_per_image)): + #assert False, [image_labels_no_merge_no_salient.shape, final_labels.shape] + #assert False, [type(final_labels), final_labels.shape] + img_clu = d3_41_colors_rgb[np.resize(final_labels, (H, W))] + #assert False, img_clu.shape + #img_clu.reshape((H, W, 3)) + cv2.imwrite(os.path.join(out_dir, f"{idx}.png"), img_clu) + + + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='cluster sems') + + parser.add_argument('--root_dir', type=str, required=True, help='The root dir of image sets.') + parser.add_argument("--out_dir", type=str, required=True) + #parser.add_argument("--wfeat_id", type=int, required=True) + #parser.add_argument("--wsal_id", type=int, required=True) + ''' + 
parser.add_argument('--max_cluster', type=int, required=True, help='how many clusters') + parser.add_argument('--depth_ratio', type=float, default=0, help="how much depth information to use") + parser.add_argument('--pixel_ratio', type=float, default=0, help="how much pixel information to use") + parser.add_argument('--pts_ratio', type=float, default=0, help="how much 3D points information to use") + parser.add_argument('--use_gt_dino', action="store_true", help="whether use gt dino feature without reconstruction") + parser.add_argument('--use_gt_sal', action="store_true", help="whether use gt saliency feature without reconstruction") + parser.add_argument('--votes_percentage', default=75, type=int, help="percentage of votes needed for a cluster to " + "be considered salient.") + parser.add_argument('--thresh', default=0.065, type=float, help='saliency maps threshold to distinguish fg / bg.') + ''' + + parser.add_argument('--load_size', default=128, type=int, help='load size of the input images. If None maintains' + 'original image size, if int resizes each image' + 'such that the smaller side is this number.') + parser.add_argument('--stride', default=4, type=int, help="""stride of first convolution layer. + small stride -> higher resolution.""") + parser.add_argument('--model_type', default='dino_vits8', type=str, + help="""type of model to extract. + Choose from [dino_vits8 | dino_vits16 | dino_vitb8 | dino_vitb16 | vit_small_patch8_224 | + vit_small_patch16_224 | vit_base_patch8_224 | vit_base_patch16_224]""") + parser.add_argument('--facet', default='key', type=str, help="""facet to create descriptors from. + options: ['key' | 'query' | 'value' | 'token']""") + parser.add_argument('--layer', default=11, type=int, help="layer to create descriptors from.") + parser.add_argument('--bin', default='False', type=str2bool, help="create a binned descriptor if True.") + parser.add_argument('--remove_outliers', default='False', type=str2bool, help="Remove outliers using cls token.") + parser.add_argument('--load_algo', default='', type=str, help="load a trained kmeans or not") + + args = parser.parse_args() + + #root_dir = "../../data/test_data" + #out_dir = "../../data/dino_masks" + cluster_feats(args.root_dir, args.out_dir, + args.load_size, args.stride, args.model_type, args.facet, args.layer, args.bin, num_components=64) + + \ No newline at end of file diff --git a/Neural-Scene-Flow-Fields/nsff_exp/configs/config_apple.txt b/Neural-Scene-Flow-Fields/nsff_exp/configs/config_apple.txt new file mode 100644 index 0000000..b0b1eee --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/configs/config_apple.txt @@ -0,0 +1,48 @@ +expname = experiment_apple + +basedir = ./logs +datadir = ../../data/DyCheck/apple/dense +final_height = 400 +dataset_type = llff + +factor = 2 +llffhold = 10 + +N_rand = 1024 +N_samples = 128 +N_importance = 0 +netwidth = 256 + +use_viewdirs = True +raw_noise_std = 1e0 +no_ndc = False +lindisp = False + +no_batching = True +spherify = False +decay_depth_w = True +decay_optical_flow_w = True +use_motion_mask = False +num_extra_sample = 512 +chain_sf = True + +w_depth = 0.04 +w_optical_flow = 0.02 +w_sm = 0.1 +w_sf_reg = 0.1 +w_cycle = 1.0 + +start_frame = 0 +end_frame = 48 +decay_iteration = 30 + +dino_coe = 0.04 +sal_coe = 0.04 + +depth_full = False + +shallow_dino = True +use_tanh = True +prep_dino = True + +use_multi_dino = True diff --git a/Neural-Scene-Flow-Fields/nsff_exp/configs/config_space-out.txt b/Neural-Scene-Flow-Fields/nsff_exp/configs/config_space-out.txt new 
file mode 100644 index 0000000..b231850 --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/configs/config_space-out.txt @@ -0,0 +1,48 @@ +expname = experiment_space-out + +basedir = ./logs +datadir = ../../data/DyCheck/space-out/dense +final_height = 400 +dataset_type = llff + +factor = 2 +llffhold = 10 + +N_rand = 1024 +N_samples = 128 +N_importance = 0 +netwidth = 256 + +use_viewdirs = True +raw_noise_std = 1e0 +no_ndc = False +lindisp = False + +no_batching = True +spherify = False +decay_depth_w = True +decay_optical_flow_w = True +use_motion_mask = False +num_extra_sample = 512 +chain_sf = True + +w_depth = 0.04 +w_optical_flow = 0.02 +w_sm = 0.1 +w_sf_reg = 0.1 +w_cycle = 1.0 + +start_frame = 0 +end_frame = 48 +decay_iteration = 30 + +dino_coe = 0.04 +sal_coe = 0.04 + +depth_full = False + +shallow_dino = True +use_tanh = True +prep_dino = True + +use_multi_dino = True diff --git a/Neural-Scene-Flow-Fields/nsff_exp/configs/config_spin.txt b/Neural-Scene-Flow-Fields/nsff_exp/configs/config_spin.txt new file mode 100644 index 0000000..bc92f65 --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/configs/config_spin.txt @@ -0,0 +1,48 @@ +expname = experiment_spin + +basedir = ./logs +datadir = ../../data/DyCheck/spin/dense +final_height = 400 +dataset_type = llff + +factor = 2 +llffhold = 10 + +N_rand = 1024 +N_samples = 128 +N_importance = 0 +netwidth = 256 + +use_viewdirs = True +raw_noise_std = 1e0 +no_ndc = False +lindisp = False + +no_batching = True +spherify = False +decay_depth_w = True +decay_optical_flow_w = True +use_motion_mask = False +num_extra_sample = 512 +chain_sf = True + +w_depth = 0.04 +w_optical_flow = 0.02 +w_sm = 0.1 +w_sf_reg = 0.1 +w_cycle = 1.0 + +start_frame = 0 +end_frame = 48 +decay_iteration = 30 + +dino_coe = 0.04 +sal_coe = 0.04 + +depth_full = False + +shallow_dino = True +use_tanh = True +prep_dino = True + +use_multi_dino = True diff --git a/Neural-Scene-Flow-Fields/nsff_exp/postprocess.py b/Neural-Scene-Flow-Fields/nsff_exp/postprocess.py index a809d11..c203159 100644 --- a/Neural-Scene-Flow-Fields/nsff_exp/postprocess.py +++ b/Neural-Scene-Flow-Fields/nsff_exp/postprocess.py @@ -13,6 +13,12 @@ #https://medium.com/swlh/image-processing-with-python-connected-components-and-region-labeling-3eef1864b951 +def config_parser(): + import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--root_dir", type=str, required=True) + return parser + square = np.ones((3, 3)) def multi_dil(im, num, element=square): for i in range(num): @@ -30,7 +36,11 @@ def multi_ero(im, num, element=square): if __name__ == "__main__": scenes = ["DynamicFace-2", "Truck-2","Umbrella", "Balloon1-2", "Balloon2-2", "playground", "Jumping", "Skating-2", ] - root_dir = "../../data/ours_1018" + parser = config_parser() + args = parser.parse_args() + #root_dir = "../../data/ours_1018" + root_dir = args.root_dir + out_dir = root_dir + "_processed" for scene in tqdm(scenes): diff --git a/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf.py b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf.py index 6d0e30c..3fe265d 100644 --- a/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf.py +++ b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf.py @@ -16,6 +16,7 @@ def config_parser(): import configargparse parser = configargparse.ArgumentParser() + parser.add_argument("--root_dir", type=str, required=True) parser.add_argument("--compact_depth", type=str, default="20") parser.add_argument("--compact_rgb", type=str, default="10") parser.add_argument("--sdim_depth", 
type=str, default="40") @@ -39,6 +40,7 @@ def config_parser(): args.compact_rgb = int(args.compact_rgb) args.sdim_depth = int(args.sdim_depth) args.sdim_rgb = int(args.sdim_rgb) + root_dir = args.root_dir scenes = ["Umbrella", "Skating-2", "DynamicFace-2", "Truck-2", "Balloon1-2", @@ -53,7 +55,7 @@ def config_parser(): "Truck-2": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_truck_sal_multi_F00-30/render_2D-010_path_360001", "Umbrella": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_Umbrella_sal_multi_F00-30/render_2D-010_path_360001" } - root_dir = "../../data/no_sal/oracle" + #root_dir = "../../data/no_sal/oracle" out_dir = root_dir + f"_crfs/{args.compact_depth}_{args.compact_rgb}_{args.sdim_depth}_{args.sdim_rgb}" for scene in tqdm(scenes): diff --git a/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf_per_scene.py b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf_per_scene.py new file mode 100644 index 0000000..abb5325 --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_crf_per_scene.py @@ -0,0 +1,119 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from skimage.io import imread, imsave +from skimage.color import rgb2gray +from skimage.morphology import (erosion, dilation, closing, opening, + area_closing, area_opening) +from skimage.measure import label, regionprops, regionprops_table +import os +import copy +from tqdm import tqdm +import cv2 + +import pydensecrf.densecrf as dcrf +from pydensecrf.utils import unary_from_labels, create_pairwise_bilateral, create_pairwise_gaussian +def config_parser(): + import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--root_dir", type=str, required=True) + parser.add_argument("--render_dir", type=str, required=True) + parser.add_argument("--compact_depth", type=str, default="20") + parser.add_argument("--compact_rgb", type=str, default="10") + parser.add_argument("--sdim_depth", type=str, default="40") + parser.add_argument("--sdim_rgb", type=str, default="40") + return parser +#https://medium.com/swlh/image-processing-with-python-connected-components-and-region-labeling-3eef1864b951 + + +#def imread(f): +# if f.endswith('png'): +# return imageio.imread(f, ignoregamma=True) +# else: +# return imageio.imread(f) + +if __name__ == "__main__": + + parser = config_parser() + args = parser.parse_args() + + args.compact_depth = int(args.compact_depth) + args.compact_rgb = int(args.compact_rgb) + args.sdim_depth = int(args.sdim_depth) + args.sdim_rgb = int(args.sdim_rgb) + root_dir = args.root_dir + render_dir = args.render_dir + + + #scenes = ["Umbrella", "Skating-2", "DynamicFace-2", "Truck-2", "Balloon1-2", + #"Balloon2-2", "playground", "Jumping",] + #render_map = { + # "Balloon1-2": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_balloon1-2_2_multi_F00-30/render_2D-010_path_360001", + # "Balloon2-2": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_Balloon2-2_2_multi_F00-30/render_2D-010_path_360001", + # "DynamicFace-2": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_dynamicFace_sal_multi_F00-30/render_2D-010_path_360001", + # "Jumping": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_jumping_sal_multi_F00-30/render_2D-010_path_360001", + # "playground": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_playground_sal_multi_F00-30/render_2D-010_path_360001", + # "Skating-2": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_skating_sal_multi_F00-30/render_2D-010_path_360001", + # "Truck-2": 
"../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_truck_sal_multi_F00-30/render_2D-010_path_360001", + # "Umbrella": "../../Neural-Scene-Flow-Fields/nsff_exp/logs/experiment_Umbrella_sal_multi_F00-30/render_2D-010_path_360001" + # } + #root_dir = "../../data/no_sal/oracle" + out_dir = root_dir + f"_crfs/{args.compact_depth}_{args.compact_rgb}_{args.sdim_depth}_{args.sdim_rgb}" + + #for scene in tqdm(scenes): + + assert os.path.exists(root_dir) + os.makedirs(out_dir, exist_ok=True) + image_id = 0 + while os.path.exists(os.path.join(root_dir, f"{image_id}.png")): + rgb_img = cv2.imread(os.path.join(render_dir, f"{image_id}_rgb.png")) + depth_img = cv2.imread(os.path.join(render_dir, f"{image_id}_depth.png")) + img = cv2.imread(os.path.join(root_dir, f"{image_id}.png")) + #assert False, imsave("test.png", img) + #assert False, img.shape + #(288, 54x, 3) + unique_colors = np.unique(img.reshape((-1, 3)), axis=0)[:,:] + #assert False, [unique_colors, unique_colors.dtype, unique_colors.shape] + U = cv2.imread(os.path.join(root_dir, f"{image_id}.png"), cv2.IMREAD_GRAYSCALE) + labels = np.unique(U) + mydict = {} + for i in range(len(labels)): + mydict[labels[i]] = i + U = np.vectorize(mydict.get)(U) + n_labels = np.max(U) + 1 + HAS_UNK = False + U = unary_from_labels(U, n_labels, gt_prob=0.7, zero_unsure=HAS_UNK) + d = dcrf.DenseCRF2D(depth_img.shape[1], depth_img.shape[0], U.shape[0]) + d.setUnaryEnergy(U) + + feats = create_pairwise_gaussian(sdims=(3, 3), shape=depth_img.shape[:2]) + d.addPairwiseEnergy(feats, compat=15, + kernel=dcrf.DIAG_KERNEL, + normalization=dcrf.NORMALIZE_SYMMETRIC) + feats = create_pairwise_bilateral(sdims=(args.sdim_depth, args.sdim_depth), schan=(13, 13, 13), + img=depth_img, chdim=2) + d.addPairwiseEnergy(feats, compat=args.compact_depth, + kernel=dcrf.DIAG_KERNEL, + normalization=dcrf.NORMALIZE_SYMMETRIC) + feats = create_pairwise_bilateral(sdims=(args.sdim_rgb, args.sdim_rgb), schan=(13, 13, 13), + img=rgb_img, chdim=2) + d.addPairwiseEnergy(feats, compat=args.compact_rgb, + kernel=dcrf.DIAG_KERNEL, + normalization=dcrf.NORMALIZE_SYMMETRIC) + + Q = d.inference(5) + + #ids = list(range(len(unique_colors))) + #tmp = np.zeros_like(img).astype(int)-1 + #for color, idx in zip(unique_colors, ids): + #assert False, [pred_mask.shape, color.shape] + #print(color) + # if color[0][0][0] == 0 and color[0][0][1] == 0 and color[0][0][2] == 0: + # continue + # tmp[img == color] = idx + #img = tmp[..., 0] + #assert False, imsave("test.png", img*50.) 
+ + cv2.imwrite(os.path.join(out_dir, f"{image_id}.png"), unique_colors[np.argmax(Q, axis=0), :].reshape(depth_img.shape)) + image_id += 1 + diff --git a/Neural-Scene-Flow-Fields/nsff_exp/postprocess_oracle.py b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_oracle.py new file mode 100644 index 0000000..a519628 --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_oracle.py @@ -0,0 +1,61 @@ +import argparse +import numpy as np +import cv2 +import os +import json +from tqdm import tqdm + +def config_parser(): + import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--raw_folder", type=str, default="../../data/no_sal") + parser.add_argument("--gt_folder", type=str, default="../../data/gt_masks") + parser.add_argument("--threshold", type=float, default=0.3) + parser.add_argument("--flip_fg", action="store_true") + #parser.add_argument("--out_folder", type=str, default="") + #parser.add_argument("--compact_rgb", type=str, default="20") + #parser.add_argument("--sdim_depth", type=str, default="40") + #parser.add_argument("--sdim_rgb", type=str, default="20") + return parser + +if __name__ == "__main__": + parser = config_parser() + args = parser.parse_args() + + #scenes = [ "Skating-2", "Balloon1-2", "Balloon2-2", "DynamicFace-2", "Jumping", "playground", "Truck-2", "Umbrella"] + out_folder = os.path.join(args.raw_folder, "oracle") + + os.makedirs(out_folder, exist_ok=True) + # find the background colors based on first frame + gt_img = cv2.imread(os.path.join(args.gt_folder, "00000.png.png"), cv2.IMREAD_GRAYSCALE) + raw_img = cv2.imread(os.path.join(args.raw_folder, "0.png")) + #assert False, [gt_img.shape, raw_img.shape] + is_foreground = gt_img < 10 + if args.flip_fg: + is_foreground = gt_img > 10 + #cv2.imwrite("test.png", is_foreground*255.) 
+ #assert False, "Pause" + unique_colors = np.unique(raw_img.reshape((-1, 3)), axis=0) + #assert False, [is_foreground.shape, unique_colors] + #print(unique_colors) + background = [] + for color in unique_colors: + region = (raw_img[..., 0:1] == color[0]) & (raw_img[..., 1:2] == color[1])& (raw_img[..., 2:3] == color[2]) + #print(color, region.shape) + #cv2.imwrite(f"test_{color}.png", raw_img) + #input() + #assert False, region.shape + region = region[..., 0] + ratio = np.sum(region & is_foreground) / np.sum(region).astype(float) + print(color, ratio) + if ratio < args.threshold: + background.append(color) + + idx = 0 + while os.path.exists(os.path.join(args.raw_folder, f"{idx}.png")): + raw_img = cv2.imread(os.path.join(args.raw_folder, f"{idx}.png")) + for color in background: + region = (raw_img[..., 0:1] == color[0]) & (raw_img[..., 1:2] == color[1])& (raw_img[..., 2:3] == color[2]) + raw_img[region[..., 0]] *= 0 + cv2.imwrite(os.path.join(out_folder, f"{idx}.png"), raw_img) + idx += 1 \ No newline at end of file diff --git a/Neural-Scene-Flow-Fields/nsff_exp/postprocess_per_scene.py b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_per_scene.py new file mode 100644 index 0000000..4e136a0 --- /dev/null +++ b/Neural-Scene-Flow-Fields/nsff_exp/postprocess_per_scene.py @@ -0,0 +1,134 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from skimage.io import imread, imsave +from skimage.color import rgb2gray +from skimage.morphology import (erosion, dilation, closing, opening, + area_closing, area_opening) +from skimage.measure import label, regionprops, regionprops_table +import os +import copy +from tqdm import tqdm +import cv2 + +#https://medium.com/swlh/image-processing-with-python-connected-components-and-region-labeling-3eef1864b951 + +def config_parser(): + import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--root_dir", type=str, required=True) + return parser + +square = np.ones((3, 3)) +def multi_dil(im, num, element=square): + for i in range(num): + im = dilation(im, element) + return im +def multi_ero(im, num, element=square): + for i in range(num): + im = erosion(im, element) + return im +#def imread(f): +# if f.endswith('png'): +# return imageio.imread(f, ignoregamma=True) +# else: +# return imageio.imread(f) + +if __name__ == "__main__": + #scenes = ["DynamicFace-2", "Truck-2","Umbrella", "Balloon1-2", "Balloon2-2", "playground", "Jumping", "Skating-2", ] + parser = config_parser() + args = parser.parse_args() + #root_dir = "../../data/ours_1018" + root_dir = args.root_dir + + out_dir = root_dir + "_processed" + #for scene in tqdm(scenes): + + #assert os.path.exists(os.path.join(root_dir, scene)) + os.makedirs(out_dir, exist_ok=True) + image_id = 0 + while os.path.exists(os.path.join(root_dir, f"{image_id}.png")): + img = cv2.imread(os.path.join(root_dir, f"{image_id}.png")) + #assert False, imsave("test.png", img) + #assert False, img.shape + #(288, 54x, 3) + unique_colors = np.unique(img.reshape((-1, 3)), axis=0)[:,None, None, :] + ids = list(range(len(unique_colors))) + tmp = np.zeros_like(img).astype(int)-1 + for color, idx in zip(unique_colors, ids): + #assert False, [pred_mask.shape, color.shape] + #print(color) + if color[0][0][0] == 0 and color[0][0][1] == 0 and color[0][0][2] == 0: + continue + tmp[img == color] = idx + img = tmp[..., 0] + #assert False, imsave("test.png", img*50.) 
+ ''' + # translate back to colored cluster image + tmp = np.zeros((img.shape[0], img.shape[1], 3)) + for color, idx in zip(unique_colors, ids): + tmp[img == idx] = color + assert False, imsave("test.png", tmp) + ''' + old_img = copy.deepcopy(img) + + + + #dilation and erosion + #img = multi_dil(img, 2) + #img = area_closing(img, 500) + img = multi_ero(img, 3) + img = opening(img) + img = multi_dil(img, 3) + img = area_closing(img, 500) + is_obj = np.zeros_like(img) + is_obj[img > 0] = 1 + + #assert False, imsave("test.png", is_obj) + + ''' + + label_im = label(img, connectivity=2) + regions = regionprops(label_im) + #assert False, imsave("test.png", label_im) + + list_of_index = [] + for num, x in enumerate(regions): + area = np.sum(label_im == num) + #convex_area = x.convex_area + if (area>100): + list_of_index.append(num) + #print(list_of_index) + to_collapse = ~np.isin(label_im, list_of_index) + #assert False, imsave("test.png", to_collapse) + ''' + + img = old_img + img[is_obj == 0] *= 0 + old_img = copy.deepcopy(img) + label_im = label(img, connectivity=2) + imsave(os.path.join(out_dir, f"{image_id}_label.png"), label_im) + #regions = regionprops(label_im) + list_of_index = [] + for num, x in enumerate(np.unique(label_im)): + area = np.sum(label_im == x) + #convex_area = x.convex_area + #print(num, x,) + if area > 0.01*img.shape[0]*img.shape[1]: + list_of_index.append(num) + #print(list_of_index) + #assert False, np.unique(label_im) + to_collapse = ~np.isin(label_im, list_of_index) + imsave(os.path.join(out_dir, f"{image_id}_small.png"), to_collapse) + + #img[to_collapse] = + # translate back to colored cluster image + img = old_img + tmp = np.zeros((img.shape[0], img.shape[1], 3)) + for color, idx in zip(unique_colors, ids): + tmp[img == idx] = color + tmp[to_collapse, :] *= 0 + #assert False, imsave("test.png", tmp) + cv2.imwrite(os.path.join(out_dir, f"{image_id}.png"), tmp) + image_id += 1 + diff --git a/Neural-Scene-Flow-Fields/nsff_exp/render_utils.py b/Neural-Scene-Flow-Fields/nsff_exp/render_utils.py index d038c35..27b3134 100644 --- a/Neural-Scene-Flow-Fields/nsff_exp/render_utils.py +++ b/Neural-Scene-Flow-Fields/nsff_exp/render_utils.py @@ -9,7 +9,7 @@ import torch.nn.functional as F from run_nerf_helpers import * from tqdm import tqdm -#import open3d as o3d +import open3d as o3d from vis_dino import * from sklearn.cluster import SpectralClustering, DBSCAN import pickle @@ -92,7 +92,7 @@ def splat_full_img(ret, ratio, R_w2t, t_w2t, j, H, W, focal, fwd_flow, splat_raw import softsplat assert ret["raw_dino"] is not None - + assert ret["raw_sal"] is not None pts_ref = ret['pts_ref'][:, :, j, :3] @@ -126,7 +126,7 @@ def splat_full_img(ret, ratio, R_w2t, t_w2t, j, H, W, focal, fwd_flow, splat_raw #for k in ret: # print(k, ret[k].device) device = ret["raw_dino"].device - raw_rgba = torch.cat([ret['raw_rgb'][:, :, j, :].to(device), ret['raw_alpha'][:, :, j].unsqueeze(-1).to(device), ret["raw_dino"][:, :, j, :]], dim=-1) + raw_rgba = torch.cat([ret['raw_rgb'][:, :, j, :].to(device), ret['raw_alpha'][:, :, j].unsqueeze(-1).to(device), ret["raw_dino"][:, :, j, :], ret["raw_sal"][:, :, j, :]], dim=-1) #ret["raw_dino"] = ret["raw_dino"].cuda() raw_rgba = raw_rgba.permute(2, 0, 1).unsqueeze(0).contiguous().cuda() @@ -159,7 +159,7 @@ def splat_full_img(ret, ratio, R_w2t, t_w2t, j, H, W, focal, fwd_flow, splat_raw #raw_rgba = raw_rgba.cpu() #raw_rgba = None #assert False, [ret["raw_rgb_rigid"].shape, ret["raw_alpha_rigid"].shape, ret["raw_dino_rigid"].shape] - raw_rgba = 
torch.cat([ret['raw_rgb_rigid'][:, :, j, :].to(device), ret['raw_alpha_rigid'][:, :, j].unsqueeze(-1).to(device), ret["raw_dino_rigid"][:, :, j, :]], dim=-1) + raw_rgba = torch.cat([ret['raw_rgb_rigid'][:, :, j, :].to(device), ret['raw_alpha_rigid'][:, :, j].unsqueeze(-1).to(device), ret["raw_dino_rigid"][:, :, j, :], ret["raw_sal_rigid"][:, :, j, :]], dim=-1) raw_rgba = raw_rgba.permute(2, 0, 1).unsqueeze(0).contiguous().cuda() splat_raw['splat_raw_rgba_rig'] = softsplat.FunctionSoftsplat(tenInput=raw_rgba, @@ -319,6 +319,9 @@ def render_slowmo_full(disps, render_poses, bt_poses, save_dino_dir = os.path.join(savedir, 'dinos') save_dino_dy_dir = os.path.join(savedir, 'dinos_dy') save_dino_rig_dir = os.path.join(savedir, 'dinos_rig') + save_sal_dir = os.path.join(savedir, 'sals') + save_sal_dy_dir = os.path.join(savedir, 'sals_dy') + save_sal_rig_dir = os.path.join(savedir, 'sals_rig') save_depth_dir = os.path.join(savedir, 'depths') save_depth_dy_dir = os.path.join(savedir, 'depths_dy') save_depth_rig_dir = os.path.join(savedir, 'depths_rig') @@ -330,6 +333,9 @@ def render_slowmo_full(disps, render_poses, bt_poses, os.makedirs(save_dino_dir, exist_ok=True) #os.makedirs(save_dino_dy_dir, exist_ok=True) #os.makedirs(save_dino_rig_dir, exist_ok=True) + os.makedirs(save_sal_dir, exist_ok=True) + os.makedirs(save_sal_dy_dir, exist_ok=True) + os.makedirs(save_sal_rig_dir, exist_ok=True) os.makedirs(save_depth_dir, exist_ok=True) os.makedirs(save_depth_dy_dir, exist_ok=True) os.makedirs(save_depth_rig_dir, exist_ok=True) @@ -349,6 +355,9 @@ def render_slowmo_full(disps, render_poses, bt_poses, "final_dino": None, "final_dino_dy": None, "final_dino_rig": None, + "final_sal": None, + "final_sal_dy": None, + "final_sal_rig": None, "final_blend": None, "z_vals": None, "render_pose": None, @@ -367,16 +376,19 @@ def render_slowmo_full(disps, render_poses, bt_poses, filename = os.path.join(save_img_dir, '{:03d}.jpg'.format(i)) #if os.path.exists(filename): # continue + #cur_time = target_idx - 10. flow_time = int(np.floor(cur_time)) + ratio = cur_time - np.floor(cur_time) + #ratio = float(i % 40)/40. print('cur_time ', i, cur_time, ratio) t = time.time() - int_rot, int_trans = linear_pose_interp(render_poses[flow_time, :3, 3], - render_poses[flow_time, :3, :3], - render_poses[flow_time + 1, :3, 3], - render_poses[flow_time + 1, :3, :3], - ratio) + int_rot, int_trans = linear_pose_interp(render_poses[flow_time*0 + 12, :3, 3], + render_poses[flow_time*0 + 12, :3, :3], + render_poses[flow_time*0 + 12 + 1, :3, 3], + render_poses[flow_time*0 + 12 + 1, :3, :3], + 0.5) int_poses = np.concatenate((int_rot, int_trans[:, np.newaxis]), 1) int_poses = np.concatenate([int_poses[:3, :4], np.array([0.0, 0.0, 0.0, 1.0])[np.newaxis, :]], axis=0) @@ -401,7 +413,8 @@ def render_slowmo_full(disps, render_poses, bt_poses, c2w=tmp["render_pose"], return_sem=True, **render_kwargs) - + for k in ret1: + ret1[k] = ret1[k].cpu() #for k in ret1: # print(k) # torch.save(ret1[k], f'{savedir}/{k}_1.pt') @@ -422,9 +435,11 @@ def render_slowmo_full(disps, render_poses, bt_poses, c2w=tmp["render_pose"], return_sem=True, **render_kwargs) + + for k in ret2: + ret2[k] = ret2[k].cpu() ret2["rays_o"] = None ret2["rays_d"] = None - #ret2["raw_dino"] = ret2["raw_dino"].cpu() #ret2["raw_alpha"] /= ret2["raw_blend_w"] #ret2["raw_alpha_rigid"] /= 1. 
- ret2["raw_blend_w"] @@ -448,6 +463,10 @@ def render_slowmo_full(disps, render_poses, bt_poses, tmp["final_dino_dy"] = torch.zeros((ret1["raw_dino"].shape[-1], H, W)) tmp["final_dino_rig"] = torch.zeros((ret1["raw_dino"].shape[-1], H, W)) + tmp["final_sal"] = torch.zeros((ret1["raw_sal"].shape[-1], H, W)) + tmp["final_sal_dy"] = torch.zeros((ret1["raw_sal"].shape[-1], H, W)) + tmp["final_sal_rig"] = torch.zeros((ret1["raw_sal"].shape[-1], H, W)) + tmp["final_blend"] = torch.zeros((1, H, W)) tmp["z_vals"] = ret1['z_vals'] @@ -492,16 +511,30 @@ def render_slowmo_full(disps, render_poses, bt_poses, # splat_alpha = splat_alpha1 * (1. - ratio) + splat_alpha2 * ratio # final_rgb += T_i * (splat_alpha1 * (1. - ratio) * splat_rgb1 + splat_alpha2 * ratio * splat_rgb2) - tmp["final_dino"] += tmp["T_i"] * (splat_raw_1["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_dy"][0, 4:, :, :] + \ - splat_raw_1["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_rig"][0, 4:, :, :] ) * (1.0 - ratio) - tmp["final_dino"] += tmp["T_i"] * (splat_raw_2["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_dy"][0, 4:, :, :] + \ - splat_raw_2["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_rig"][0, 4:, :, :] ) * ratio + tmp["final_dino"] += tmp["T_i"] * (splat_raw_1["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_dy"][0, 4:-1, :, :] + \ + splat_raw_1["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_rig"][0, 4:-1, :, :] ) * (1.0 - ratio) + tmp["final_dino"] += tmp["T_i"] * (splat_raw_2["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_dy"][0, 4:-1, :, :] + \ + splat_raw_2["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_rig"][0, 4:-1, :, :] ) * ratio - tmp["final_dino_dy"] += tmp["T_i_dy"] * splat_raw_1["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_dy"][0, 4:, :, :] * (1.0 - ratio) - tmp["final_dino_dy"] += tmp["T_i_dy"] * splat_raw_2["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_dy"][0, 4:, :, :] * ratio + tmp["final_dino_dy"] += tmp["T_i_dy"] * splat_raw_1["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_dy"][0, 4:-1, :, :] * (1.0 - ratio) + tmp["final_dino_dy"] += tmp["T_i_dy"] * splat_raw_2["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_dy"][0, 4:-1, :, :] * ratio - tmp["final_dino_rig"] += tmp["T_i_rig"] * splat_raw_1["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_rig"][0, 4:, :, :] * (1.0 - ratio) - tmp["final_dino_rig"] += tmp["T_i_rig"] * splat_raw_2["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_rig"][0, 4:, :, :] * ratio + tmp["final_dino_rig"] += tmp["T_i_rig"] * splat_raw_1["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_rig"][0, 4:-1, :, :] * (1.0 - ratio) + tmp["final_dino_rig"] += tmp["T_i_rig"] * splat_raw_2["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_rig"][0, 4:-1, :, :] * ratio + + #assert False, "indices wrong!" 
+ tmp["final_sal"] += tmp["T_i"] * (splat_raw_1["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_dy"][0, -1:, :, :] + \ + splat_raw_1["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_rig"][0, -1:, :, :] ) * (1.0 - ratio) + tmp["final_sal"] += tmp["T_i"] * (splat_raw_2["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_dy"][0, -1:, :, :] + \ + splat_raw_2["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_rig"][0, -1:, :, :] ) * ratio + + tmp["final_sal_dy"] += tmp["T_i_dy"] * splat_raw_1["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_dy"][0, -1:, :, :] * (1.0 - ratio) + tmp["final_sal_dy"] += tmp["T_i_dy"] * splat_raw_2["splat_raw_rgba_dy"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_dy"][0, -1:, :, :] * ratio + + tmp["final_sal_rig"] += tmp["T_i_rig"] * splat_raw_1["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_1["splat_raw_rgba_rig"][0, -1:, :, :] * (1.0 - ratio) + tmp["final_sal_rig"] += tmp["T_i_rig"] * splat_raw_2["splat_raw_rgba_rig"][0, 3:4, :, :] * splat_raw_2["splat_raw_rgba_rig"][0, -1:, :, :] * ratio + + # blending field is just coming from rigid network; no need to blend with dynamic model @@ -550,6 +583,10 @@ def render_slowmo_full(disps, render_poses, bt_poses, depth8_dy = to8b(torch.clamp(tmp["final_depth_dy"]/depth_threshold, 0., 1.).permute(1, 2, 0).repeat(1, 1, 3).cpu().numpy()) depth8_rig = to8b(torch.clamp(tmp["final_depth_rig"]/depth_threshold, 0., 1.).permute(1, 2, 0).repeat(1, 1, 3).cpu().numpy()) + sal8 = to8b(torch.clamp(tmp["final_sal"], 0., 1.).permute(1, 2, 0).repeat(1, 1, 3).cpu().numpy()) + sal8_dy = to8b(torch.clamp(tmp["final_sal_dy"], 0., 1.).permute(1, 2, 0).repeat(1, 1, 3).cpu().numpy()) + sal8_rig = to8b(torch.clamp(tmp["final_sal_rig"], 0., 1.).permute(1, 2, 0).repeat(1, 1, 3).cpu().numpy()) + blend8 = to8b(tmp["final_blend"].permute(1, 2, 0).repeat(1, 1, 3).cpu().numpy()) #assert False, [H, W, hwf] @@ -583,6 +620,14 @@ def render_slowmo_full(disps, render_poses, bt_poses, #assert False, torch.stack([tmp["final_dino"], tmp["final_dino_dy"], tmp["final_dino_rig"]], dim=0).shape torch.save(torch.stack([tmp["final_dino"], tmp["final_dino_dy"], tmp["final_dino_rig"]], dim=0), filename) + filename = os.path.join(save_sal_dir, '{:03d}.jpg'.format(i)) + imageio.imwrite(filename, sal8) + filename = os.path.join(save_sal_dy_dir, '{:03d}.jpg'.format(i)) + imageio.imwrite(filename, sal8_dy) + filename = os.path.join(save_sal_rig_dir, '{:03d}.jpg'.format(i)) + imageio.imwrite(filename, sal8_rig) + + #filename = os.path.join(save_dino_dy_dir, '{:03d}.pt'.format(i)) #torch.save(tmp["final_dino_dy"], filename) @@ -834,6 +879,7 @@ def render_pcd_cluster(index, salient_labels, render_poses, bt_poses, tmp["final_labels"] = -torch.ones((H*W, num_sample)).long().cpu() for j in tqdm(range(0, num_sample)): + assert False, "splat_full_img changed by adding saliency, indices moved! " splat_full_img(ret1, ratio, tmp["R_w2t"], tmp["t_w2t"], j, H, W, focal, True, splat_raw=splat_raw_1) splat_full_img(ret2, 1. 
- ratio, tmp["R_w2t"], tmp["t_w2t"], j, H, W, focal, False, splat_raw=splat_raw_2) #assert False, [splat_alpha_dy_1.shape, splat_rgb_dy_1.shape] @@ -1316,6 +1362,394 @@ def render_sal_3D(render_poses, img_idx_embeds, tmp[entry] = None assert False, "Pause" +def render_pcd(render_poses, + hwf, chunk, render_kwargs, + dino_weight, + flow_weight, + quant_index=None, + index=None, + label_mapper=None, + salient_labels=None, + savedir=None, + render_factor=0, + alpha_threshold=0.2, + trs_threshold=0.2, + sal_threshold=0., + sample_interval=200, + n_cluster=25, + thresh=0.11, + #motion thresh = 0.02 + votes_percentage=70, + render_mode=False, + weighted_sample=True, + use_motion_filter=False, + similarity_thresh=.5, + elbow=0.975): + # import scipy.io + torch.manual_seed(0) + np.random.seed(0) + H, W, focal = hwf + + if render_factor!=0: + # Render downsampled for speed + H = H//render_factor + W = W//render_factor + focal = focal/render_factor + #assert False, [H, W, focal] + t = time.time() + + count = 0 + + #save_pcd_dir = os.path.join(savedir, 'pcds') + # save_depth_dir = os.path.join(savedir, 'depths') + #save_cls_dir = os.path.join(savedir, 'cls') + #os.makedirs(savedir, exist_ok=True) + #os.makedirs(save_pcd_dir, exist_ok=True) + # os.makedirs(save_depth_dir, exist_ok=True) + tmp = { + "final_dino": None, + "final_cluster": None, + "z_vals": None, + "render_pose": None, + "R_w2t": None, + "t_w2t": None, + "alpha_final": None, + "points": None, + "dinos": None, + "sals": None, + "raw_opq": None, + "raw_opq_rigid": None, + "density": None + } + + + samples = { + "dinos": None, + "times": None, + "sals": None, + "points": None + + } + + if render_mode: + render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 24, axis=0), render_poses[:12]], axis=0) + #assert False, render_poses.shape + times = list(range(24)) + list(range(24)) + [0]*12 + else: + times = list(range(24)) + + num_img = 24 + #num_img = 2 + + '''delete this!~''' + #render_poses = render_poses[:2] + #times = list(range(2)) + #num_img = 2 + + + '''store all non-opaque points and shuffle''' + ret = {} + num_samples_per_image = [] + samples = {} + #saliency_maps_list = [] + #rgb_list = [] + labels = None + for i in tqdm(range(render_poses.shape[0])): + cur_time = times[i] + #flow_time = int(np.floor(cur_time)) + #ratio = cur_time - np.floor(cur_time) + #print('cur_time ', i, cur_time, ratio) + #t = time.time() + + #int_rot, int_trans = linear_pose_interp(render_poses[flow_time, :3, 3], + # render_poses[flow_time, :3, :3], + # render_poses[flow_time + 1, :3, 3], + # render_poses[flow_time + 1, :3, :3], + # ratio) + int_rot = render_poses[cur_time, :3, :3] + int_trans = render_poses[cur_time, :3, 3] + int_poses = np.concatenate((int_rot, int_trans[:, np.newaxis]), 1) + int_poses = np.concatenate([int_poses[:3, :4], np.array([0.0, 0.0, 0.0, 1.0])[np.newaxis, :]], axis=0) + + #int_poses = np.dot(int_poses, bt_poses[i]) + + tmp["render_pose"] = torch.Tensor(int_poses).to(device) + + tmp["R_w2t"] = tmp["render_pose"][:3, :3].transpose(0, 1) + tmp["t_w2t"] = -torch.matmul(tmp["R_w2t"], tmp["render_pose"][:3, 3:4]) + + img_idx_embed = cur_time/float(num_img) * 2. - 1.0 + #img_idx_embed_1 = (np.floor(cur_time))/float(num_img) * 2. - 1.0 + #img_idx_embed_2 = (np.floor(cur_time) + 1)/float(num_img) * 2. 
- 1.0 + + print('img_idx_embed ', cur_time, img_idx_embed) + + ret = render_sm(img_idx_embed, 0, False, + num_img, + H, W, focal, + chunk=1024*16, + c2w=tmp["render_pose"], + return_sem=True, + **render_kwargs) + #for key in ret: + # if key in ["raw_dino", "raw_dino_rigid"]: + # ret[key] = ret[key].cpu() + # else: + # ret[key] = ret[key].cuda() + num_sample = ret['raw_rgb'].shape[2] + + #assert False, ret["raw_rgb"].shape + # get opaque filter to only remain non-empty space points + + + tmp["T_i"] = torch.ones((ret["raw_rgb"].shape[0], ret["raw_rgb"].shape[1], num_sample, 1)).cpu() + for j in tqdm(range(0, num_sample-1)): + tmp["alpha_final"] = 1.0 - \ + (1.0 - ret["raw_alpha"][..., j, None]) * \ + (1.0 - ret["raw_alpha_rigid"][..., j, None]) + tmp["T_i"][..., j+1, :] = tmp["T_i"][..., j, :] * (1.0 - tmp["alpha_final"] + 1e-10) + #tmp["T_i"] = 1.-tmp["T_i"][opaque] + #assert False, [tmp["T_i"].shape, ret["raw_alpha"].shape, + #opaque.shape] + tmp["density"] = 1. - (1. - ret["raw_alpha"])*(1. - ret["raw_alpha_rigid"]) + + + #opaque = (opaque > alpha_threshold) + new_opaque = (tmp["density"] > alpha_threshold) + opaque = (tmp["density"] > -1) + tmp["density"] = tmp["density"].view(-1, 1) + #opaque = opaque & (tmp["T_i"][..., 0] > trs_threshold) + #tmp["T_i"] = tmp["T_i"][opaque][..., None] + + + #opaque = opaque & (tmp["dy"] > 0.5) + + #tmp["dinos"] = tmp["dinos"][opaque, :] + #tmp["sals"] = tmp["sals"][opaque, :] + #tmp["points"] = tmp["sals"][opaque, :] + # get each sample's dino feature + #assert False, [opaque.shape, opaque.device, ret["raw_alpha"][opaque].shape, ret["raw_dino"][opaque, :].shape] + + #tmp["dy"] = 1. - (1. - ret["raw_alpha"])*(1. - ret["raw_alpha_rigid"]) + + tmp["raw_opq"] = ret["raw_alpha"][opaque][..., None] + tmp["raw_opq_rigid"] = ret["raw_alpha_rigid"][opaque][..., None] + + tmp["colors"] = tmp["raw_opq"] * ret["raw_rgb"][opaque, :] + tmp["raw_opq_rigid"] * ret["raw_rgb_rigid"][opaque, :] + tmp["colors"] /= (tmp["raw_opq"]+tmp["raw_opq_rigid"]) + + + # get each sample's saliency information + tmp["sals"] = tmp["raw_opq"] * ret["raw_sal"][opaque, :] + tmp["raw_opq_rigid"] * ret["raw_sal_rigid"][opaque, :] + tmp["sals"] /= (tmp["raw_opq"] +tmp["raw_opq_rigid"] ) + # get each sample's 3D position in ndc space + #tmp["z_vals"] = + #assert False, [ret["rays_o"][None, :].shape] + #assert False, [ret["rays_o"][:, :, None, :].repeat(1, 1, opaque.shape[-1], 1)[opaque, :].shape] + tmp["points"] = ret["rays_o"].cpu()[:, :, None, :].repeat(1, 1, opaque.shape[-1], 1)[opaque, :] + ret["rays_d"].cpu()[:, :, None, :].repeat(1, 1, opaque.shape[-1], 1)[opaque, :] * \ + ret["z_vals"][opaque][:, None] + #tmp["points"] = ret['rays_o'][opaque,None,:] + ret['rays_d'][opaque,None,:]* (ret['z_vals'][opaque,:,None]) + #tmp["dy"] = tmp["dy"][opaque][:, None] + #tmp["sf_prev"] = ret["raw_sf_ref2prev"][opaque] + #tmp["sf_post"] = ret["raw_sf_ref2post"][opaque] + + tmp["raw_opq"] = tmp["raw_opq"].to(ret["raw_dino"].device) + tmp["raw_opq_rigid"] = tmp["raw_opq_rigid"].to(ret["raw_dino"].device) + tmp["dinos"] = tmp["raw_opq"] * ret["raw_dino"][opaque, :] + tmp["raw_opq_rigid"] * ret["raw_dino_rigid"][opaque, :] + tmp["dinos"] /= (tmp["raw_opq"]+tmp["raw_opq_rigid"]) + + ## not working!!! as dino values are too far away from normalized! 
+ # normalize each domain so that they first fall between -1 and 1, then is divided by their dimension + # this is the way used in vanilla transformer + #samples["dinos"] /= math.sqrt(samples["dinos"].shape[-1]) + + normed = torch.nn.functional.normalize(tmp["dinos"], dim=-1) + #torch.save(samples["dinos"] / normed, os.path.join(savedir, "dino_normalizer.pt") ) + tmp["dinos"] = normed + + feature = torch.cat([ + #torch.nn.functional.normalize(samples["colors"], dim=-1), + tmp["dinos"]*dino_weight, + # samples["sals"], + # samples["dy"], + # torch.nn.functional.normalize(samples["sf_prev"], dim=-1) * flow_weight, + # torch.nn.functional.normalize(samples["sf_post"], dim=-1) * flow_weight, + # samples["points"], + # samples["times"], + ], + dim=-1).cpu().numpy().astype(np.float32) + + squared_distances, labels = index.search(feature, 1) + for key in label_mapper: + labels[labels == key] = label_mapper[key] + labels[~np.isin(labels, salient_labels)] = -1 + labels[~new_opaque.view(labels.shape[0]).numpy()] = -1 + tmp["colors"][~new_opaque.view(labels.shape[0])] *= 0 + + point_cloud = o3d.geometry.PointCloud() + tmp["points"][..., -1] *= -1 + tmp["points"][..., 0] *= float(W)/float(H) + + point_cloud.points = o3d.utility.Vector3dVector(tmp["points"].numpy()) # array_of_points.shape = (N,3) + point_cloud.colors = o3d.utility.Vector3dVector(d3_41_colors_rgb[labels[..., 0]]/255.) # array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("vis_cls.ply", point_cloud) + + point_cloud.colors = o3d.utility.Vector3dVector(tmp["colors"].numpy()) # array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("vis_color.ply", point_cloud) + + point_cloud.colors = o3d.utility.Vector3dVector(np.repeat(tmp["sals"].numpy(), 3, axis=-1)) # array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("vis_sals.ply", point_cloud) + + + print(tmp["sals"].shape, tmp["density"].shape) + print(type(tmp["sals"]), type(tmp["density"])) + point_cloud.colors = o3d.utility.Vector3dVector(np.repeat(tmp["density"].numpy(), 3, axis=-1)) # array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("vis_dens.ply", point_cloud) + + pickle.dump(new_opaque.numpy(), open(os.path.join(".", "new_opaque.pkl"), 'wb')) + + + #torch.save(new_opaque, ".pt") + assert False, [d3_41_colors_rgb[labels_per_image[0][..., 0]].shape, np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0].shape] + + + for key in tmp: + tmp[key] = None + for key in ret: + ret[key] = None + torch.cuda.empty_cache() + + + + + + + #samples["T_i"] = samples["T_i"].numpy() + + #samples["sals"] = 2.*samples["sals"] - 1. 
+ # points are roughly between -1 and 1; no modification for now + #assert False, [torch.max(samples["points"][:, 0]), torch.min(samples["points"][:, 0]), + # torch.max(samples["points"][:, 1]), torch.min(samples["points"][:, 1]), + # torch.max(samples["points"][:, 2]), torch.min(samples["points"][:, 2]),] + #samples["points"] /= math.sqrt(samples["points"].shape[-1]) + #normed = torch.nn.functional.normalize(samples["points"], dim=-1) + #torch.save(samples["points"] / normed, os.path.join(savedir, "point_normalizer.pt") ) + #samples["points"] = normed + + #assert False, samples["dinos"].shape + #assert False, [(k, samples[k].shape) for k in samples] + #ngpus = faiss.get_num_gpus() + #assert False, ngpus + if True or quant_index is None or index is None or salient_labels is None: + print(f"start clustering on {len(feature)} points...") + + ''' GPU version not successful :< only one class + res = faiss.StandardGpuResources() + #flat_config = faiss.GpuIndexFlatConfig() + #flat_config.device = 0 + #quantizer = faiss.GpuIndexFlatL2(res, feature.shape[-1], flat_config) + feature = samples["dinos"].cpu().numpy() + M = int(feature.shape[-1] / 4) # for PQ: #subquantizers + nbits_per_index = 8 # for PQ + nlist = 1024 # for PQ + #assert False, [feature.shape[-1], M] + quant_index = faiss.GpuIndexIVFPQ(res, feature.shape[-1], nlist, M, nbits_per_index, faiss.METRIC_INNER_PRODUCT) + quant_index.train(feature) + D, I, R = quant_index.search_and_reconstruct(feature, 1) + assert False, [np.unique(D), np.unique(I), R.shape] + #quant_index.add(feature) + + + quantizer = faiss.ProductQuantizer(64, 8, 8) + dino_feat = np.ascontiguousarray(feature[:, :64]) + quantizer.train(dino_feat) + codes = quantizer.compute_codes(dino_feat) + x2 = quantizer.decode(codes) + avg_relative_error = ((dino_feat- x2)**2).sum() / (dino_feat ** 2).sum() + assert False, avg_relative_error + ''' + # faiss Kmeans + #algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_cluster, gpu=False, niter=300, nredo=10, seed=1234, verbose=False) + #algorithm.train(feature) + #_, labels = algorithm.index.search(feature, 1) + #faiss.write_index(faiss.index_gpu_to_cpu(algorithm.index), os.path.join(savedir, "large.index")) + #_, labels = algorithm.index.search(feature, 1) + #index = algorithm.index + + # sklearn spectralclustering + # not working as too big an array + #clustering = SpectralClustering(n_clusters=n_cluster, + #assign_labels="discretize", + #random_state=0).fit(feature) + + # DBSCAN + #clustering = DBSCAN(eps=0.5, min_samples=5, n_jobs=4).fit(feature) + #labels = clustering.labels_[:, None] + #pickle.dump(clustering, open(os.path.join(savedir, "save.pkl"), "wb")) + #n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) + #assert False, [n_clusters_, labels.shape] + sum_of_squared_dists = [] + n_cluster_range = list(range(1, n_cluster)) + for n_clu in tqdm(n_cluster_range): + #algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_clu, gpu=True, niter=300, nredo=10, seed=1234, verbose=False) + algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_clu, gpu=False, niter=300, nredo=10, seed=1234, verbose=False) + algorithm.train(feature) + squared_distances, labels = algorithm.index.search(feature, 1) + objective = squared_distances.sum() + sum_of_squared_dists.append(objective / feature.shape[0]) + if (len(sum_of_squared_dists) > 1 and sum_of_squared_dists[-1] > elbow * sum_of_squared_dists[-2]): + break + + faiss.write_index(algorithm.index, os.path.join(savedir, "large.index")) + + num_labels = np.max(n_clu) + 1 + labels_per_image = 
np.split(labels, num_samples_per_image)[:num_img] + #print(labels_per_image[0].shape) + + centroids = algorithm.centroids + #centroids = np.linalg.norm(centroids, axis=0) + #assert False, centroids.shape + sims = -np.ones((len(centroids), len(centroids))) + assert samples["dinos"].shape[-1] == 64 + for c1 in range(len(centroids)): + item_1 = centroids[c1][:64] + for c2 in range(c1+1, len(centroids)): + item_2 = centroids[c2][:64] + sims[c1, c2] = np.dot(item_1, item_2) / (np.linalg.norm(item_1) * np.linalg.norm(item_2)) + print(c1, c2, sims[c1, c2]) + label_mapper = {} + #print(salient_labels) + for c2 in range(len(centroids)): + for c1 in range(c2): + if sims[c1, c2] > similarity_thresh: + label_mapper[c2] = c1 + break + pickle.dump(label_mapper, open(os.path.join(savedir, "label_mapper.pkl"), 'wb')) + #assert False + #print(np.unique(labels)) + for key in label_mapper: + print(key, label_mapper[key]) + #old_labels_per_image = copy.deepcopy(labels_per_image) + for c1 in range(len(centroids)): + key = len(centroids) - c1 - 1 + if key in label_mapper: + labels[labels == key] = label_mapper[key] + #labels = labels[:, None] + #assert False, np.unique(labels) + + + labels_per_image = np.split(labels, num_samples_per_image)[:num_img] + #assert False, labels_per_image[0].shape + ''' + point_cloud = o3d.geometry.PointCloud() + point_cloud.points = o3d.utility.Vector3dVector(np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0]) # array_of_points.shape = (N,3) + point_cloud.colors = o3d.utility.Vector3dVector(d3_41_colors_rgb[labels_per_image[0][..., 0]]/255.) # array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("test.ply", point_cloud) + assert False, [d3_41_colors_rgb[labels_per_image[0][..., 0]].shape, np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0].shape] + ''' + + + def cluster_pcd(render_poses, hwf, chunk, render_kwargs, dino_weight, @@ -1326,12 +1760,21 @@ def cluster_pcd(render_poses, savedir=None, render_factor=0, alpha_threshold=0.2, - sample_interval=20, - n_cluster=10, - thresh=0.01, - votes_percentage=75): + trs_threshold=0.2, + sal_threshold=0., + sample_interval=200, + n_cluster=25, + thresh=0.11, + #motion thresh = 0.02 + votes_percentage=70, + render_mode=False, + weighted_sample=True, + use_motion_filter=False, + similarity_thresh=.5, + elbow=0.975): # import scipy.io torch.manual_seed(0) + np.random.seed(0) H, W, focal = hwf if render_factor!=0: @@ -1360,7 +1803,9 @@ def cluster_pcd(render_poses, "alpha_final": None, "points": None, "dinos": None, - "sals": None + "sals": None, + "raw_opq": None, + "raw_opq_rigid": None } @@ -1372,14 +1817,31 @@ def cluster_pcd(render_poses, } - num_img = render_poses.shape[0] + if render_mode: + render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 24, axis=0), render_poses[:12]], axis=0) + #assert False, render_poses.shape + times = list(range(24)) + list(range(24)) + [0]*12 + else: + times = list(range(24)) + + num_img = 24 #num_img = 2 + + '''delete this!~''' + #render_poses = render_poses[:2] + #times = list(range(2)) + #num_img = 2 + '''store all non-opaque points and shuffle''' ret = {} num_samples_per_image = [] - for i in tqdm(range(num_img)): - cur_time = i + samples = {} + #saliency_maps_list = [] + #rgb_list = [] + labels = None + for i in tqdm(range(render_poses.shape[0])): + cur_time = times[i] #flow_time = int(np.floor(cur_time)) #ratio = cur_time - np.floor(cur_time) #print('cur_time ', i, cur_time, ratio) @@ -1415,13 +1877,34 @@ def cluster_pcd(render_poses, 
c2w=tmp["render_pose"], return_sem=True, **render_kwargs) + #for key in ret: + # if key in ["raw_dino", "raw_dino_rigid"]: + # ret[key] = ret[key].cpu() + # else: + # ret[key] = ret[key].cuda() num_sample = ret['raw_rgb'].shape[2] - + #assert False, ret["raw_rgb"].shape # get opaque filter to only remain non-empty space points + + + tmp["T_i"] = torch.ones((ret["raw_rgb"].shape[0], ret["raw_rgb"].shape[1], num_sample, 1)).cpu() + for j in tqdm(range(0, num_sample-1)): + tmp["alpha_final"] = 1.0 - \ + (1.0 - ret["raw_alpha"][..., j, None]) * \ + (1.0 - ret["raw_alpha_rigid"][..., j, None]) + tmp["T_i"][..., j+1, :] = tmp["T_i"][..., j, :] * (1.0 - tmp["alpha_final"] + 1e-10) + #tmp["T_i"] = 1.-tmp["T_i"][opaque] opaque = 1. - (1. - ret["raw_alpha"])*(1. - ret["raw_alpha_rigid"]) - opaque = opaque > alpha_threshold - tmp["dy"] = ret["raw_alpha"]/(ret["raw_alpha"] + ret["raw_alpha_rigid"]) + #assert False, [tmp["T_i"].shape, ret["raw_alpha"].shape, + #opaque.shape] + + + opaque = (opaque > alpha_threshold) + opaque = opaque & (tmp["T_i"][..., 0] > trs_threshold) + tmp["T_i"] = tmp["T_i"][opaque][..., None] + + #opaque = opaque & (tmp["dy"] > 0.5) #tmp["dinos"] = tmp["dinos"][opaque, :] @@ -1429,14 +1912,19 @@ def cluster_pcd(render_poses, #tmp["points"] = tmp["sals"][opaque, :] # get each sample's dino feature #assert False, [opaque.shape, opaque.device, ret["raw_alpha"][opaque].shape, ret["raw_dino"][opaque, :].shape] - tmp["colors"] = ret["raw_alpha"][opaque][..., None] * ret["raw_rgb"][opaque, :] + ret["raw_alpha_rigid"][opaque][..., None] * ret["raw_rgb_rigid"][opaque, :] - tmp["colors"] /= (ret["raw_alpha"][opaque][..., None]+ret["raw_alpha_rigid"][opaque][..., None]) - tmp["dinos"] = ret["raw_alpha"][opaque][..., None].cuda() * ret["raw_dino"][opaque, :] + ret["raw_alpha_rigid"][opaque][..., None].cuda() * ret["raw_dino_rigid"][opaque, :] - tmp["dinos"] /= (ret["raw_alpha"][opaque][..., None].cuda()+ret["raw_alpha_rigid"][opaque][..., None].cuda()) + tmp["dy"] = 1. - (1. - ret["raw_alpha"])*(1. 
- ret["raw_alpha_rigid"]) + + tmp["raw_opq"] = ret["raw_alpha"][opaque][..., None] + tmp["raw_opq_rigid"] = ret["raw_alpha_rigid"][opaque][..., None] + + tmp["colors"] = tmp["raw_opq"] * ret["raw_rgb"][opaque, :] + tmp["raw_opq_rigid"] * ret["raw_rgb_rigid"][opaque, :] + tmp["colors"] /= (tmp["raw_opq"]+tmp["raw_opq_rigid"]) + + # get each sample's saliency information - tmp["sals"] = ret["raw_alpha"][opaque][..., None] * ret["raw_sal"][opaque, :] + ret["raw_alpha_rigid"][opaque][..., None] * ret["raw_sal_rigid"][opaque, :] - tmp["sals"] /= (ret["raw_alpha"][opaque][..., None]+ret["raw_alpha_rigid"][opaque][..., None]) + tmp["sals"] = tmp["raw_opq"] * ret["raw_sal"][opaque, :] + tmp["raw_opq_rigid"] * ret["raw_sal_rigid"][opaque, :] + tmp["sals"] /= (tmp["raw_opq"] +tmp["raw_opq_rigid"] ) # get each sample's 3D position in ndc space #tmp["z_vals"] = #assert False, [ret["rays_o"][None, :].shape] @@ -1446,7 +1934,22 @@ def cluster_pcd(render_poses, #tmp["points"] = ret['rays_o'][opaque,None,:] + ret['rays_d'][opaque,None,:]* (ret['z_vals'][opaque,:,None]) tmp["dy"] = tmp["dy"][opaque][:, None] tmp["sf_prev"] = ret["raw_sf_ref2prev"][opaque] - tmp["sf_post"] = ret["raw_sf_ref2post"][opaque] + tmp["sf_post"] = ret["raw_sf_ref2post"][opaque] + + tmp["raw_opq"] = tmp["raw_opq"].to(ret["raw_dino"].device) + tmp["raw_opq_rigid"] = tmp["raw_opq_rigid"].to(ret["raw_dino"].device) + tmp["dinos"] = tmp["raw_opq"] * ret["raw_dino"][opaque, :] + tmp["raw_opq_rigid"] * ret["raw_dino_rigid"][opaque, :] + tmp["dinos"] /= (tmp["raw_opq"]+tmp["raw_opq_rigid"]) + + is_sal = (tmp["sals"] > sal_threshold).view(-1) + tmp["colors"] = tmp["colors"][is_sal, :] + tmp["dinos"] = tmp["dinos"][is_sal.cuda(), :] + tmp["sals"] = tmp["sals"][is_sal, :] + tmp["points"] = tmp["points"][is_sal, :] + tmp["dy"] = tmp["dy"][is_sal, :] + tmp["sf_prev"] = tmp["sf_prev"][is_sal, :] + tmp["sf_post"] = tmp["sf_post"][is_sal, :] + tmp["T_i"] = tmp["T_i"][is_sal, :] # shuffle the points before selection indices = torch.randperm(tmp["dinos"].size()[0]) @@ -1457,35 +1960,85 @@ def cluster_pcd(render_poses, tmp["dy"] = tmp["dy"][indices, :] tmp["sf_prev"] = tmp["sf_prev"][indices, :] tmp["sf_post"] = tmp["sf_post"][indices, :] + tmp["T_i"] = tmp["T_i"][indices, :] # select sample points for cluster at this time step - if i == 0: - samples["times"] = torch.ones_like(tmp["sals"])[::sample_interval, :] * img_idx_embed - samples["dinos"] = tmp["dinos"][::sample_interval, :].cpu() - samples["sals"] = tmp["sals"][::sample_interval, :] - samples["points"] = tmp["points"][::sample_interval, :] - samples["dy"] = tmp["dy"][::sample_interval, :] - samples["colors"] = tmp["colors"][::sample_interval, :] - samples["sf_prev"] = tmp["sf_prev"][::sample_interval, :] - samples["sf_post"] = tmp["sf_post"][::sample_interval, :] - num_samples_per_image.append(samples["times"].shape[0]) + if not weighted_sample: + if i == 0 or samples["dinos"] is None: + samples["times"] = torch.ones_like(tmp["sals"])[::sample_interval, :] * img_idx_embed + samples["dinos"] = tmp["dinos"][::sample_interval, :].cpu() + samples["sals"] = tmp["sals"][::sample_interval, :] + samples["points"] = tmp["points"][::sample_interval, :] + samples["dy"] = tmp["dy"][::sample_interval, :] + samples["T_i"] = tmp["T_i"][::sample_interval, :] + samples["colors"] = tmp["colors"][::sample_interval, :] + samples["sf_prev"] = tmp["sf_prev"][::sample_interval, :] + samples["sf_post"] = tmp["sf_post"][::sample_interval, :] + num_samples_per_image.append(samples["times"].shape[0]) + else: + + 
samples["times"] = torch.cat((samples["times"], torch.ones_like(tmp["sals"])[::sample_interval, :] * img_idx_embed), dim=0) + samples["dinos"] = torch.cat((samples["dinos"], tmp["dinos"][::sample_interval, :].cpu()), dim=0) + samples["sals"] = torch.cat((samples["sals"], tmp["sals"][::sample_interval, :]), dim=0) + samples["points"] = torch.cat((samples["points"], tmp["points"][::sample_interval, :]), dim=0) + samples["dy"] = torch.cat((samples["dy"], tmp["dy"][::sample_interval, :]), dim=0) + samples["T_i"] = torch.cat((samples["T_i"], tmp["T_i"][::sample_interval, :]), dim=0) + samples["colors"] = torch.cat((samples["colors"], tmp["colors"][::sample_interval, :]), dim=0) + samples["sf_prev"] = torch.cat((samples["sf_prev"], tmp["sf_prev"][::sample_interval, :]), dim=0) + samples["sf_post"] = torch.cat((samples["sf_post"], tmp["sf_post"][::sample_interval, :]), dim=0) + num_samples_per_image.append(samples["times"].shape[0]) else: + list_of_candidates = list(range(tmp["sals"].shape[0])) + number_of_items_to_pick = len(list_of_candidates) // sample_interval + probability_distribution = tmp["sals"][:, 0].numpy() + #probability_distribution = torch.norm(probability_distribution, dim=0).numpy() + probability_distribution = probability_distribution / np.sum(probability_distribution) + probability_distribution = probability_distribution.tolist() + gap = 1.0 - sum(probability_distribution) + tmp_id = 0 + while probability_distribution[tmp_id] < -gap: + tmp_id += 1 + probability_distribution[tmp_id] += gap - samples["times"] = torch.cat((samples["times"], torch.ones_like(tmp["sals"])[::sample_interval, :] * img_idx_embed), dim=0) - samples["dinos"] = torch.cat((samples["dinos"], tmp["dinos"][::sample_interval, :].cpu()), dim=0) - samples["sals"] = torch.cat((samples["sals"], tmp["sals"][::sample_interval, :]), dim=0) - samples["points"] = torch.cat((samples["points"], tmp["points"][::sample_interval, :]), dim=0) - samples["dy"] = torch.cat((samples["dy"], tmp["dy"][::sample_interval, :]), dim=0) - samples["colors"] = torch.cat((samples["colors"], tmp["colors"][::sample_interval, :]), dim=0) - samples["sf_prev"] = torch.cat((samples["sf_prev"], tmp["sf_prev"][::sample_interval, :]), dim=0) - samples["sf_post"] = torch.cat((samples["sf_post"], tmp["sf_post"][::sample_interval, :]), dim=0) - num_samples_per_image.append(samples["times"].shape[0]) + #probability_distribution /= sum(probability_distribution) + sampled_ind = np.random.choice(list_of_candidates, number_of_items_to_pick, p=probability_distribution) + + + if i == 0 or samples["dinos"] is None: + samples["times"] = torch.ones_like(tmp["sals"])[sampled_ind, :] * img_idx_embed + samples["dinos"] = tmp["dinos"][sampled_ind, :].cpu() + samples["sals"] = tmp["sals"][sampled_ind, :] + samples["points"] = tmp["points"][sampled_ind, :] + samples["dy"] = tmp["dy"][sampled_ind, :] + samples["T_i"] = tmp["T_i"][sampled_ind, :] + samples["colors"] = tmp["colors"][sampled_ind, :] + samples["sf_prev"] = tmp["sf_prev"][sampled_ind, :] + samples["sf_post"] = tmp["sf_post"][sampled_ind, :] + num_samples_per_image.append(samples["times"].shape[0]) + else: + samples["times"] = torch.cat((samples["times"], torch.ones_like(tmp["sals"])[sampled_ind, :] * img_idx_embed), dim=0) + samples["dinos"] = torch.cat((samples["dinos"], tmp["dinos"][sampled_ind, :].cpu()), dim=0) + samples["sals"] = torch.cat((samples["sals"], tmp["sals"][sampled_ind, :]), dim=0) + samples["points"] = torch.cat((samples["points"], tmp["points"][sampled_ind, :]), dim=0) + samples["dy"] = 
torch.cat((samples["dy"], tmp["dy"][sampled_ind, :]), dim=0) + samples["T_i"] = torch.cat((samples["T_i"], tmp["T_i"][sampled_ind, :]), dim=0) + samples["colors"] = torch.cat((samples["colors"], tmp["colors"][sampled_ind, :]), dim=0) + samples["sf_prev"] = torch.cat((samples["sf_prev"], tmp["sf_prev"][sampled_ind, :]), dim=0) + samples["sf_post"] = torch.cat((samples["sf_post"], tmp["sf_post"][sampled_ind, :]), dim=0) + num_samples_per_image.append(samples["times"].shape[0]) for key in tmp: tmp[key] = None for key in ret: ret[key] = None torch.cuda.empty_cache() + + if render_mode and ((i==render_poses.shape[0] -1) or (i % 20 == 19)): + assert False, "Not implemented yet" + + if render_mode: + assert False, "Not implemented yet" + ## not working!!! as dino values are too far away from normalized! @@ -1497,15 +2050,17 @@ def cluster_pcd(render_poses, #torch.save(samples["dinos"] / normed, os.path.join(savedir, "dino_normalizer.pt") ) samples["dinos"] = normed + #samples["T_i"] = samples["T_i"].numpy() + #samples["sals"] = 2.*samples["sals"] - 1. # points are roughly between -1 and 1; no modification for now #assert False, [torch.max(samples["points"][:, 0]), torch.min(samples["points"][:, 0]), # torch.max(samples["points"][:, 1]), torch.min(samples["points"][:, 1]), # torch.max(samples["points"][:, 2]), torch.min(samples["points"][:, 2]),] #samples["points"] /= math.sqrt(samples["points"].shape[-1]) - normed = torch.nn.functional.normalize(samples["points"], dim=-1) + #normed = torch.nn.functional.normalize(samples["points"], dim=-1) #torch.save(samples["points"] / normed, os.path.join(savedir, "point_normalizer.pt") ) - samples["points"] = normed + #samples["points"] = normed #assert False, samples["dinos"].shape #assert False, [(k, samples[k].shape) for k in samples] @@ -1513,12 +2068,13 @@ def cluster_pcd(render_poses, #torch.nn.functional.normalize(samples["colors"], dim=-1), samples["dinos"]*dino_weight, # samples["sals"], - samples["dy"], - torch.nn.functional.normalize(samples["sf_prev"], dim=-1) * flow_weight, - torch.nn.functional.normalize(samples["sf_post"], dim=-1) * flow_weight, - samples["points"], - samples["times"],], - dim=-1).numpy() + # samples["dy"], + # torch.nn.functional.normalize(samples["sf_prev"], dim=-1) * flow_weight, + # torch.nn.functional.normalize(samples["sf_post"], dim=-1) * flow_weight, + # samples["points"], + # samples["times"], + ], + dim=-1).cpu().numpy().astype(np.float32) #ngpus = faiss.get_num_gpus() #assert False, ngpus if True or quant_index is None or index is None or salient_labels is None: @@ -1550,11 +2106,12 @@ def cluster_pcd(render_poses, assert False, avg_relative_error ''' # faiss Kmeans - algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_cluster, gpu=True, niter=300, nredo=10, seed=1234, verbose=False) - algorithm.train(feature) - faiss.write_index(faiss.index_gpu_to_cpu(algorithm.index), os.path.join(savedir, "large.index")) - _, labels = algorithm.index.search(feature, 1) - index = algorithm.index + #algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_cluster, gpu=False, niter=300, nredo=10, seed=1234, verbose=False) + #algorithm.train(feature) + #_, labels = algorithm.index.search(feature, 1) + #faiss.write_index(faiss.index_gpu_to_cpu(algorithm.index), os.path.join(savedir, "large.index")) + #_, labels = algorithm.index.search(feature, 1) + #index = algorithm.index # sklearn spectralclustering # not working as too big an array @@ -1568,28 +2125,132 @@ def cluster_pcd(render_poses, #pickle.dump(clustering, open(os.path.join(savedir, 
"save.pkl"), "wb")) #n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0) #assert False, [n_clusters_, labels.shape] + sum_of_squared_dists = [] + n_cluster_range = list(range(1, n_cluster)) + for n_clu in tqdm(n_cluster_range): + #algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_clu, gpu=True, niter=300, nredo=10, seed=1234, verbose=False) + algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_clu, gpu=False, niter=300, nredo=10, seed=1234, verbose=False) + algorithm.train(feature) + squared_distances, labels = algorithm.index.search(feature, 1) + objective = squared_distances.sum() + sum_of_squared_dists.append(objective / feature.shape[0]) + if (len(sum_of_squared_dists) > 1 and sum_of_squared_dists[-1] > elbow * sum_of_squared_dists[-2]): + break + faiss.write_index(algorithm.index, os.path.join(savedir, "large.index")) + + num_labels = np.max(n_clu) + 1 labels_per_image = np.split(labels, num_samples_per_image)[:num_img] - #assert False, num_samples_per_image - #assert False, [len(labels_per_image), [label_per_image.shape for label_per_image in labels_per_image]] - saliency_maps_list = np.split(samples["dy"], num_samples_per_image)[:num_img] + #print(labels_per_image[0].shape) + + centroids = algorithm.centroids + #centroids = np.linalg.norm(centroids, axis=0) + #assert False, centroids.shape + sims = -np.ones((len(centroids), len(centroids))) + assert samples["dinos"].shape[-1] == 64 + for c1 in range(len(centroids)): + item_1 = centroids[c1][:64] + for c2 in range(c1+1, len(centroids)): + item_2 = centroids[c2][:64] + sims[c1, c2] = np.dot(item_1, item_2) / (np.linalg.norm(item_1) * np.linalg.norm(item_2)) + print(c1, c2, sims[c1, c2]) + label_mapper = {} + #print(salient_labels) + for c2 in range(len(centroids)): + for c1 in range(c2): + if sims[c1, c2] > similarity_thresh: + label_mapper[c2] = c1 + break + pickle.dump(label_mapper, open(os.path.join(savedir, "label_mapper.pkl"), 'wb')) + #assert False + #print(np.unique(labels)) + for key in label_mapper: + print(key, label_mapper[key]) + #old_labels_per_image = copy.deepcopy(labels_per_image) + for c1 in range(len(centroids)): + key = len(centroids) - c1 - 1 + if key in label_mapper: + labels[labels == key] = label_mapper[key] + #labels = labels[:, None] + #assert False, np.unique(labels) - votes = np.zeros(n_cluster) + labels_per_image = np.split(labels, num_samples_per_image)[:num_img] + #assert False, labels_per_image[0].shape + ''' + point_cloud = o3d.geometry.PointCloud() + point_cloud.points = o3d.utility.Vector3dVector(np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0]) # array_of_points.shape = (N,3) + point_cloud.colors = o3d.utility.Vector3dVector(d3_41_colors_rgb[labels_per_image[0][..., 0]]/255.) 
# array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("test.ply", point_cloud) + assert False, [d3_41_colors_rgb[labels_per_image[0][..., 0]].shape, np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0].shape] + ''' + + + + + #assert False, num_samples_per_image + #assert False, [len(labels_per_image), [label_per_image.shape for label_per_image in labels_per_image]] + saliency_maps_list = np.split(samples["sals"], num_samples_per_image)[:num_img] + #trans_maps_list = np.split(samples["T_i"], num_samples_per_image)[:num_img] + if use_motion_filter: + samples["sf_post"] = torch.norm(samples["sf_post"], dim=-1) + samples["sf_prev"] = torch.norm(samples["sf_prev"], dim=-1) + ''' + assert False, [ + torch.max(samples["sf_post"]), torch.min(samples["sf_post"]),torch.quantile(samples["sf_post"], 0.5),\ + torch.max(samples["sf_prev"]), torch.min(samples["sf_prev"]),\ + torch.quantile(samples["sf_prev"], 0.5), + ] + ''' + + + saliency_maps_list_prev = np.split(samples["sf_prev"], num_samples_per_image)[:num_img] + saliency_maps_list_post = np.split(samples["sf_post"], num_samples_per_image)[:num_img] + saliency_maps_list = [] + for flow_idx in range(num_img): + if flow_idx == 0: + saliency_maps_list.append(saliency_maps_list_post[0]) + elif flow_idx == num_img-1: + saliency_maps_list.append(saliency_maps_list_prev[num_img-1]) + else: + should_change = saliency_maps_list_post[flow_idx] > saliency_maps_list_prev[flow_idx] + saliency_maps_list_prev[flow_idx][should_change] = saliency_maps_list_post[flow_idx][should_change] + saliency_maps_list.append(saliency_maps_list_prev[flow_idx]) + #print(np.max(saliency_maps_list[-1]), np.min(saliency_maps_list[-1]),\ + #np.quantile(saliency_maps_list[-1], 0.5)) + votes = np.zeros(num_labels) for image_labels, saliency_map in zip(labels_per_image, saliency_maps_list): - for label in range(n_cluster): + for label in np.unique(labels): #votes[label] += 1 #continue + #label_saliency = saliency_map[image_labels[:, 0] == label].mean() + #print( + # label, label_saliency) if np.any(image_labels[:, 0] == label): - label_saliency = (saliency_map[image_labels[:, 0] == label]).mean() - else: - label_saliency = 0 - print(label, label_saliency) - if label_saliency > thresh: - votes[label] += 1 + label_saliency = saliency_map[image_labels[:, 0] == label].mean() + print(label, label_saliency, + np.quantile(saliency_map[image_labels[:, 0] == label], 0.7),\ + np.quantile(saliency_map[image_labels[:, 0] == label], 0.5)) + #if np.quantile(saliency_map[image_labels[:, 0] == label], 0.7) > thresh: + # votes[label] += 1 + if label_saliency > thresh: + votes[label] += 1 #assert False, votes + print(votes) salient_labels = np.where(votes >= np.ceil(num_img * votes_percentage / 100)) + with open(os.path.join(savedir, "salient.npy"), "wb") as f: + np.save(f, salient_labels) + print(salient_labels) + labels_per_image[0][~np.isin(labels_per_image[0], salient_labels)] = -1 + print(np.unique(labels_per_image[0])) + point_cloud = o3d.geometry.PointCloud() + point_cloud.points = o3d.utility.Vector3dVector(np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0]) # array_of_points.shape = (N,3) + point_cloud.colors = o3d.utility.Vector3dVector(d3_41_colors_rgb[labels_per_image[0][..., 0]]/255.) 
# array_of_colors.shape = (N,3) + o3d.io.write_point_cloud("test.ply", point_cloud) + assert False, [d3_41_colors_rgb[labels_per_image[0][..., 0]].shape, np.split(samples["points"].numpy(), num_samples_per_image)[:num_img][0].shape] + + #salient_labels = np.unique(labels) with open(os.path.join(savedir, "saliency.npy"), "wb") as f: np.save(f, salient_labels) @@ -1896,8 +2557,8 @@ def cluster_2D(render_poses, if render_factor!=0: # Render downsampled for speed - H = H//render_factor - W = W//render_factor + H = int(H/render_factor) + W = int(W//render_factor) focal = focal/render_factor #assert False, [H, W, focal] t = time.time() @@ -1932,14 +2593,27 @@ def cluster_2D(render_poses, } - if render_mode: - render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 24, axis=0), render_poses[:12]], axis=0) - #assert False, render_poses.shape - times = list(range(24)) + list(range(24)) + [0]*12 + if len(render_poses) == 48: + render_poses = render_poses[:48] + num_img = float(render_poses.shape[0]) + if render_mode: + render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 48, axis=0)], axis=0) + times = list(range(48)) + list(range(48)) + else: + render_poses = np.concatenate([render_poses[2*i:2*i+1] for i in range(24)], axis=0) + times = [2*i+1 for i in range(24)] + else: - times = list(range(24)) + render_poses = render_poses[:24] + num_img = float(render_poses.shape[0]) + if render_mode: + render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 24, axis=0), render_poses[:12]], axis=0) + #assert False, render_poses.shape + times = list(range(24)) + list(range(24)) + [0]*12 + else: + times = list(range(24)) - num_img = 24. + #num_img = 24. #num_img = 2 '''store all non-opaque points and shuffle''' @@ -2207,7 +2881,10 @@ def write_label_image(img_clu, idx, name): for c2 in range(len(centroids)): for c1 in range(c2): if sims[c1, c2] > similarity_thresh: - label_mapper[c2] = c1 + if c1 in label_mapper: + label_mapper[c2] = label_mapper[c1] + else: + label_mapper[c2] = c1 break pickle.dump(label_mapper, open(os.path.join(savedir, "label_mapper.pkl"), 'wb')) #assert False @@ -3016,8 +3693,8 @@ def render_2D(render_poses, if render_factor!=0: # Render downsampled for speed - H = H//render_factor - W = W//render_factor + H = int(H/render_factor) + W = int(W/render_factor) focal = focal/render_factor #assert False, [H, W, focal] t = time.time() @@ -3044,12 +3721,19 @@ def render_2D(render_poses, } - - render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 24, axis=0), render_poses[:12]], axis=0) - times = list(range(24)) + list(range(24)) + [0]*12 + if len(render_poses) == 48: + render_poses = render_poses[:48] + num_img = float(render_poses.shape[0]) + render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 48, axis=0)], axis=0) + times = list(range(48)) + list(range(48)) + else: + render_poses = render_poses[:24] + num_img = float(render_poses.shape[0]) + render_poses = np.concatenate([render_poses, np.repeat(render_poses[:1], 24, axis=0), render_poses[:12]], axis=0) + times = list(range(24)) + list(range(24)) + [0]*12 - num_img = 24. + #num_img = 24. 
#num_img = 2 '''store all non-opaque points and shuffle''' diff --git a/Neural-Scene-Flow-Fields/nsff_exp/run_nerf.py b/Neural-Scene-Flow-Fields/nsff_exp/run_nerf.py index 3711836..2d6c476 100644 --- a/Neural-Scene-Flow-Fields/nsff_exp/run_nerf.py +++ b/Neural-Scene-Flow-Fields/nsff_exp/run_nerf.py @@ -57,6 +57,9 @@ def config_parser(): help="cluster point cloud in 3D ") parser.add_argument("--cluster_2D", action="store_true", help="cluster on 2D rendered result ") + parser.add_argument("--render_pcd", action="store_true", + help="render on 3D the clustering result ") + parser.add_argument("--cluster_2D_flow", action="store_true", help="cluster on 2D rendered result with optical flow") @@ -80,7 +83,8 @@ def config_parser(): parser.add_argument("--render_2D", action="store_true", help="Store 2D rendering result") - + parser.add_argument("--use_pcd", action="store_true", + ) parser.add_argument("--final_height", type=int, default=288, help='training image height, default is 512x288') @@ -133,7 +137,7 @@ def config_parser(): parser.add_argument("--render_test", action='store_true', help='do not optimize, reload weights and render out render_poses path') - parser.add_argument("--render_factor", type=int, default=0, + parser.add_argument("--render_factor", type=float, default=0, help='downsampling factor to speed up rendering, set 4 or 8 for fast preview') # dataset options @@ -251,7 +255,7 @@ def config_parser(): def train(): - + #assert False, "Pause" parser = config_parser() args = parser.parse_args() @@ -271,7 +275,9 @@ def train(): poses = poses[:,:3,:4] print('Loaded llff', images.shape, render_poses.shape, hwf, args.datadir) - if args.use_multi_dino_single_sal: + if args.render_slowmo_full: + pass + elif args.use_multi_dino_single_sal: # a version of multiresolution assert args.dino_coe >0, "has to make sure dino is being used" assert args.prep_dino, "Has to make sure dim is small enough other wise explode cpu/gpu" @@ -623,7 +629,13 @@ def train(): start = None - i_test = [] + print("need to filter out 24 images for training if #images is not 24!") + print("if 24, means NSFF; if not 24, means DyCheck") + #assert False, images.shape + if len(images) == 48: + i_test = [2*i+1 for i in range(24)] + else: + i_test = [] i_val = [] #i_test i_train = np.array([i for i in np.arange(int(images.shape[0])) if (i not in i_test and i not in i_val)]) @@ -666,9 +678,11 @@ def train(): f = os.path.join(basedir, expname, 'config.txt') with open(f, 'w') as file: file.write(open(args.config, 'r').read()) - + print("creating nerf model...") + + #assert False # Create nerf model - render_kwargs_train, render_kwargs_test, start, grad_vars, optimizer = create_nerf(feats.shape[-1] if args.dino_coe > 0 else 0, args) + render_kwargs_train, render_kwargs_test, start, grad_vars, optimizer = create_nerf(args.n_components if args.dino_coe > 0 else 0, args) global_step = start bds_dict = { @@ -744,7 +758,7 @@ def train(): return if args.render_slowmo_full: - assert False, "axis may be wrong due to saliency channel!!!" + #assert False, "axis may be wrong due to saliency channel!!!" 
print('RENDER SLOW MOTION') curr_ts = 0 render_poses = poses #torch.Tensor(poses).to(device) @@ -897,6 +911,8 @@ def train(): testsavedir += "_pos" if args.use_time: testsavedir += "_time" + if args.use_pcd: + testsavedir += "_pcd" os.makedirs(testsavedir, exist_ok=True) #assert args.load_algo != '' and os.path.exists(args.load_algo), "must have valid cluster stored" #assert False, [load_algo, n_clusters] @@ -931,6 +947,48 @@ def train(): index = None salient_labels = None label_mapper = None + if args.use_pcd: + try: + #index = faiss.index_cpu_to_gpu(res, 0, faiss.read_index(os.path.join(basedir, expname, 'cluster_2D-%03d'%\ + # target_idx + '_{}_{:06d}'.format('test' if args.render_test else 'path', start), "large.index"))) + pcdsavedir = os.path.join(basedir, expname, + 'cluster_pcd-%03d'%\ + target_idx + '_{}_{:06d}'.format('test' if args.render_test else 'path', start)) + + index = faiss.read_index(os.path.join(pcdsavedir, "large.index")) + #print("I am here!") + salient_labels = np.load(os.path.join(pcdsavedir, "salient.npy")) + #print("I am here!") + if not args.no_merge: + #assert label_mapper is not None + label_mapper = pickle.load(open(os.path.join(pcdsavedir, "label_mapper.pkl"), "rb")) + else: + label_mapper = None + #print("I am here!") + except: + index = None + salient_labels = None + label_mapper = None + + if args.render_pcd: + assert index is not None + assert salient_labels is not None + if not args.no_merge: + assert label_mapper is not None + print("painting point clouds") + render_pcd(render_poses, + hwf, args.chunk, render_kwargs_test, + dino_weight=args.dino_weight, + flow_weight=args.flow_weight, + index = index, + salient_labels = salient_labels, + label_mapper = label_mapper, + render_factor=args.render_factor, + + + ) + return + if args.render_mode: assert index is not None assert salient_labels is not None diff --git a/README.md b/README.md index 02c6702..7951a52 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,324 @@ -# NOF -Neural Object Flow +# Semantic Attention Flow Fields for Monocular Dynamic Scene Decomposition (ICCV 2023) -Referred to code: https://github.com/gaochen315/DynamicNeRF +[Yiqing Liang](lynl7130.github.io), [Eliot Laidlaw](https://www.linkedin.com/in/eliot-laidlaw-472640197/), [Alexander Meyerowitz](https://www.linkedin.com/in/ameyerow/), [Srinath Sridhar](https://cs.brown.edu/people/ssrinath/), [James Tompkin](https://jamestompkin.com/) + +Official Implementation for "Semantic Attention Flow Fields for Monocular Dynamic Scene Decomposition". + +[[```Paper```](https://arxiv.org/abs/2303.01526)] [[```Project```](https://visual.cs.brown.edu/projects/semantic-attention-flow-fields-webpage/)] [[```Data```](#download-data-checkpoints-and-results)] + +## Installation + +Tested with System Spec: + +``` +python==3.7.4 +cmake==3.20.0 +gcc==10.2 +cuda==11.3.1 +cudnn==8.2.0 +``` + +Steps: +``` +python3 -m venv +source /bin/activate + +pip install --upgrade pip + +pip install -r [path/to/repo]/requirements.txt + +# install eigen using apt-get if can; otherwise compile from source: +cd +wget https://gitlab.com/libeigen/eigen/-/archive/master/eigen-master.tar.gz +tar -xzf eigen-master.tar.gz +cd eigen-master +rm -rf build +mkdir build +cd build +cmake .. -DCMAKE_INSTALL_PREFIX= +make -j8 +make install + +#install pydensecrf +pip3 install --force-reinstall cython==0.29.36 +pip install --no-build-isolation git+https://github.com/lucasb-eyer/pydensecrf.git + +``` + +We tested our code with 1 single RTX3090 / A6000. 
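+
+As a quick sanity check of the environment (a suggested snippet, not part of the original scripts), the imports below should succeed inside the activated virtual environment and report a visible GPU:
+
+```
+import torch   # torch==1.12.1+cu113 from requirements.txt
+import faiss   # faiss-gpu, used for k-means clustering
+import cv2     # opencv-python, used for image I/O
+
+print("CUDA available:", torch.cuda.is_available())
+print("GPUs visible to faiss:", faiss.get_num_gpus())
+```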
+
+## Download Data, Checkpoints and Results
+
+[Google Drive Link](https://drive.google.com/drive/folders/1dlO7TmUefTX97qtByMIDCnckCzgTjobC?usp=sharing)
+
+
+```gt_masks.zip```: segmentation annotations for the NVIDIA dataset
+
+```ours_1018_processed_crf.zip```: final results on the NVIDIA dataset
+
+```nvidia_data_full.zip```: processed NVIDIA dataset for training and testing
+
+```checkpoints.zip```: trained models for the NVIDIA dataset
+
+
+
+## Data, Configurations and Checkpoints
+
+| Scene | Data | Config | Checkpoint |
+|---|---|---|---|
+| Balloon1-2 | nvidia_data_full/Balloon1-2 | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_balloon1-2_4.txt | checkpoints/Balloon1-2/360000.tar |
+| Balloon2-2 | nvidia_data_full/Balloon2-2 | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_balloon2-2_4.txt | checkpoints/Balloon2-2/360000.tar |
+| DynamicFace-2 | nvidia_data_full/DynamicFace-2 | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_dynamicFace-2_4.txt | checkpoints/DynamicFace-2/360000.tar |
+| Jumping | nvidia_data_full/Jumping | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_jumping_4.txt | checkpoints/Jumping/360000.tar |
+| Playground | nvidia_data_full/playground | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_playground_4.txt | checkpoints/Playground/360000.tar |
+| Skating-2 | nvidia_data_full/Skating-2 | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_skating-2_4.txt | checkpoints/Skating-2/360000.tar |
+| Truck | nvidia_data_full/Truck-2 | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_truck2_4.txt | checkpoints/Truck/360000.tar |
+| Umbrella | nvidia_data_full/Umbrella | https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_umbrella_4.txt | checkpoints/Umbrella/360000.tar |
+
+To use them, put the data and checkpoints in the locations expected by the config files.
+
+```cd [path/to/repo]/Neural-Scene-Flow-Fields/```
+
+The data folder should be renamed to match ```datadir```.
+
+The checkpoint should be put under ```basedir/expname```.
+
+
+
+
+## Workflow
+
+```cd [path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp```
+
+### Training
+```python run_nerf.py --config [path/to/config/file]```
+
+For now, each scene's (```[path/to/config/file]```) results are stored to:
+```
+├── [path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/
+│ ├── 000000.tar
+│ ├── 010000.tar
+│ ├── ...
+│ ├── 360000.tar
+│ ├── args.txt
+│ ├── config.txt
+```
+
+### After Training, render per-view
+```python run_nerf.py --config [path/to/config/file] --render_2D```
+
+This creates a new folder under the scene folder:
+```
+├── [path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/
+| ├──render_2D-010_path_[last_ckpt_id+1]
+| ├──0_blend.png
+| ├──0_depth.png
+| ├──0_dino.pt
+| ├──0_rgb.png
+| ├──0_sal.png
+| ├──...
+```
+
+#### Split (Shared by all sections):
+* ```0-23```: training views
+
+* ```24-47```: fixed camera 0, moving times
+
+* ```48-59```: moving camera, fixed time = 0
+
+### After Training, cluster per-view
+```python run_nerf.py --config [path/to/config/file] --cluster_2D```
+
+```python run_nerf.py --config [path/to/config/file] --cluster_2D --render_mode```
+
+This creates a new folder under the scene folder:
+```
+├── [path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/
+| ├──cluster_2D-010_path_[last_ckpt_id+1]
+| ├──final
+| ├──0.png
+| ├──1.png
+| ├──...
+| ├──no_merge_no_salient
+| ├──0.png
+| ├──1.png
+| ├──...
+| ├──no_salient
+| ├──0.png
+| ├──1.png
+| ├──...
+| ├──0_bld_full_beforemerge.png
+| ├──0_bld_full.png
+| ├──0_bld.pt
+| ├──0_clu_full_beforemerge.png
+| ├──0_clu_full.png
+| ├──0_clu.png
+| ├──...
+```
+
+### Oracle Vote Using GT mask
+
+```python postprocess_oracle.py --raw_folder [path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/cluster_2D-010_path_[last_ckpt_id+1]/no_salient --gt_folder [path/to/this/scene/mask] [--flip_fg]```
+
+If black denotes the background in your ground-truth masks, add the ```--flip_fg``` flag.
+
+This creates a new folder ```oracle``` under the ```no_salient``` folder.
+
+### CRF postprocessing
+
+Note: ```[path/to/your/final/result]``` can be either your default clustering result folder ```[path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/cluster_2D-010_path_[last_ckpt_id+1]/final``` or the oracle-processed folder ```[path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/cluster_2D-010_path_[last_ckpt_id+1]/no_salient/oracle```.
+
+```python postprocess_per_scene.py --root_dir [path/to/your/final/result]```
+
+```python postprocess_crf_per_scene.py --root_dir [path/to/your/final/result]_processed --render_dir [path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/render_2D-010_path_[last_ckpt_id+1]```
+
+
+
+## Collect Results for NVIDIA dataset
+
+
+First organize every scene's final clustering results (```[path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/cluster_2D-010_path_[last_ckpt_id+1]/final```) and final RGB renderings (```[path/to/repo]/Neural-Scene-Flow-Fields/nsff_exp/logs/[your/exp/name]_F[start_frame]_[end_frame]/render_2D-010_path_[last_ckpt_id+1]/*_rgb.png```) under the same folder:
+```
+├── [path/to/results]
+| ├──Balloon1-2
+| ├──0.png # cluster result
+| ├──0_rgb.png # rgb result
+| ├──...
+| ├──Balloon2-2
+| ├──DynamicFace-2
+| ├──Jumping
+| ├──playground
+| ├──Skating-2
+| ├──Truck-2
+| ├──Umbrella-2
+```
+
+
+### PostProcessing (NVIDIA dataset only)
+
+#### NOTE:
+For now this only supports the NVIDIA scene organization!
+
+```python postprocess.py --root_dir [path/to/results]```
+
+```python postprocess_crf.py --root_dir [path/to/results]_processed```
+
+The results are now stored in the folder ```[path/to/results]_processed_crf```.
+
+### Evaluation (NVIDIA dataset only)
+For now this only supports the NVIDIA scene organization!
+
+```cd [path/to/repo]/benchmarks```
+
+```python evaluate_fg_ours.py --vis_folder [path/to/results]_processed_crf --gt_folder [path/to/gt_masks]```
+
+```python evaluate_ours.py --vis_folder [path/to/results]_processed_crf --gt_folder [path/to/gt_masks]```
+
+## Use SAFF on your own data
+
+### Step 1. Data preparation
+Collect an RGB image sequence and corresponding dynamic masks, organized like:
+```
+├── [path/to/data]
+| ├──images
+| ├──00000.png
+| ├──00001.png
+| ├──...
+| ├──colmap_masks
+| ├──00000.png.png
+| ├──00001.png.png
+| ├──...
+```
+Note: naming has to follow the ```%05d``` format!
+
+```colmap_masks``` stores the foreground masks: dynamic foreground -> black, static background -> white.
+
+### Step 2. Run COLMAP
+
+Make sure you have COLMAP installed on your machine.
+
+For example, on Mac, run
+```sudo zsh colmap.sh [path/to/data]```
+
+After running, the same data folder looks like:
+```
+├── [path/to/data]
+| ├──images
+| ├──...
+| ├──colmap_masks
+| ├──...
+| ├──dense
+| ├──...
+| ├──sparse
+| ├──...
+| ├──database.db
+| ├──database.db-shm
+| ├──database.db-wal
+```
+
+```[path/to/data]/dense``` is the final data we want.
+
+### Step 3. Calculate pseudo depth and optical flow for supervision
+
+```cd [path/to/repo]/Neural-Scene-Flow-Fields/nsff_scripts```
+
+```python save_poses_nerf.py --data_path "[path/to/data]/dense/"```
+
+Download the single-view depth prediction model ```model.pt``` from [link](https://drive.google.com/drive/folders/1G-NFZKEA8KSWojUKecpJPVoq5XCjBLOV?usp=sharing), and put it under the folder ```nsff_scripts```.
+
+```python run_midas.py --data_path "[path/to/data]/dense/" [--resize_height ???] # use --resize_height if the data is too large for SAFF ```
+
+Download the RAFT model ```raft-things.pth``` from [link](https://drive.google.com/drive/folders/1sWDsfuZ3Up38EUQt7-JDTT1HcGHuJgvT?usp=sharing), and put it under the folder ```nsff_scripts/models```.
+
+```python run_flows_video.py --model models/raft-things.pth --data_path [path/to/data]/dense/```
+
+
+After running, the same data folder looks like:
+```
+├── [path/to/data]
+| ├──images
+| ├──...
+| ├──colmap_masks
+| ├──...
+| ├──dense
+| ├──disp
+| ├──...
+| ├──flow_i1
+| ├──...
+| ├──images
+| ├──...
+| ├──images_306x400
+| ├──...
+| ├──motion_masks
+| ├──...
+| ├──sparse
+| ├──...
+| ├──stereo
+| ├──...
+| ├──poses_bounds.npy
+| ├──run-colmap-geometric.sh
+| ├──run-colmap-photometric.sh
+| ├──scene.json
+| ├──sparse
+| ├──...
+| ├──database.db
+| ├──database.db-shm
+| ├──database.db-wal
+```
+
+### Step 4. Create a config file
+
+Copy the config file [template](https://github.com/brownvc/NOF/blob/camera_ready/Neural-Scene-Flow-Fields/nsff_exp/configs/config_balloon1-2_4.txt) and change the values of these fields:
+* ```expname```: your experiment name
+* ```datadir```: your [path/to/data]/dense
+* ```final_height```: must be the same as the --resize_height argument of run_midas.py
+* ```start_frame```, ```end_frame```: which images participate in training, selected by image id.
+
+Note: if end_frame - start_frame == 48, half of the images are held out for testing (following DyCheck's protocol); see the sketch below.
+
+#### Then you are all set!
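+
+For reference, below is a minimal sketch of how that 48-frame rule is applied (mirroring the ```i_test``` logic added to ```run_nerf.py``` in this change; variable names here are only illustrative):
+
+```
+num_images = end_frame - start_frame      # taken from the config file
+if num_images == 48:
+    # DyCheck-style capture: hold out every other frame for testing
+    i_test = [2 * i + 1 for i in range(24)]
+else:
+    # NSFF-style capture: no frames are held out
+    i_test = []
+i_train = [i for i in range(num_images) if i not in i_test]
+```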
+ +## Reference +* https://github.com/gaochen315/DynamicNeRF +* https://github.com/zhengqili/Neural-Scene-Flow-Fields/tree/main diff --git a/benchmarks/evaluate_fg_ours.py b/benchmarks/evaluate_fg_ours.py index e55d200..d8ebbc3 100644 --- a/benchmarks/evaluate_fg_ours.py +++ b/benchmarks/evaluate_fg_ours.py @@ -6,11 +6,22 @@ import torch import json +def config_parser(): + import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--vis_folder", type=str, default="../data/ours_1018_processed_crf") + parser.add_argument("--gt_folder", type=str, default="../data/gt_masks") + #parser.add_argument("--compact_rgb", type=str, default="20") + #parser.add_argument("--sdim_depth", type=str, default="40") + #parser.add_argument("--sdim_rgb", type=str, default="20") + return parser + if __name__ == '__main__': - vis_folder = "/users/yliang51/data/yliang51/NOF/data/ours_1018_processed_crf" - gt_folder = "/users/yliang51/data/yliang51/NOF/data/gt_masks" - + #vis_folder = "/users/yliang51/data/yliang51/NOF/data/ours_1018_processed_crf" + vis_folder = args.vis_folder + #gt_folder = "/users/yliang51/data/yliang51/NOF/data/gt_masks" + gt_folder = args.gt_folder result = { } diff --git a/benchmarks/evaluate_fg_ours_per_scene.py b/benchmarks/evaluate_fg_ours_per_scene.py new file mode 100644 index 0000000..59e6d11 --- /dev/null +++ b/benchmarks/evaluate_fg_ours_per_scene.py @@ -0,0 +1,110 @@ +import argparse +import numpy as np +import cv2 +from metrics import * +import os +import torch +import json + +def config_parser(): + import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--vis_folder", type=str, default="../data/ours_1018_processed_crf") + parser.add_argument("--gt_folder", type=str, default="../data/gt_masks") + #parser.add_argument("--compact_rgb", type=str, default="20") + #parser.add_argument("--sdim_depth", type=str, default="40") + #parser.add_argument("--sdim_rgb", type=str, default="20") + return parser + +if __name__ == '__main__': + parser = config_parser() + args = parser.parse_args() + #vis_folder = "/users/yliang51/data/yliang51/NOF/data/ours_1018_processed_crf" + vis_folder = args.vis_folder + #gt_folder = "/users/yliang51/data/yliang51/NOF/data/gt_masks" + gt_folder = args.gt_folder + result = { + + } + #scenes = ["Balloon1-2", "Balloon2-2", "DynamicFace-2", "Jumping", "playground", "Skating-2", "Truck-2", "Umbrella"] + #for scene in scenes: + #print(os.path.isdir(os.path.join(gt_folder, scene))) + + result = { + "training": { + "Jaccard": [], + }, + "test": { + "Jaccard": [], + } + } + # training images + #with open(os.path.join(vis_folder, f"{scene}_train.npy"), 'rb') as f: + # pred_masks = np.load(f) + #assert False, [pred_masks.shape, np.unique(pred_masks)] + for num in range(24): + gt_mask = cv2.imread(os.path.join(gt_folder, "%05d.png.png" % (2*num)), cv2.IMREAD_GRAYSCALE) + assert gt_mask is not None, os.path.join(gt_folder, "%05d.png.png" % (2*num)) + gt_mask = gt_mask.astype(int) + #assert False, np.unique(gt_mask) + gt_mask[gt_mask > 0] = -1 + #if num == 0: + # pred_masks = torch.from_numpy(pred_masks)[:, None, :, :] + # pred_masks = torch.nn.functional.interpolate(pred_masks, size=(gt_mask.shape[0], gt_mask.shape[1]), mode='nearest').numpy()[:, 0, :, :] + #pred_mask = pred_masks[num] + pred_mask = cv2.imread(os.path.join(vis_folder, f"{2*num}.png")) + unique_colors = np.unique(pred_mask.reshape((-1, 3)), axis=0)[:,None, None, :] + ids = list(range(len(unique_colors))) + tmp = np.zeros_like(pred_mask).astype(int)-1 
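+        # Convert the color-coded prediction into integer labels: each distinct
+        # RGB color gets its own label id, while pure-black pixels are treated as
+        # background and keep the initial value of -1.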
+ for color, idx in zip(unique_colors, ids): + #assert False, [pred_mask.shape, color.shape] + #print(color) + if color[0][0][0] == 0 and color[0][0][1] == 0 and color[0][0][2] == 0: + continue + tmp[pred_mask == color] = idx + pred_mask = tmp[..., 0] + #print(np.all(gt_mask == pred_mask)) + + #assert False, [gt_mask.shape, fg_mask.shape, pred_mask.shape] + #print(num) + #print("ARI: ", ARI(gt_mask, pred_mask)) + #print("fg-ARI: ", ARI(gt_mask, pred_mask, fg_mask)) + result["training"]["Jaccard"].append(compute_jaccard(gt_mask, pred_mask)) + result["training"]["mean-Jaccard"] = sum(result["training"]["Jaccard"]) / float(len(result["training"]["Jaccard"])) + + #assert False + # nv_spatial + #with open(os.path.join(vis_folder, f"{scene}_nv_spatial.npy"), 'rb') as f: + # pred_masks = np.load(f) + for num in range(24): + gt_mask = cv2.imread(os.path.join(gt_folder, "%05d.png.png" % (2*num+1)), cv2.IMREAD_GRAYSCALE) + gt_mask = gt_mask.astype(int) + gt_mask[gt_mask > 0] = -1 + #if num == 25: + # pred_masks = torch.from_numpy(pred_masks)[:, None, :, :] + # pred_masks = torch.nn.functional.interpolate(pred_masks, size=(gt_mask.shape[0], gt_mask.shape[1]), mode='nearest').numpy()[:, 0, :, :] + #pred_mask = pred_masks[num-25] + pred_mask = cv2.imread(os.path.join(vis_folder, f"{2*num+1}.png")) + unique_colors = np.unique(pred_mask.reshape((-1, 3)), axis=0)[:,None, None, :] + ids = list(range(len(unique_colors))) + tmp = np.zeros_like(pred_mask).astype(int)-1 + for color, idx in zip(unique_colors, ids): + #assert False, [pred_mask.shape, color.shape] + #print(color) + if color[0][0][0] == 0 and color[0][0][1] == 0 and color[0][0][2] == 0: + continue + tmp[pred_mask == color] = idx + pred_mask = tmp[..., 0] + + #assert False, [gt_mask.shape, fg_mask.shape, pred_mask.shape] + #print(num) + #print("ARI: ", ARI(gt_mask, pred_mask)) + #print("fg-ARI: ", ARI(gt_mask, pred_mask, fg_mask)) + result["test"]["Jaccard"].append(compute_jaccard(gt_mask, pred_mask)) + result["test"]["mean-Jaccard"] = sum(result["test"]["Jaccard"]) / float(len(result["test"]["Jaccard"])) + + + print("Saving to: " + os.path.join(vis_folder, "j_result.json")) + with open(os.path.join(vis_folder, "j_result.json"), 'w') as f: + json.dump(result, f, indent=4) + \ No newline at end of file diff --git a/benchmarks/evaluate_ours.py b/benchmarks/evaluate_ours.py index 9bf9c32..bf38f24 100644 --- a/benchmarks/evaluate_ours.py +++ b/benchmarks/evaluate_ours.py @@ -10,7 +10,9 @@ def config_parser(): import configargparse parser = configargparse.ArgumentParser() - parser.add_argument("--vis_folder", type=str, default="../data/ours_1018") + parser.add_argument("--vis_folder", type=str, default="../data/ours_1018_processed_crf") + parser.add_argument("--gt_folder", type=str, default="../data/gt_masks") + #parser.add_argument("--compact_rgb", type=str, default="20") #parser.add_argument("--sdim_depth", type=str, default="40") #parser.add_argument("--sdim_rgb", type=str, default="20") @@ -25,8 +27,8 @@ def config_parser(): #assert False, vis_folder #vis_folder = "../data/ours_1018-multisal-edge" - gt_folder = "../data/gt_masks" - + #gt_folder = "../data/gt_masks" + gt_folder = args.gt_folder result = { } diff --git a/benchmarks/evaluate_ours_per_scene.py b/benchmarks/evaluate_ours_per_scene.py new file mode 100644 index 0000000..d3bd35e --- /dev/null +++ b/benchmarks/evaluate_ours_per_scene.py @@ -0,0 +1,121 @@ +import argparse +import numpy as np +import cv2 +from metrics import * +import os +import torch +import json + + +def config_parser(): + 
import configargparse + parser = configargparse.ArgumentParser() + parser.add_argument("--vis_folder", type=str, default="../data/ours_1018_processed_crf") + parser.add_argument("--gt_folder", type=str, default="../data/gt_masks") + + #parser.add_argument("--compact_rgb", type=str, default="20") + #parser.add_argument("--sdim_depth", type=str, default="40") + #parser.add_argument("--sdim_rgb", type=str, default="20") + return parser + +if __name__ == '__main__': + + parser = config_parser() + args = parser.parse_args() + #vis_folder = "/users/yliang51/data/yliang51/NOF/data/ours_1018_processed_crf" + vis_folder = args.vis_folder + #assert False, vis_folder + + #vis_folder = "../data/ours_1018-multisal-edge" + #gt_folder = "../data/gt_masks" + gt_folder = args.gt_folder + result = { + + } + #scenes = ["Balloon1-2", "Balloon2-2", "DynamicFace-2", "Jumping", "playground", "Skating-2", "Truck-2", "Umbrella"] + #for scene in scenes: + #print(os.path.isdir(os.path.join(gt_folder, scene))) + + result= { + "training": { + "ARI": [], + "fg-ARI": [] + }, + "test": { + "ARI": [], + "fg-ARI": [] + }, + + } + # training images + #with open(os.path.join(vis_folder, f"{scene}_train.npy"), 'rb') as f: + # pred_masks = np.load(f) + #assert False, [pred_masks.shape, np.unique(pred_masks)] + for num in range(24): + gt_mask = cv2.imread(os.path.join(gt_folder, "%05d.png.png" % (2*num)), cv2.IMREAD_GRAYSCALE) + fg_mask = np.zeros(gt_mask.shape).astype(bool) + fg_mask[gt_mask < 10] = True + #if num == 0: + # pred_masks = torch.from_numpy(pred_masks)[:, None, :, :] + # pred_masks = torch.nn.functional.interpolate(pred_masks, size=(gt_mask.shape[0], gt_mask.shape[1]), mode='nearest').numpy()[:, 0, :, :] + #pred_mask = pred_masks[num] + pred_mask = cv2.imread(os.path.join(vis_folder, f"{2*num}.png")) + unique_colors = np.unique(pred_mask.reshape((-1, 3)), axis=0)[:,None, None, :] + ids = list(range(len(unique_colors))) + tmp = np.zeros_like(pred_mask).astype(int)-1 + for color, idx in zip(unique_colors, ids): + #assert False, [pred_mask.shape, color.shape] + #print(color) + if color[0][0][0] == 0 and color[0][0][1] == 0 and color[0][0][2] == 0: + continue + tmp[pred_mask == color] = idx + pred_mask = tmp[..., 0] + + #assert False, [gt_mask.shape, fg_mask.shape, pred_mask.shape] + #print(num) + #print("ARI: ", ARI(gt_mask, pred_mask)) + #print("fg-ARI: ", ARI(gt_mask, pred_mask, fg_mask)) + result["training"]["ARI"].append(ARI(gt_mask, pred_mask)) + result["training"]["fg-ARI"].append(ARI(gt_mask, pred_mask, fg_mask)) + result["training"]["mean-ARI"] = sum(result["training"]["ARI"]) / float(len(result["training"]["ARI"])) + result["training"]["mean-fg-ARI"] = sum(result["training"]["fg-ARI"]) / float(len(result["training"]["fg-ARI"])) + + #assert False + # nv_spatial + #with open(os.path.join(vis_folder, f"{scene}_nv_spatial.npy"), 'rb') as f: + # pred_masks = np.load(f) + for num in range(24): + gt_mask = cv2.imread(os.path.join(gt_folder, "%05d.png.png" % (2*num+1)), cv2.IMREAD_GRAYSCALE) + fg_mask = np.zeros(gt_mask.shape).astype(bool) + fg_mask[gt_mask < 10] = True + #if num == 25: + # pred_masks = torch.from_numpy(pred_masks)[:, None, :, :] + # pred_masks = torch.nn.functional.interpolate(pred_masks, size=(gt_mask.shape[0], gt_mask.shape[1]), mode='nearest').numpy()[:, 0, :, :] + #pred_mask = pred_masks[num-25] + pred_mask = cv2.imread(os.path.join(vis_folder, f"{2*num+1}.png")) + unique_colors = np.unique(pred_mask.reshape((-1, 3)), axis=0)[:,None, None, :] + ids = list(range(len(unique_colors))) + tmp = 
np.zeros_like(pred_mask).astype(int)-1 + for color, idx in zip(unique_colors, ids): + #assert False, [pred_mask.shape, color.shape] + #print(color) + if color[0][0][0] == 0 and color[0][0][1] == 0 and color[0][0][2] == 0: + continue + tmp[pred_mask == color] = idx + pred_mask = tmp[..., 0] + + #assert False, [gt_mask.shape, fg_mask.shape, pred_mask.shape] + #print(num) + #print("ARI: ", ARI(gt_mask, pred_mask)) + #print("fg-ARI: ", ARI(gt_mask, pred_mask, fg_mask)) + result["test"]["ARI"].append(ARI(gt_mask, pred_mask)) + result["test"]["fg-ARI"].append(ARI(gt_mask, pred_mask, fg_mask)) + result["test"]["mean-ARI"] = sum(result["test"]["ARI"]) / float(len(result["test"]["ARI"])) + result["test"]["mean-fg-ARI"] = sum(result["test"]["fg-ARI"]) / float(len(result["test"]["fg-ARI"])) + + + + print("Saving to: " + os.path.join(vis_folder, "ours_result.json")) + with open(os.path.join(vis_folder, "ours_result.json"), 'w') as f: + json.dump(result, f, indent=4) + \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ff70579 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,140 @@ +absl-py==1.2.0 +addict==2.4.0 +anyio==3.6.1 +argon2-cffi==21.3.0 +argon2-cffi-bindings==21.2.0 +attrs==22.1.0 +#av==10.0.0 +Babel==2.10.3 +backcall==0.2.0 +beautifulsoup4==4.11.1 +bleach==5.0.1 +cachetools==5.2.0 +certifi==2022.9.24 +cffi==1.15.1 +charset-normalizer==2.1.1 +ConfigArgParse==1.5.3 +cupy-cuda111==11.6.0 +cupy-cuda11x==11.1.0 +cycler==0.11.0 +debugpy==1.6.3 +decorator==5.1.1 +defusedxml==0.7.1 +deprecation==2.1.0 +entrypoints==0.4 +numpy==1.21.6 +faiss-gpu +fastjsonschema==2.16.2 +fastrlock==0.8 +filelock==3.8.0 +fonttools==4.37.4 +google-auth==2.12.0 +google-auth-oauthlib==0.4.6 +grpcio==1.49.1 +huggingface-hub==0.10.0 +idna==3.4 +imageio==2.22.1 +imageio-ffmpeg==0.4.7 +importlib-metadata==5.0.0 +importlib-resources==5.9.0 +ipykernel==6.16.0 +ipython==7.34.0 +ipython-genutils==0.2.0 +ipywidgets==8.0.2 +jedi==0.18.1 +Jinja2==3.1.2 +joblib==1.2.0 +json5==0.9.10 +jsonschema==4.16.0 +jupyter-core==4.11.1 +jupyter-server==1.19.1 +jupyter_client==7.3.5 +jupyter_packaging==0.12.3 +jupyterlab==3.4.8 +jupyterlab-pygments==0.2.2 +jupyterlab-widgets==3.0.3 +jupyterlab_server==2.15.2 +kiwisolver==1.4.4 +kornia==0.6.7 +lpips==0.1.4 +Markdown==3.4.1 +MarkupSafe==2.1.1 +matplotlib==3.5.3 +matplotlib-inline==0.1.6 +mistune==2.0.4 +nbclassic==0.4.4 +nbclient==0.6.8 +nbconvert==7.1.0 +nbformat==5.6.1 +nest-asyncio==1.5.6 +networkx==2.6.3 +notebook==6.4.12 +notebook-shim==0.1.0 +oauthlib==3.2.1 +open3d==0.9.0 +#open3d +#open3d-python +https://github.com/isl-org/Open3D/releases/download/v0.13.0/open3d-0.13.0-cp37-cp37m-manylinux2014_x86_64.whl +opencv-python==4.6.0.66 +packaging==21.3 +pandas==1.3.5 +pandocfilters==1.5.0 +parso==0.8.3 +pexpect==4.8.0 +pickleshare==0.7.5 +Pillow==9.2.0 +piq==0.7.0 +pkgutil_resolve_name==1.3.10 +prometheus-client==0.14.1 +prompt-toolkit==3.0.31 +protobuf==3.19.6 +psutil==5.9.2 +ptyprocess==0.7.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.21 +#cython==0.29.36 +#pydensecrf @ git+https://github.com/lucasb-eyer/pydensecrf.git +Pygments==2.13.0 +pyparsing==3.0.9 +pyrsistent==0.18.1 +python-dateutil==2.8.2 +pytz==2022.4 +PyWavelets==1.3.0 +PyYAML==6.0 +pyzmq==24.0.1 +requests==2.28.1 +requests-oauthlib==1.3.1 +rsa==4.9 +scikit-image==0.19.3 +scikit-learn==1.0.2 +scipy==1.7.3 +Send2Trash==1.8.0 +six==1.16.0 +sniffio==1.3.0 +soupsieve==2.3.2.post1 +tensorboard==2.10.1 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 
+terminado==0.16.0 +threadpoolctl==3.1.0 +tifffile==2021.11.2 +timm==0.6.11 +tinycss2==1.1.1 +tomli==2.0.1 +tomlkit==0.11.5 +tornado==6.2 +tqdm==4.64.1 +traitlets==5.4.0 +typing_extensions==4.3.0 +urllib3==1.26.6 +wcwidth==0.2.5 +webencodings==0.5.1 +websocket-client==1.4.1 +Werkzeug==2.2.2 +widgetsnbextension==4.0.3 +zipp==3.8.1 +--extra-index-url https://download.pytorch.org/whl/cu113 +torch==1.12.1+cu113 +torchaudio==0.12.1+cu113 +torchvision==0.13.1+cu113
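
The clustering path added to `cluster_pcd` above picks the number of clusters with an elbow criterion on the k-means objective and then merges centroids that are nearly parallel in feature space. The following standalone sketch condenses that logic under the assumption of a pre-built float32 feature array; the function name `cluster_and_merge` is ours, and the original code additionally restricts the similarity test to the first 64 (DINO) dimensions of each centroid:

```
import faiss
import numpy as np

def cluster_and_merge(feature, n_cluster=25, elbow=0.975, similarity_thresh=0.5):
    """Grow k until the objective stops improving, then merge similar centroids."""
    feature = np.ascontiguousarray(feature, dtype=np.float32)
    sum_of_squared_dists = []
    for n_clu in range(1, n_cluster):
        algorithm = faiss.Kmeans(d=feature.shape[-1], k=n_clu, niter=300,
                                 nredo=10, seed=1234, verbose=False)
        algorithm.train(feature)
        squared_distances, labels = algorithm.index.search(feature, 1)
        sum_of_squared_dists.append(squared_distances.sum() / feature.shape[0])
        # elbow criterion: stop once one more cluster no longer helps enough
        if len(sum_of_squared_dists) > 1 and \
                sum_of_squared_dists[-1] > elbow * sum_of_squared_dists[-2]:
            break

    centroids = algorithm.centroids  # (k, d) array of cluster centers
    label_mapper = {}
    for c2 in range(len(centroids)):
        for c1 in range(c2):
            sim = np.dot(centroids[c1], centroids[c2]) / (
                np.linalg.norm(centroids[c1]) * np.linalg.norm(centroids[c2]))
            if sim > similarity_thresh:
                # reuse an earlier mapping so merges stay transitive
                label_mapper[c2] = label_mapper.get(c1, c1)
                break

    labels = labels[:, 0].copy()
    for c2 in sorted(label_mapper, reverse=True):
        labels[labels == c2] = label_mapper[c2]
    return labels, label_mapper
```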