From bb0cf64de4213de40b47e175fa198882ec24734b Mon Sep 17 00:00:00 2001 From: Simon Venshtain Date: Thu, 19 Jul 2018 17:00:21 -0500 Subject: [PATCH 1/6] adding jupyter notebook for dataset API, WIP --- data_load.ipynb | 219 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 data_load.ipynb diff --git a/data_load.ipynb b/data_load.ipynb new file mode 100644 index 0000000..f65899f --- /dev/null +++ b/data_load.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import h5py\n", + "import tensorflow as tf\n", + "import numpy as np\n", + "from train.basic_train import BasicTrain\n", + "from metrics.metrics import Metrics\n", + "from utils.reporter import Reporter\n", + "from utils.misc import timeit\n", + "from utils.average_meter import FPSMeter\n", + "\n", + "MIN_DEPTH = 300\n", + "MAX_DEPTH = 1500" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "sess = tf.Session()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c:\\seq\\train_seq\\\n" + ] + } + ], + "source": [ + "train_seq_folder = 'c:\\\\seq\\\\train_seq\\\\'\n", + "print(train_seq_folder)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['c:\\\\seq\\\\train_seq\\\\annotated_man_chicken_dance_3_cam_labeled.h5', 'c:\\\\seq\\\\train_seq\\\\annotated_woman_hand_signals_3_cam_labeled.h5', 'c:\\\\seq\\\\train_seq\\\\annotated_woman_wash_window_3_cam_labeled.h5']\n" + ] + } + ], + "source": [ + "train_seq_files = []\n", + "for (dirpath, dirnames, filenames) in os.walk(train_seq_folder):\n", + " train_seq_files.extend(os.path.join(dirpath, x) for x in filenames)\n", + "print(train_seq_files)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "filenames = []\n", + "for train_seq_name in train_seq_files:\n", + " train_seq = h5py.File(train_seq_name, \"r\")\n", + " num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]\n", + " num_frames = train_seq['INFO']['COUNT'].value[0]\n", + " train_seq.close()\n", + " for frame_idx in range(num_frames):\n", + " for cam_idx in range(num_cameras):\n", + " filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)\n", + " filenames.append(filename_str)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "def _read_hdf5_func(filename, label):\n", + " filename_decoded = filename.decode(\"utf-8\")\n", + " print(filename_decoded)\n", + " h5_file_name, group_name = filename_decoded.split('__')\n", + " h5_file = h5py.File(h5_file_name, \"r\")\n", + " #print(group_name)\n", + " \n", + " # Read depth image\n", + " depth_image_path = group_name + 'Z'\n", + " depth_image = h5_file[depth_image_path].value\n", + " depth_image_scaled = np.array(depth_image, copy=False)\n", + " depth_image_scaled.clip(MIN_DEPTH, MAX_DEPTH, out=depth_image_scaled)\n", + " depth_image_scaled -= MIN_DEPTH\n", + " np.floor_divide(depth_image_scaled, (MAX_DEPTH - MIN_DEPTH + 1) / 256,\n", + " out=depth_image_scaled, casting='unsafe')\n", + " \n", + " depth_image_scaled = depth_image_scaled.astype(np.uint8)\n", + " \n", + " # Read labels\n", + 
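" # LABEL holds the per-pixel annotation image for this frame and camera, stored in the same HDF5 group as Z\n",
+ 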
" label_image_path = group_name + 'LABEL'\n", + " label_image = h5_file[label_image_path].value\n", + " h5_file.close()\n", + " return depth_image_scaled, label_image\n", + "\n", + "labels = [0]*len(filenames)\n", + "dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))\n", + "dataset = dataset.shuffle(buffer_size=10000)\n", + "dataset = dataset.map(\n", + " lambda filename, label: tuple(tf.py_func(\n", + " _read_hdf5_func, [filename, labels], [tf.uint8, tf.uint8])), num_parallel_calls=1)\n", + "\n", + "\n", + "dataset = dataset.batch(1)\n", + "dataset = dataset.repeat()\n", + "dataset = dataset.prefetch(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "c:\\seq\\train_seq\\annotated_woman_hand_signals_3_cam_labeled.h5__FRAME0173/RAW/CAM2/\n", + "c:\\seq\\train_seq\\annotated_woman_wash_window_3_cam_labeled.h5__FRAME0095/RAW/CAM2/" + ] + }, + { + "data": { + "text/plain": [ + "(array([[[0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8),\n", + " array([[[27, 27, 27, ..., 27, 27, 27],\n", + " [27, 27, 27, ..., 27, 27, 27],\n", + " [27, 27, 27, ..., 27, 27, 27],\n", + " ...,\n", + " [27, 27, 27, ..., 27, 27, 27],\n", + " [27, 27, 27, ..., 27, 27, 27],\n", + " [27, 27, 27, ..., 27, 27, 27]]], dtype=uint8))" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "iterator = dataset.make_one_shot_iterator()\n", + "next_example = iterator.get_next()\n", + "\n", + "sess.run(next_example)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From ad9587acd744aadf3df36e6241725818a932bb5a Mon Sep 17 00:00:00 2001 From: Simon Venshtain Date: Thu, 19 Jul 2018 17:05:03 -0500 Subject: [PATCH 2/6] adding python script --- data_load.py | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 data_load.py diff --git a/data_load.py b/data_load.py new file mode 100644 index 0000000..c3546b0 --- /dev/null +++ b/data_load.py @@ -0,0 +1,106 @@ + +# coding: utf-8 + +# In[21]: + + +import os +import h5py +import tensorflow as tf +import numpy as np +from train.basic_train import BasicTrain +from metrics.metrics import Metrics +from utils.reporter import Reporter +from utils.misc import timeit +from utils.average_meter import FPSMeter + +MIN_DEPTH = 300 +MAX_DEPTH = 1500 + + +# In[22]: + + +sess = tf.Session() + + +# In[23]: + + +train_seq_folder = 'c:\\seq\\train_seq\\' +print(train_seq_folder) + + +# In[24]: + + +train_seq_files = [] +for (dirpath, dirnames, filenames) in os.walk(train_seq_folder): + train_seq_files.extend(os.path.join(dirpath, x) for x in 
filenames)
print(train_seq_files)


# In[25]:


filenames = []
for train_seq_name in train_seq_files:
    train_seq = h5py.File(train_seq_name, "r")
    num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]
    num_frames = train_seq['INFO']['COUNT'].value[0]
    train_seq.close()
    for frame_idx in range(num_frames):
        for cam_idx in range(num_cameras):
            filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)
            filenames.append(filename_str)


# In[26]:


def _read_hdf5_func(filename, label):
    filename_decoded = filename.decode("utf-8")
    print(filename_decoded)
    h5_file_name, group_name = filename_decoded.split('__')
    h5_file = h5py.File(h5_file_name, "r")
    #print(group_name)

    # Read depth image
    depth_image_path = group_name + 'Z'
    depth_image = h5_file[depth_image_path].value
    depth_image_scaled = np.array(depth_image, copy=False)
    depth_image_scaled.clip(MIN_DEPTH, MAX_DEPTH, out=depth_image_scaled)
    depth_image_scaled -= MIN_DEPTH
    np.floor_divide(depth_image_scaled, (MAX_DEPTH - MIN_DEPTH + 1) / 256,
                    out=depth_image_scaled, casting='unsafe')

    depth_image_scaled = depth_image_scaled.astype(np.uint8)

    # Read labels
    label_image_path = group_name + 'LABEL'
    label_image = h5_file[label_image_path].value
    h5_file.close()
    return depth_image_scaled, label_image

labels = [0]*len(filenames)
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
dataset = dataset.shuffle(buffer_size=10000)
dataset = dataset.map(
    lambda filename, label: tuple(tf.py_func(
        _read_hdf5_func, [filename, label], [tf.uint8, tf.uint8])), num_parallel_calls=1)


dataset = dataset.batch(1)
dataset = dataset.repeat()
dataset = dataset.prefetch(1)


# In[27]:


iterator = dataset.make_one_shot_iterator()
next_example = iterator.get_next()

sess.run(next_example)

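Note: tf.py_func returns tensors with no static shape, so downstream graph code
that needs known dimensions (for example the fixed-size placeholders the models
in the next patch use) will see unknown shapes. A minimal sketch of the usual
workaround, pinning the shape right after the py_func call, is shown below; the
720x1280 frame size is an assumption borrowed from the yaml configs in the next
patch, not something this script defines.

    def _parse_fn(filename, label):
        depth, label_img = tf.py_func(
            _read_hdf5_func, [filename, label], [tf.uint8, tf.uint8])
        # py_func outputs have unknown static shape; restore it for later ops.
        depth.set_shape([720, 1280])      # assumed img_height x img_width
        label_img.set_shape([720, 1280])
        return depth, label_img

    dataset = dataset.map(_parse_fn, num_parallel_calls=1)
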
From b5ec2ef9ad6540225c446ee1e5d6717eb3f96700 Mon Sep 17 00:00:00 2001
From: Simon Venshtain
Date: Wed, 25 Jul 2018 10:45:22 -0500
Subject: [PATCH 3/6] adding DepthNet encoder, depth segmentation models, and
 tf.data input pipeline

---
 .../fcn8s_depthnet_train.yaml |   28 +
 .../unet_depthnet_train.yaml  |   33 +
 data/psy_input.py             |   91 ++
 data_load.ipynb               |  569 ++++++++-
 models/basic/basic_model.py   |    4 +-
 models/encoders/depthnet.py   |  164 +++
 models/fcn8s_depthnet.py      |   78 ++
 models/unet_depthnet.py       |  123 ++
 train/train.py                | 1024 -----------------
 train/train_psy.py            |  311 +++++
 10 files changed, 1352 insertions(+), 1073 deletions(-)
 create mode 100644 config/experiments_config/fcn8s_depthnet_train.yaml
 create mode 100644 config/experiments_config/unet_depthnet_train.yaml
 create mode 100644 data/psy_input.py
 create mode 100644 models/encoders/depthnet.py
 create mode 100644 models/fcn8s_depthnet.py
 create mode 100644 models/unet_depthnet.py
 delete mode 100644 train/train.py
 create mode 100644 train/train_psy.py

diff --git a/config/experiments_config/fcn8s_depthnet_train.yaml b/config/experiments_config/fcn8s_depthnet_train.yaml
new file mode 100644
index 0000000..5f0f3c0
--- /dev/null
+++ b/config/experiments_config/fcn8s_depthnet_train.yaml
@@ -0,0 +1,28 @@
+# Directories arguments
+data_dir: "synthetic_seq"
+exp_dir: "fcn8s_depthnet"
+out_dir: "fcn8s_depthnet"
+
+# Data arguments
+img_height: 720
+img_width: 1280
+num_channels: 1
+num_classes: 26
+
+# Train arguments
+num_epochs: 200
+batch_size: 5
+shuffle: True
+data_mode: "experiment"
+save_every: 5
+test_every: 5
+max_to_keep: 1
+
+# Models arguments
+learning_rate: 0.0001
+weight_decay: 0.0005
+pretrained_path: "pretrained_weights/depthnet.npy"
+
+# Misc arguments
+verbose: False
+
diff --git a/config/experiments_config/unet_depthnet_train.yaml b/config/experiments_config/unet_depthnet_train.yaml
new file mode 100644
index 0000000..e3aa6e9
--- /dev/null
+++ b/config/experiments_config/unet_depthnet_train.yaml
@@ -0,0 +1,33 @@
+# Directories arguments
+data_dir: "seq"
+exp_dir: "unet_depthnet"
+out_dir: "unet_depthnet"
+
+# Data arguments
+img_height: 720
+img_width: 1280
+num_channels: 1
+num_classes: 26
+
+# Train arguments
+num_epochs: 200
+batch_size: 1
+shuffle: True
+data_mode: "experiment"
+save_every: 5
+test_every: 5
+max_to_keep: 2
+weighted_loss: False
+random_cropping: False
+freeze_encoder: False
+
+# Models arguments
+learning_rate: 0.0001
+weight_decay: 0.0005
+bias: 0.0
+batchnorm_enabled: True
+#pretrained_path: "pretrained_weights/mobilenet_v1.pkl"
+
+# Misc arguments
+verbose: False
+
diff --git a/data/psy_input.py b/data/psy_input.py
new file mode 100644
index 0000000..4765130
--- /dev/null
+++ b/data/psy_input.py
@@ -0,0 +1,91 @@
+import os
+import h5py
+import tensorflow as tf
+
+def _read_hdf5_func(filename, label):
+    filename_decoded = filename.decode("utf-8")
+    h5_file_name, group_name = filename_decoded.split('__')
+    h5_file = h5py.File(h5_file_name, "r")
+    #print(group_name)
+
+    # Read depth image
+    depth_image_path = group_name + 'Z'
+    depth_image = h5_file[depth_image_path].value
+
+    # Read labels
+    label_image_path = group_name + 'LABEL'
+    label_image = h5_file[label_image_path].value
+    h5_file.close()
+    return depth_image, label_image
+
+
+def generate_datasets(train_seq_folder, val_seq_folder, params):
+    print(train_seq_folder)
+    print(val_seq_folder)
+
+    train_seq_files = []
+    for (dirpath, dirnames, filenames) in os.walk(train_seq_folder):
+        train_seq_files.extend(os.path.join(dirpath, x) for x in filenames)
+    print(train_seq_files)
+
+    val_seq_files = []
+    for (dirpath, dirnames, filenames) in os.walk(val_seq_folder):
+        val_seq_files.extend(os.path.join(dirpath, x) for x in filenames)
+    print(val_seq_files)
+
+    train_filenames = []
+    for train_seq_name in train_seq_files:
+        train_seq = h5py.File(train_seq_name, "r")
+        num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]
+        num_frames = train_seq['INFO']['COUNT'].value[0]
+        train_seq.close()
+        for frame_idx in range(num_frames):
+            for cam_idx in range(num_cameras):
+                train_filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)
+                train_filenames.append(train_filename_str)
+
+    val_filenames = []
+    for val_seq_name in val_seq_files:
+        val_seq = h5py.File(val_seq_name, "r")
+        num_cameras = val_seq['INFO']['NUM_CAMERAS'].value[0]
+        num_frames = val_seq['INFO']['COUNT'].value[0]
+        val_seq.close()
+        for frame_idx in range(num_frames):
+            for cam_idx in range(num_cameras):
+                val_filename_str = val_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)
+                val_filenames.append(val_filename_str)
+
+    parse_fn = lambda filename, label: tuple(tf.py_func(
+        _read_hdf5_func, [filename, label], [tf.int16, tf.uint8]))
+
+    val_labels = [0]*len(val_filenames)
+    val_dataset = (tf.data.Dataset.from_tensor_slices((val_filenames, val_labels))
+                   .shuffle(buffer_size=10000)
+                   .map(parse_fn, num_parallel_calls=params.num_parallel_calls))
+
+    train_labels = [0]*len(train_filenames)
+    train_dataset = (tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))
+                     .shuffle(buffer_size=10000)
+                     .map(parse_fn, num_parallel_calls=params.num_parallel_calls)
+                     .batch(params.batch_size)
+                     .repeat()
+                     .prefetch(1))
+
+    iterator = tf.data.Iterator.from_structure(train_dataset.output_types,
train_dataset.output_shapes) + + train_init_op = iterator.make_initializer(train_dataset) + val_init_op = iterator.make_initializer(val_dataset) + + images, labels = iterator.get_next() + iterator_init_op = iterator.initializer + + inputs = {'images': images, + 'labels': labels, + 'train_init_op': train_init_op, + 'val_init_op': val_init_op} + + return inputs + diff --git a/data_load.ipynb b/data_load.ipynb index f65899f..2a11cbc 100644 --- a/data_load.ipynb +++ b/data_load.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 21, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -16,13 +16,12 @@ "from utils.misc import timeit\n", "from utils.average_meter import FPSMeter\n", "\n", - "MIN_DEPTH = 300\n", - "MAX_DEPTH = 1500" + "calcAvgDepth = False" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -31,25 +30,28 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "c:\\seq\\train_seq\\\n" + "c:\\seq\\train_seq\\\n", + "c:\\seq\\val_seq\\\n" ] } ], "source": [ "train_seq_folder = 'c:\\\\seq\\\\train_seq\\\\'\n", - "print(train_seq_folder)" + "val_seq_folder = 'c:\\\\seq\\\\val_seq\\\\'\n", + "print(train_seq_folder)\n", + "print(val_seq_folder)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -69,11 +71,56 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['c:\\\\seq\\\\val_seq\\\\annotated_man_chicken_dance_3_cam_labeled.h5']\n" + ] + } + ], + "source": [ + "val_seq_files = []\n", + "for (dirpath, dirnames, filenames) in os.walk(val_seq_folder):\n", + " val_seq_files.extend(os.path.join(dirpath, x) for x in filenames)\n", + "print(val_seq_files)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "filenames = []\n", + "if (calcAvgDepth):\n", + " avg_depth = 0\n", + " for train_seq_name in train_seq_files:\n", + " train_seq = h5py.File(train_seq_name, \"r\")\n", + " num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]\n", + " num_frames = train_seq['INFO']['COUNT'].value[0]\n", + " for frame_idx in range(num_frames):\n", + " for cam_idx in range(num_cameras):\n", + " depth_path = 'FRAME{:04d}/RAW/CAM{:d}/Z'.format(frame_idx, cam_idx)\n", + " depth_image = train_seq[depth_path].value\n", + " depth_mask = depth_image > 0\n", + " avg_depth += np.average(depth_image, weights = depth_mask)\n", + "\n", + " avg_depth /= num_frames * num_cameras\n", + " train_seq.close()\n", + "\n", + " print (avg_depth)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "train_filenames = []\n", "for train_seq_name in train_seq_files:\n", " train_seq = h5py.File(train_seq_name, \"r\")\n", " num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]\n", @@ -81,14 +128,25 @@ " train_seq.close()\n", " for frame_idx in range(num_frames):\n", " for cam_idx in range(num_cameras):\n", - " filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)\n", - " filenames.append(filename_str)\n", + " train_filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)\n", + " train_filenames.append(train_filename_str)\n", + "\n", + 
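"# Each key string packs '<h5 file>__<HDF5 group>'; _read_hdf5_func below\n",
+ "# splits on '__' to recover the file path and the in-file group path.\n",
+ 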
"val_filenames = []\n", + "for val_seq_name in val_seq_files:\n", + " val_seq = h5py.File(val_seq_name, \"r\")\n", + " num_cameras = val_seq['INFO']['NUM_CAMERAS'].value[0]\n", + " num_frames = val_seq['INFO']['COUNT'].value[0]\n", + " val_seq.close()\n", + " for frame_idx in range(num_frames):\n", + " for cam_idx in range(num_cameras):\n", + " val_filename_str = val_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)\n", + " val_filenames.append(val_filename_str)\n", " " ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -102,90 +160,507 @@ " # Read depth image\n", " depth_image_path = group_name + 'Z'\n", " depth_image = h5_file[depth_image_path].value\n", - " depth_image_scaled = np.array(depth_image, copy=False)\n", - " depth_image_scaled.clip(MIN_DEPTH, MAX_DEPTH, out=depth_image_scaled)\n", - " depth_image_scaled -= MIN_DEPTH\n", - " np.floor_divide(depth_image_scaled, (MAX_DEPTH - MIN_DEPTH + 1) / 256,\n", - " out=depth_image_scaled, casting='unsafe')\n", + " #depth_image_scaled = np.array(depth_image, copy=False)\n", + " #depth_image_scaled.clip(MIN_DEPTH, MAX_DEPTH, out=depth_image_scaled)\n", + " #depth_image_scaled -= MIN_DEPTH\n", + " #np.floor_divide(depth_image_scaled, (MAX_DEPTH - MIN_DEPTH + 1) / 256,\n", + " # out=depth_image_scaled, casting='unsafe')\n", " \n", - " depth_image_scaled = depth_image_scaled.astype(np.uint8)\n", + " #depth_image_scaled = depth_image_scaled.astype(np.uint8)\n", " \n", " # Read labels\n", " label_image_path = group_name + 'LABEL'\n", " label_image = h5_file[label_image_path].value\n", " h5_file.close()\n", - " return depth_image_scaled, label_image\n", + " return depth_image, label_image\n", + "\n", + "val_labels = [0]*len(val_filenames)\n", + "val_dataset = tf.data.Dataset.from_tensor_slices((val_filenames, val_labels))\n", + "val_dataset = val_dataset.shuffle(buffer_size=10000)\n", + "val_dataset = val_dataset.map(\n", + " lambda filename, label: tuple(tf.py_func(\n", + " _read_hdf5_func, [filename, label], [tf.int16, tf.uint8])), num_parallel_calls=1)\n", + "\n", + "\n", + "#val_dataset = dataset.batch(10)\n", + "#val_dataset = dataset.repeat()\n", + "#val_dataset = dataset.prefetch(1)\n", + "\n", + "\n", "\n", - "labels = [0]*len(filenames)\n", - "dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))\n", - "dataset = dataset.shuffle(buffer_size=10000)\n", - "dataset = dataset.map(\n", + "train_labels = [0]*len(train_filenames)\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((train_filenames, train_labels))\n", + "train_dataset = train_dataset.shuffle(buffer_size=10000)\n", + "train_dataset = train_dataset.map(\n", " lambda filename, label: tuple(tf.py_func(\n", - " _read_hdf5_func, [filename, labels], [tf.uint8, tf.uint8])), num_parallel_calls=1)\n", + " _read_hdf5_func, [filename, label], [tf.int16, tf.uint8])), num_parallel_calls=1)\n", "\n", "\n", - "dataset = dataset.batch(1)\n", - "dataset = dataset.repeat()\n", - "dataset = dataset.prefetch(1)" + "train_dataset = train_dataset.batch(1)\n", + "train_dataset = train_dataset.repeat()\n", + "train_dataset = train_dataset.prefetch(1)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "c:\\seq\\train_seq\\annotated_woman_hand_signals_3_cam_labeled.h5__FRAME0173/RAW/CAM2/\n", - "c:\\seq\\train_seq\\annotated_woman_wash_window_3_cam_labeled.h5__FRAME0095/RAW/CAM2/" + 
"c:\\seq\\train_seq\\annotated_man_chicken_dance_3_cam_labeled.h5__FRAME0129/RAW/CAM2/\n", + "c:\\seq\\train_seq\\annotated_woman_hand_signals_3_cam_labeled.h5__FRAME0229/RAW/CAM2/\n" ] }, { "data": { "text/plain": [ - "(array([[[0, 0, 0, ..., 0, 0, 0],\n", + "[array([[[0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0],\n", " ...,\n", " [0, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 0],\n", - " [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8),\n", - " array([[[27, 27, 27, ..., 27, 27, 27],\n", - " [27, 27, 27, ..., 27, 27, 27],\n", - " [27, 27, 27, ..., 27, 27, 27],\n", + " [0, 0, 0, ..., 0, 0, 0]]], dtype=int16),\n", + " array([[[46, 46, 46, ..., 46, 46, 46],\n", + " [46, 46, 46, ..., 46, 46, 46],\n", + " [46, 46, 46, ..., 46, 46, 46],\n", " ...,\n", - " [27, 27, 27, ..., 27, 27, 27],\n", - " [27, 27, 27, ..., 27, 27, 27],\n", - " [27, 27, 27, ..., 27, 27, 27]]], dtype=uint8))" + " [46, 46, 46, ..., 46, 46, 46],\n", + " [46, 46, 46, ..., 46, 46, 46],\n", + " [46, 46, 46, ..., 46, 46, 46]]], dtype=uint8)]" ] }, - "execution_count": 27, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" - }, + } + ], + "source": [ + "iterator = train_dataset.make_one_shot_iterator()\n", + "next_depth, next_label = iterator.get_next()\n", + "\n", + "sess.run([next_depth, next_label])\n", + "#sess.run(next_label)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n" + "202.97058823529412\n" ] } ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], "source": [ - "iterator = dataset.make_one_shot_iterator()\n", - "next_example = iterator.get_next()\n", + "\"\"\"\n", + "Trainer class to train Segmentation models\n", + "\"\"\"\n", + "\n", + "from tqdm import tqdm\n", + "from utils.augmentation import flip_randomly_left_right_image_with_annotation, \\\n", + " scale_randomly_image_with_annotation_with_fixed_size_output\n", + "import scipy.misc as misc\n", + "\n", + "\n", + "class TrainPsy(BasicTrain):\n", + " \"\"\"\n", + " Trainer class\n", + " \"\"\"\n", + "\n", + " def __init__(self, args, sess, train_model, test_model):\n", + " \"\"\"\n", + " Call the constructor of the base class\n", + " init summaries\n", + " init loading data\n", + " :param args:\n", + " :param sess:\n", + " :param model:\n", + " :return:\n", + " \"\"\"\n", + " super().__init__(args, sess, train_model, test_model)\n", + " ##################################################################################\n", + " # Init summaries\n", + "\n", + " # Summary variables\n", + " self.scalar_summary_tags = ['mean_iou_on_val',\n", + " 'train-loss-per-epoch', 'val-loss-per-epoch',\n", + " 'train-acc-per-epoch', 'val-acc-per-epoch']\n", + " self.images_summary_tags = [\n", + " ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]),\n", + " ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])]\n", + " self.summary_tags = []\n", + " self.summary_placeholders = {}\n", + " self.summary_ops = {}\n", + " # init summaries and it's operators\n", + " self.init_summaries()\n", + " # Create summary writer\n", + " self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph)\n", + " ##################################################################################\n", + " # Init metrics class\n", + " self.metrics = Metrics(self.args.num_classes)\n", + " # Init 
reporter class\n", + " if self.args.mode == 'train' or 'overfit':\n", + " self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args)\n", + " elif self.args.mode == 'test':\n", + " self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args)\n", + " ##################################################################################\n", + "\n", + " def init_summaries(self):\n", + " \"\"\"\n", + " Create the summary part of the graph\n", + " :return:\n", + " \"\"\"\n", + " with tf.variable_scope('train-summary-per-epoch'):\n", + " for tag in self.scalar_summary_tags:\n", + " self.summary_tags += tag\n", + " self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag)\n", + " self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag])\n", + " for tag, shape in self.images_summary_tags:\n", + " self.summary_tags += tag\n", + " self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag)\n", + " self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag], max_outputs=10)\n", + "\n", + " def add_summary(self, step, summaries_dict=None, summaries_merged=None):\n", + " \"\"\"\n", + " Add the summaries to tensorboard\n", + " :param step:\n", + " :param summaries_dict:\n", + " :param summaries_merged:\n", + " :return:\n", + " \"\"\"\n", + " if summaries_dict is not None:\n", + " summary_list = self.sess.run([self.summary_ops[tag] for tag in summaries_dict.keys()],\n", + " {self.summary_placeholders[tag]: value for tag, value in\n", + " summaries_dict.items()})\n", + " for summary in summary_list:\n", + " self.summary_writer.add_summary(summary, step)\n", + " if summaries_merged is not None:\n", + " self.summary_writer.add_summary(summaries_merged, step)\n", + "\n", + " def train(self):\n", + " print(\"Training mode will begin NOW ..\")\n", + " # curr_lr= self.model.args.learning_rate\n", + " for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1):\n", + "\n", + " tt = tqdm(self.generator(), total=self.num_iterations_training_per_epoch,\n", + " desc=\"epoch-\" + str(cur_epoch) + \"-\")\n", + " # init acc and loss lists\n", + " loss_list = []\n", + " acc_list = []\n", + " for _ in tt:\n", + " # get the cur_it for the summary\n", + " cur_it = self.model.global_step_tensor.eval(self.sess)\n", + "\n", + " # Feed this variables to the network\n", + " feed_dict = {self.model.x_pl: x_batch,\n", + " self.model.y_pl: y_batch,\n", + " self.model.is_training: True\n", + " #self.model.curr_learning_rate:curr_lr\n", + " }\n", + "\n", + " # run the feed_forward\n", + " _, loss, acc, summaries_merged = self.sess.run(\n", + " [self.model.train_op, self.model.loss, self.model.accuracy, \n", + " self.model.merged_summaries],\n", + " feed_dict=feed_dict)\n", + " # log loss and acc\n", + " loss_list += [loss]\n", + " acc_list += [acc]\n", + " \n", + " # Update the Global step\n", + " self.model.global_step_assign_op.eval(session=self.sess,\n", + " feed_dict={self.model.global_step_input: cur_it + 1})\n", + "\n", + " total_loss = np.mean(loss_list)\n", + " total_acc = np.mean(acc_list)\n", + " \n", + " # summarize\n", + " summaries_dict = dict()\n", + " summaries_dict['train-loss-per-epoch'] = total_loss\n", + " summaries_dict['train-acc-per-epoch'] = total_acc\n", + "\n", + " if self.args.data_mode != 'experiment_v2':\n", + " summaries_dict['train_prediction_sample'] = segmented_imgs\n", + " # self.add_summary(cur_it, summaries_dict=summaries_dict, 
summaries_merged=summaries_merged)\n", + "\n", + " # report\n", + " self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch), str(total_acc))\n", + " self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch), str(total_loss))\n", + " self.reporter.finalize()\n", + "\n", + " # Update the Cur Epoch tensor\n", + " # it is the last thing because if it is interrupted it repeat this\n", + " self.model.global_epoch_assign_op.eval(session=self.sess,\n", + " feed_dict={self.model.global_epoch_input: cur_epoch + 1})\n", "\n", - "sess.run(next_example)\n" + " # print in console\n", + " tt.close()\n", + " print(\"epoch-\" + str(cur_epoch) + \"-\" + \"loss:\" + str(total_loss) + \"-\" + \" acc:\" + str(total_acc)[\n", + " :6])\n", + "\n", + " # Save the current checkpoint\n", + " if cur_epoch % self.args.save_every == 0:\n", + " self.save_model()\n", + "\n", + " # Test the model on validation\n", + " if cur_epoch % self.args.test_every == 0:\n", + " self.test_per_epoch(step=self.model.global_step_tensor.eval(self.sess),\n", + " epoch=self.model.global_epoch_tensor.eval(self.sess))\n", + "\n", + " print(\"Training Finished\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, "outputs": [], - "source": [] + "source": [ + " def test_per_epoch(self, step, epoch):\n", + " print(\"Validation at step:\" + str(step) + \" at epoch:\" + str(epoch) + \" ..\")\n", + "\n", + " # init tqdm and get the epoch value\n", + " tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch,\n", + " desc=\"Val-epoch-\" + str(epoch) + \"-\")\n", + "\n", + " # init acc and loss lists\n", + " loss_list = []\n", + " acc_list = []\n", + " inf_list = []\n", + "\n", + " # reset metrics\n", + " self.metrics.reset()\n", + "\n", + " # get the maximum iou to compare with and save the best model\n", + " max_iou = self.model.best_iou_tensor.eval(self.sess)\n", + "\n", + " # loop by the number of iterations\n", + " for _ in tt:\n", + " # load minibatches\n", + " x_batch = self.val_data['X'][idx:idx + self.args.batch_size]\n", + " y_batch = self.val_data['Y'][idx:idx + self.args.batch_size]\n", + " if self.args.data_mode == 'experiment_v2':\n", + " y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size]\n", + "\n", + " # update idx of minibatch\n", + " idx += self.args.batch_size\n", + "\n", + " # Feed this variables to the network\n", + " feed_dict = {self.model.x_pl: x_batch,\n", + " self.model.y_pl: y_batch,\n", + " self.model.is_training: False\n", + " }\n", + "\n", + " start = time.time()\n", + " # run the feed_forward\n", + "\n", + " out_argmax, loss, acc, summaries_merged = self.sess.run(\n", + " [self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries],\n", + " feed_dict=feed_dict)\n", + "\n", + " end = time.time()\n", + " # log loss and acc\n", + " loss_list += [loss]\n", + " acc_list += [acc]\n", + " inf_list += [end - start]\n", + "\n", + " # log metrics\n", + " self.metrics.update_metrics_batch(out_argmax, y_batch)\n", + "\n", + "\n", + " # mean over batches\n", + " total_acc = np.mean(acc_list)\n", + " mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch)\n", + " mean_iou_arr = self.metrics.iou\n", + " mean_inference = str(np.mean(inf_list)) + '-seconds'\n", + " # summarize\n", + " summaries_dict = dict()\n", + " summaries_dict['val-acc-per-epoch'] = total_acc\n", + " summaries_dict['mean_iou_on_val'] = 
mean_iou\n", + "\n", + " # report\n", + " self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc))\n", + " self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch),\n", + " str(mean_inference))\n", + " self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr)\n", + " self.reporter.finalize()\n", + "\n", + " # print in console\n", + " tt.close()\n", + " print(\"Val-epoch-\" + str(epoch) + \"-\" +\n", + " \"acc:\" + str(total_acc)[:6] + \"-mean_iou:\" + str(mean_iou))\n", + " print(\"Last_max_iou: \" + str(max_iou))\n", + " if mean_iou > max_iou:\n", + " print(\"This validation got a new best iou. so we will save this one\")\n", + " # save the best model\n", + " self.save_best_model()\n", + " # Set the new maximum\n", + " self.model.best_iou_assign_op.eval(session=self.sess,\n", + " feed_dict={self.model.best_iou_input: mean_iou})\n", + " else:\n", + " print(\"hmm not the best validation epoch :/..\")" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + " def test(self, pkl=False):\n", + " print(\"Testing mode will begin NOW..\")\n", + "\n", + " # load the best model checkpoint to test on it\n", + " if not pkl:\n", + " self.load_best_model()\n", + "\n", + " # init tqdm and get the epoch value\n", + " tt = tqdm(range(self.test_data_len))\n", + " # naming = np.load(self.args.data_dir + 'names_train.npy')\n", + "\n", + " # init acc and loss lists\n", + " acc_list = []\n", + " img_list = []\n", + "\n", + "\n", + " # reset metrics\n", + " self.metrics.reset()\n", + "\n", + " # loop by the number of iterations\n", + " for _ in tt:\n", + " # load mini_batches\n", + "\n", + "\n", + "\n", + " feed_dict = {self.test_model.x_pl: x_batch,\n", + " self.test_model.y_pl: y_batch,\n", + " self.test_model.is_training: False\n", + " }\n", + "\n", + " # run the feed_forward\n", + " if self.args.data_mode == 'test_v2':\n", + " out_argmax, acc = self.sess.run(\n", + " [self.test_model.out_argmax, self.test_model.accuracy],\n", + " feed_dict=feed_dict)\n", + " else:\n", + " out_argmax, acc, segmented_imgs = self.sess.run(\n", + " [self.test_model.out_argmax, self.test_model.accuracy,\n", + " # self.test_model.merged_summaries, self.test_model.segmented_summary],\n", + " self.test_model.segmented_summary],\n", + " feed_dict=feed_dict)\n", + "\n", + " if pkl:\n", + " out_argmax[0] = self.linknet_postprocess(out_argmax[0])\n", + " segmented_imgs = decode_labels(out_argmax, 20)\n", + "\n", + " if self.args.data_mode == 'test':\n", + " plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0])\n", + "\n", + " # log loss and acc\n", + " acc_list += [acc]\n", + "\n", + " # log metrics\n", + " if self.args.random_cropping:\n", + " y1 = np.expand_dims(y_batch[0, :, :512], axis=0)\n", + " y2 = np.expand_dims(y_batch[0, :, 512:], axis=0)\n", + " y_batch = np.concatenate((y1, y2), axis=0)\n", + " self.metrics.update_metrics(out_argmax, y_batch, 0, 0)\n", + " else:\n", + " self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0)\n", + "\n", + " # mean over batches\n", + " total_loss = 0\n", + " total_acc = np.mean(acc_list)\n", + " mean_iou = self.metrics.compute_final_metrics(self.test_data_len)\n", + "\n", + " # print in console\n", + " tt.close()\n", + " print(\"Here the statistics\")\n", + " print(\"Total_loss: \" + str(total_loss))\n", + " print(\"Total_acc: \" + str(total_acc)[:6])\n", + " 
print(\"mean_iou: \" + str(mean_iou))\n", + "\n", + " print(\"Plotting imgs\")\n", + " for i in range(len(img_list)):\n", + " plt.imsave(self.args.imgs_dir + 'test_' + str(i) + '.png', img_list[i])" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + " def test_eval(self, pkl=False):\n", + " print(\"Testing mode will begin NOW..\")\n", + "\n", + " # load the best model checkpoint to test on it\n", + " if not pkl:\n", + " self.load_best_model()\n", + "\n", + " # init tqdm and get the epoch value\n", + " tt = tqdm(range(self.test_data_len))\n", + "\n", + "\n", + " # loop by the number of iterations\n", + " for _ in tt:\n", + "\n", + " # Feed this variables to the network\n", + " if self.args.random_cropping:\n", + " feed_dict = {self.test_model.x_pl_before: x_batch,\n", + " self.test_model.is_training: False,\n", + " }\n", + " else:\n", + " feed_dict = {self.test_model.x_pl: x_batch,\n", + " self.test_model.is_training: False\n", + " }\n", + "\n", + " # run the feed_forward\n", + " out_argmax, segmented_imgs = self.sess.run(\n", + " [self.test_model.out_argmax,\n", + " self.test_model.segmented_summary],\n", + " feed_dict=feed_dict)\n", + "\n", + " # Colored results for visualization\n", + " #colored_save_path = self.args.out_dir + 'imgs/' + str(self.names_mapper['Y'][idx])\n", + " #if not os.path.exists(os.path.dirname(colored_save_path)):\n", + " # os.makedirs(os.path.dirname(colored_save_path))\n", + " #plt.imsave(colored_save_path, segmented_imgs[0])\n", + "\n", + " # Results for official evaluation\n", + " #save_path = self.args.out_dir + 'results/' + str(self.names_mapper['Y'][idx])\n", + " #if not os.path.exists(os.path.dirname(save_path)):\n", + " # os.makedirs(os.path.dirname(save_path))\n", + " #output = postprocess(out_argmax[0])\n", + " #misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest'))\n", + "\n", + "\n", + " # print in console\n", + " tt.close()\n", + "\n", + " \n", + " def finalize(self):\n", + " self.reporter.finalize()\n", + " self.summary_writer.close()\n", + " self.save_model()\n", + "\n", + " " + ] }, { "cell_type": "code", diff --git a/models/basic/basic_model.py b/models/basic/basic_model.py index a202d7e..d16a162 100644 --- a/models/basic/basic_model.py +++ b/models/basic/basic_model.py @@ -36,7 +36,7 @@ def __init__(self, args): self.params.img_height = self.args.img_height self.params.num_channels = self.args.num_channels self.params.num_classes = self.args.num_classes - self.params.class_weights = np.load(self.args.data_dir + 'weights.npy') + #self.params.class_weights = np.load(self.args.data_dir + 'weights.npy') self.params.weighted_loss = self.args.weighted_loss # Input self.x_pl = None @@ -106,7 +106,7 @@ def build(self): def init_input(self): with tf.name_scope('input'): self.x_pl = tf.placeholder(tf.float32, - [self.args.batch_size, self.params.img_height, self.params.img_width, 3]) + [self.args.batch_size, self.params.img_height, self.params.img_width, self.params.num_channels]) self.y_pl = tf.placeholder(tf.int32, [self.args.batch_size, self.params.img_height, self.params.img_width]) # self.curr_learning_rate= tf.placeholder(tf.float32) diff --git a/models/encoders/depthnet.py b/models/encoders/depthnet.py new file mode 100644 index 0000000..4b2c98c --- /dev/null +++ b/models/encoders/depthnet.py @@ -0,0 +1,164 @@ +import tensorflow as tf +from layers.convolution import depthwise_separable_conv2d, conv2d +import os +from utils.misc import load_obj, save_obj + +class DepthNet: + 
""" + DepthNet Encoder class + """ + MEAN = 1000.0 + + def __init__(self, x_input, + num_classes, + pretrained_path, + train_flag, + width_multipler=1.0, + weight_decay=5e-4): + + # init parameters and input + self.x_input = x_input + self.num_classes = num_classes + self.train_flag = train_flag + self.wd = weight_decay + self.pretrained_path = os.path.realpath(os.getcwd()) + "/" + pretrained_path + self.width_multiplier = width_multipler + + # All layers + self.conv1_1 = None + + self.conv2_1 = None + self.conv2_2 = None + + self.conv3_1 = None + self.conv3_2 = None + + self.conv4_1 = None + self.conv4_2 = None + + self.conv5_1 = None + self.conv5_2 = None + self.conv5_3 = None + self.conv5_4 = None + self.conv5_5 = None + self.conv5_6 = None + + self.conv6_1 = None + self.flattened = None + + self.score_fr = None + + # These feed layers are for the decoder + self.feed1 = None + self.feed2 = None + + def build(self): + self.encoder_build() + + @staticmethod + def _debug(operation): + print("Layer_name: " + operation.op.name + " -Output_Shape: " + str(operation.shape.as_list())) + + def encoder_build(self): + print("Building the DepthNet..") + with tf.variable_scope('mobilenet_encoder'): + with tf.name_scope('Pre_Processing'): + preprocessed_input = self.x_input - DepthNet.MEAN + + self.conv1_1 = conv2d('conv_1', preprocessed_input, num_filters=int(round(32 * self.width_multiplier)), + kernel_size=(3, 3), + padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, + is_training=self.train_flag, l2_strength=self.wd) + + self._debug(self.conv1_1) + self.conv2_1 = depthwise_separable_conv2d('conv_ds_2', self.conv1_1, width_multiplier=self.width_multiplier, + num_filters=64, kernel_size=(3, 3), padding='SAME', stride=(1, 1), + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd, activation=tf.nn.relu6) + self._debug(self.conv2_1) + self.conv2_2 = depthwise_separable_conv2d('conv_ds_3', self.conv2_1, width_multiplier=self.width_multiplier, + num_filters=128, kernel_size=(3, 3), padding='SAME', + stride=(2, 2), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv2_2) + self.conv3_1 = depthwise_separable_conv2d('conv_ds_4', self.conv2_2, width_multiplier=self.width_multiplier, + num_filters=128, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv3_1) + self.conv3_2 = depthwise_separable_conv2d('conv_ds_5', self.conv3_1, width_multiplier=self.width_multiplier, + num_filters=256, kernel_size=(3, 3), padding='SAME', + stride=(2, 2), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv3_2) + self.conv4_1 = depthwise_separable_conv2d('conv_ds_6', self.conv3_2, width_multiplier=self.width_multiplier, + num_filters=256, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv4_1) + self.conv4_2 = depthwise_separable_conv2d('conv_ds_7', self.conv4_1, width_multiplier=self.width_multiplier, + num_filters=512, kernel_size=(3, 3), padding='SAME', + stride=(2, 2), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv4_2) + self.conv5_1 = depthwise_separable_conv2d('conv_ds_8', self.conv4_2, 
width_multiplier=self.width_multiplier, + num_filters=512, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv5_1) + self.conv5_2 = depthwise_separable_conv2d('conv_ds_9', self.conv5_1, width_multiplier=self.width_multiplier, + num_filters=512, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv5_2) + self.conv5_3 = depthwise_separable_conv2d('conv_ds_10', self.conv5_2, + width_multiplier=self.width_multiplier, + num_filters=512, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv5_3) + self.conv5_4 = depthwise_separable_conv2d('conv_ds_11', self.conv5_3, + width_multiplier=self.width_multiplier, + num_filters=512, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv5_4) + self.conv5_5 = depthwise_separable_conv2d('conv_ds_12', self.conv5_4, + width_multiplier=self.width_multiplier, + num_filters=512, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv5_5) + self.conv5_6 = depthwise_separable_conv2d('conv_ds_13', self.conv5_5, + width_multiplier=self.width_multiplier, + num_filters=1024, kernel_size=(3, 3), padding='SAME', + stride=(2, 2), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv5_6) + self.conv6_1 = depthwise_separable_conv2d('conv_ds_14', self.conv5_6, + width_multiplier=self.width_multiplier, + num_filters=1024, kernel_size=(3, 3), padding='SAME', + stride=(1, 1), activation=tf.nn.relu6, + batchnorm_enabled=True, is_training=self.train_flag, + l2_strength=self.wd) + self._debug(self.conv6_1) + # Pooling is removed. 
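+ # conv6_1 is at 1/32 of the input resolution (five stride-2 convolutions);
+ # a 1x1 conv turns its features into per-class scores, while conv4_2 (1/16)
+ # and conv3_2 (1/8) are exposed as skip features for the decoder below.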
+ self.score_fr = conv2d('conv_1c_1x1', self.conv6_1, num_filters=self.num_classes, l2_strength=self.wd, + kernel_size=(1, 1)) + + self._debug(self.score_fr) + self.feed1 = self.conv4_2 + self.feed2 = self.conv3_2 + + print("\nEncoder DepthNet is built successfully\n\n") diff --git a/models/fcn8s_depthnet.py b/models/fcn8s_depthnet.py new file mode 100644 index 0000000..de4c520 --- /dev/null +++ b/models/fcn8s_depthnet.py @@ -0,0 +1,78 @@ +from models.basic.basic_model import BasicModel +from models.encoders.depthnet import DepthNet +from models.encoders.mobilenet import MobileNet +from layers.convolution import conv2d_transpose, conv2d + +import tensorflow as tf +from utils.misc import _debug +import pdb + +class FCN8sDepthNet(BasicModel): + """ + FCN8sDepthNet Model Architecture + """ + + def __init__(self, args): + super().__init__(args) + # init encoder + self.encoder = None + # init network layers + self.upscore2 = None + self.score_feed1 = None + self.fuse_feed1 = None + self.upscore4 = None + self.score_feed2 = None + self.fuse_feed2 = None + self.upscore8 = None + + def build(self): + print("\nBuilding the MODEL...") + self.init_input() + self.init_network() + self.init_output() + self.init_train() + self.init_summaries() + print("The Model is built successfully\n") + + def init_network(self): + """ + Building the Network here + :return: + """ + + + # Init DepthNet as an encoder + self.encoder = DepthNet(x_input=self.x_pl, num_classes=self.params.num_classes, + pretrained_path=self.args.pretrained_path, + train_flag=self.is_training, width_multipler=1.0, weight_decay=self.args.weight_decay) + + # Build Encoding part + self.encoder.build() + + # Build Decoding part + with tf.name_scope('upscore_2s'): + self.upscore2 = conv2d_transpose('upscore2', x=self.encoder.score_fr, + output_shape=self.encoder.feed1.shape.as_list()[0:3] + [ + self.params.num_classes], batchnorm_enabled= self.args.batchnorm_enabled, is_training=self.is_training, + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self.score_feed1 = conv2d('score_feed1', x=self.encoder.feed1, batchnorm_enabled= self.args.batchnorm_enabled, is_training=self.is_training, + num_filters=self.params.num_classes, kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2) + + with tf.name_scope('upscore_4s'): + self.upscore4 = conv2d_transpose('upscore4', x=self.fuse_feed1, batchnorm_enabled= self.args.batchnorm_enabled, is_training=self.is_training, + output_shape=self.encoder.feed2.shape.as_list()[0:3] + [ + self.params.num_classes], + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self.score_feed2 = conv2d('score_feed2', x=self.encoder.feed2, batchnorm_enabled= self.args.batchnorm_enabled, is_training=self.is_training, + num_filters=self.params.num_classes, kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4) + + with tf.name_scope('upscore_8s'): + self.upscore8 = conv2d_transpose('upscore8', x=self.fuse_feed2, + output_shape=self.x_pl.shape.as_list()[0:3] + [self.params.num_classes], + kernel_size=(16, 16), stride=(8, 8), l2_strength=self.encoder.wd) + + self.logits = self.upscore8 diff --git a/models/unet_depthnet.py b/models/unet_depthnet.py new file mode 100644 index 0000000..c499e8e --- /dev/null +++ b/models/unet_depthnet.py @@ -0,0 +1,123 @@ +from models.basic.basic_model import BasicModel +from models.encoders.depthnet import DepthNet +from layers.convolution import conv2d_transpose, 
conv2d + +import tensorflow as tf + + +class UNetDepthNet(BasicModel): + def __init__(self, args): + super().__init__(args) + # init encoder + self.encoder = None + + def build(self): + print("\nBuilding the MODEL...") + self.init_input() + self.init_network() + self.init_output() + self.init_train() + self.init_summaries() + print("The Model is built successfully\n") + + @staticmethod + def _debug(operation): + print("Layer_name: " + operation.op.name + " -Output_Shape: " + str(operation.shape.as_list())) + + def init_network(self): + """ + Building the Network here + :return: + """ + + # Init DepthNet as an encoder + self.encoder = DepthNet(x_input=self.x_pl, num_classes=self.params.num_classes, + pretrained_path=self.args.pretrained_path, + train_flag=self.is_training, width_multipler=1.0, weight_decay=self.args.weight_decay) + + # Build Encoding part + self.encoder.build() + + # Build Decoding part + with tf.name_scope('upscale_1'): + self.expand11 = conv2d('expand1_1', x=self.encoder.conv5_6, batchnorm_enabled=True, is_training= self.is_training, + num_filters=self.encoder.conv5_5.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand11) + self.upscale1 = conv2d_transpose('upscale1', x=self.expand11,is_training= self.is_training, + output_shape=self.encoder.conv5_5.shape.as_list(), batchnorm_enabled=True, + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self._debug(self.upscale1) + self.add1 = tf.add(self.upscale1, self.encoder.conv5_5) + self._debug(self.add1) + self.expand12 = conv2d('expand1_2', x=self.add1, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv5_5.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand12) + + with tf.name_scope('upscale_2'): + self.expand21 = conv2d('expand2_1', x=self.expand12, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv4_1.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand21) + self.upscale2 = conv2d_transpose('upscale2', x=self.expand21,is_training= self.is_training, + output_shape=self.encoder.conv4_1.shape.as_list(),batchnorm_enabled=True, + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self._debug(self.upscale2) + self.add2 = tf.add(self.upscale2, self.encoder.conv4_1) + self._debug(self.add2) + self.expand22 = conv2d('expand2_2', x=self.add2, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv4_1.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand22) + + with tf.name_scope('upscale_3'): + self.expand31 = conv2d('expand3_1', x=self.expand22, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv3_1.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand31) + self.upscale3 = conv2d_transpose('upscale3', x=self.expand31, batchnorm_enabled=True,is_training= self.is_training, + output_shape=self.encoder.conv3_1.shape.as_list(), + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self._debug(self.upscale3) + self.add3 = tf.add(self.upscale3, self.encoder.conv3_1) + self._debug(self.add3) + self.expand32 = conv2d('expand3_2', x=self.add3, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv3_1.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand32) + + with 
tf.name_scope('upscale_4'): + self.expand41 = conv2d('expand4_1', x=self.expand32, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv2_1.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand41) + self.upscale4 = conv2d_transpose('upscale4', x=self.expand41, batchnorm_enabled=True,is_training= self.is_training, + output_shape=self.encoder.conv2_1.shape.as_list(), + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self._debug(self.upscale4) + self.add4 = tf.add(self.upscale4, self.encoder.conv2_1) + self._debug(self.add4) + self.expand42 = conv2d('expand4_2', x=self.add4, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv2_1.shape.as_list()[3], kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.expand42) + + with tf.name_scope('upscale_5'): + self.upscale5 = conv2d_transpose('upscale5', x=self.expand42, batchnorm_enabled=True,is_training= self.is_training, + output_shape=self.x_pl.shape.as_list()[0:3] + [ + self.encoder.conv2_1.shape.as_list()[3]], + kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + self._debug(self.upscale5) + self.expand5 = conv2d('expand5', x=self.upscale5, batchnorm_enabled=True,is_training= self.is_training, + num_filters=self.encoder.conv1_1.shape.as_list()[3], kernel_size=(1, 1),dropout_keep_prob=0.5, + l2_strength=self.encoder.wd) + self._debug(self.expand5) + + with tf.name_scope('final_score'): + self.fscore = conv2d('fscore', x=self.expand5, + num_filters=self.params.num_classes, kernel_size=(1, 1), + l2_strength=self.encoder.wd) + self._debug(self.fscore) + + self.logits = self.fscore diff --git a/train/train.py b/train/train.py deleted file mode 100644 index 0841fbd..0000000 --- a/train/train.py +++ /dev/null @@ -1,1024 +0,0 @@ -""" -Trainer class to train Segmentation models -""" - -from train.basic_train import BasicTrain -from metrics.metrics import Metrics -from utils.reporter import Reporter -from utils.misc import timeit -from utils.average_meter import FPSMeter - -from tqdm import tqdm -import numpy as np -import tensorflow as tf -import matplotlib -import time -import h5py -import pickle -from utils.augmentation import flip_randomly_left_right_image_with_annotation, \ - scale_randomly_image_with_annotation_with_fixed_size_output -import scipy.misc as misc - -matplotlib.use('Agg') -import matplotlib.pyplot as plt -# import cv2 - -from utils.img_utils import decode_labels -from utils.seg_dataloader import SegDataLoader -from tensorflow.contrib.data import Iterator -import os -import pdb -import torchfile -from data.postprocess import postprocess - - -class Train(BasicTrain): - """ - Trainer class - """ - - def __init__(self, args, sess, train_model, test_model): - """ - Call the constructor of the base class - init summaries - init loading data - :param args: - :param sess: - :param model: - :return: - """ - super().__init__(args, sess, train_model, test_model) - ################################################################################## - # Init summaries - - # Summary variables - self.scalar_summary_tags = ['mean_iou_on_val', - 'train-loss-per-epoch', 'val-loss-per-epoch', - 'train-acc-per-epoch', 'val-acc-per-epoch'] - self.images_summary_tags = [ - ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), - ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])] - self.summary_tags = [] - self.summary_placeholders = {} 
- self.summary_ops = {} - # init summaries and it's operators - self.init_summaries() - # Create summary writer - self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) - ################################################################################## - # Init load data and generator - self.generator = None - if self.args.data_mode == "experiment_tfdata": - self.data_session = None - self.train_next_batch, self.train_data_len = self.init_tfdata(self.args.batch_size, self.args.abs_data_dir, - (self.args.img_height, self.args.img_width), - mode='train') - self.num_iterations_training_per_epoch = self.train_data_len // self.args.batch_size - self.generator = self.train_tfdata_generator - elif self.args.data_mode == "experiment_h5": - self.train_data = None - self.train_data_len = None - self.val_data = None - self.val_data_len = None - self.num_iterations_training_per_epoch = None - self.num_iterations_validation_per_epoch = None - self.load_train_data_h5() - self.generator = self.train_h5_generator - elif self.args.data_mode == "experiment_v2": - self.targets_resize = self.args.targets_resize - self.train_data = None - self.train_data_len = None - self.val_data = None - self.val_data_len = None - self.num_iterations_training_per_epoch = None - self.num_iterations_validation_per_epoch = None - self.load_train_data(v2=True) - self.generator = self.train_generator - elif self.args.data_mode == "experiment": - self.train_data = None - self.train_data_len = None - self.val_data = None - self.val_data_len = None - self.num_iterations_training_per_epoch = None - self.num_iterations_validation_per_epoch = None - self.load_train_data() - self.generator = self.train_generator - elif self.args.data_mode == "test_tfdata": - self.test_data = None - self.test_data_len = None - self.num_iterations_testing_per_epoch = None - self.load_val_data() - self.generator = self.test_tfdata_generator - elif self.args.data_mode == "test": - self.test_data = None - self.test_data_len = None - self.num_iterations_testing_per_epoch = None - self.load_val_data() - self.generator = self.test_generator - elif self.args.data_mode == "test_eval": - self.test_data = None - self.test_data_len = None - self.num_iterations_testing_per_epoch = None - self.names_mapper = None - self.load_test_data() - self.generator = self.test_generator - elif self.args.data_mode == "test_v2": - self.targets_resize = self.args.targets_resize - self.test_data = None - self.test_data_len = None - self.num_iterations_testing_per_epoch = None - self.load_val_data(v2=True) - self.generator = self.test_generator - elif self.args.data_mode == "video": - self.args.data_mode = "test" - self.test_data = None - self.test_data_len = None - self.num_iterations_testing_per_epoch = None - self.load_vid_data() - self.generator = self.test_generator - elif self.args.data_mode == "debug": - print("Debugging photo loading..") - # self.debug_x= misc.imread('/data/menna/cityscapes/leftImg8bit/val/lindau/lindau_000048_000019_leftImg8bit.png') - # self.debug_y= misc.imread('/data/menna/cityscapes/gtFine/val/lindau/lindau_000048_000019_gtFine_labelIds.png') - # self.debug_x= np.expand_dims(misc.imresize(self.debug_x, (512,1024)), axis=0) - # self.debug_y= np.expand_dims(misc.imresize(self.debug_y, (512,1024)), axis=0) - self.debug_x = np.load('data/debug/debug_x.npy') - self.debug_y = np.load('data/debug/debug_y.npy') - print("Debugging photo loaded") - else: - print("ERROR Please select a proper data_mode BYE") - exit(-1) - 
################################################################################## - # Init metrics class - self.metrics = Metrics(self.args.num_classes) - # Init reporter class - if self.args.mode == 'train' or 'overfit': - self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) - elif self.args.mode == 'test': - self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) - ################################################################################## - - def crop(self): - sh = self.val_data['X'].shape - temp_val_data = {'X': np.zeros((sh[0] * 2, sh[1], sh[2] // 2, sh[3]), self.val_data['X'].dtype), - 'Y': np.zeros((sh[0] * 2, sh[1], sh[2] // 2), self.val_data['Y'].dtype)} - for i in range(sh[0]): - temp_val_data['X'][i * 2, :, :, :] = self.val_data['X'][i, :, :sh[2] // 2, :] - temp_val_data['X'][i * 2 + 1, :, :, :] = self.val_data['X'][i, :, sh[2] // 2:, :] - temp_val_data['Y'][i * 2, :, :] = self.val_data['Y'][i, :, :sh[2] // 2] - temp_val_data['Y'][i * 2 + 1, :, :] = self.val_data['Y'][i, :, sh[2] // 2:] - - self.val_data = temp_val_data - - def init_tfdata(self, batch_size, main_dir, resize_shape, mode='train'): - self.data_session = tf.Session() - print("Creating the iterator for training data") - with tf.device('/cpu:0'): - segdl = SegDataLoader(main_dir, batch_size, (resize_shape[0], resize_shape[1]), resize_shape, - # * 2), resize_shape, - 'data/cityscapes_tfdata/train.txt') - iterator = Iterator.from_structure(segdl.data_tr.output_types, segdl.data_tr.output_shapes) - next_batch = iterator.get_next() - - self.init_op = iterator.make_initializer(segdl.data_tr) - self.data_session.run(self.init_op) - - print("Loading Validation data in memoryfor faster training..") - self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"), - 'Y': np.load(self.args.data_dir + "Y_val.npy")} - # self.crop() - # import cv2 - # cv2.imshow('crop1', self.val_data['X'][0,:,:,:]) - # cv2.imshow('crop2', self.val_data['X'][1,:,:,:]) - # cv2.imshow('seg1', self.val_data['Y'][0,:,:]) - # cv2.imshow('seg2', self.val_data['Y'][1,:,:]) - # cv2.waitKey() - - self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size - # self.num_iterations_validation_per_epoch = ( - # self.val_data_len + self.args.batch_size - 1) // self.args.batch_size - self.num_iterations_validation_per_epoch = self.val_data_len // self.args.batch_size - - print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) - print("Val-shape-y -- " + str(self.val_data['Y'].shape)) - print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) - print("Validation data is loaded") - - return next_batch, segdl.data_len - - @timeit - def load_overfit_data(self): - print("Loading data..") - self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"), - 'Y': np.load(self.args.data_dir + "Y_train.npy")} - self.train_data_len = self.train_data['X'].shape[0] - self.train_data['X'].shape[0] % self.args.batch_size - self.num_iterations_training_per_epoch = ( - self.train_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Train-shape-x -- " + str(self.train_data['X'].shape)) - print("Train-shape-y -- " + str(self.train_data['Y'].shape)) - print("Num of iterations in one epoch -- " + str(self.num_iterations_training_per_epoch)) - print("Overfitting data is loaded") - - print("Loading Validation data..") - self.val_data = self.train_data - self.val_data_len = 
self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size - self.num_iterations_validation_per_epoch = ( - self.val_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) - print("Val-shape-y -- " + str(self.val_data['Y'].shape)) - print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) - print("Validation data is loaded") - - def overfit_generator(self): - start = 0 - new_epoch_flag = True - idx = None - while True: - # init index array if it is a new_epoch - if new_epoch_flag: - if self.args.shuffle: - idx = np.random.choice(self.train_data_len, self.train_data_len, replace=False) - else: - idx = np.arange(self.train_data_len) - new_epoch_flag = False - - # select the mini_batches - mask = idx[start:start + self.args.batch_size] - x_batch = self.train_data['X'][mask] - y_batch = self.train_data['Y'][mask] - - start += self.args.batch_size - if start >= self.train_data_len: - start = 0 - new_epoch_flag = True - - yield x_batch, y_batch - - def init_summaries(self): - """ - Create the summary part of the graph - :return: - """ - with tf.variable_scope('train-summary-per-epoch'): - for tag in self.scalar_summary_tags: - self.summary_tags += tag - self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag) - self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag]) - for tag, shape in self.images_summary_tags: - self.summary_tags += tag - self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag) - self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag], max_outputs=10) - - def add_summary(self, step, summaries_dict=None, summaries_merged=None): - """ - Add the summaries to tensorboard - :param step: - :param summaries_dict: - :param summaries_merged: - :return: - """ - if summaries_dict is not None: - summary_list = self.sess.run([self.summary_ops[tag] for tag in summaries_dict.keys()], - {self.summary_placeholders[tag]: value for tag, value in - summaries_dict.items()}) - for summary in summary_list: - self.summary_writer.add_summary(summary, step) - if summaries_merged is not None: - self.summary_writer.add_summary(summaries_merged, step) - - @timeit - def load_train_data(self, v2=False): - print("Loading Training data..") - self.train_data = {'X': np.load(self.args.data_dir + "X_train.npy"), - 'Y': np.load(self.args.data_dir + "Y_train.npy")} - self.train_data = self.resize(self.train_data) - - if v2: - out_shape = (self.train_data['Y'].shape[1] // self.targets_resize, - self.train_data['Y'].shape[2] // self.targets_resize) - yy = np.zeros((self.train_data['Y'].shape[0], out_shape[0], out_shape[1]), dtype=self.train_data['Y'].dtype) - for y in range(self.train_data['Y'].shape[0]): - yy[y, ...] 
= misc.imresize(self.train_data['Y'][y, ...], out_shape, interp='nearest') - self.train_data['Y'] = yy - self.train_data_len = self.train_data['X'].shape[0] - - self.num_iterations_training_per_epoch = ( - self.train_data_len + self.args.batch_size - 1) // self.args.batch_size - - print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len)) - print("Train-shape-y -- " + str(self.train_data['Y'].shape)) - print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch)) - print("Training data is loaded") - - print("Loading Validation data..") - self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"), - 'Y': np.load(self.args.data_dir + "Y_val.npy")} - self.val_data['Y_large'] = self.val_data['Y'] - if v2: - out_shape = (self.val_data['Y'].shape[1] // self.targets_resize, - self.val_data['Y'].shape[2] // self.targets_resize) - yy = np.zeros((self.val_data['Y'].shape[0], out_shape[0], out_shape[1]), dtype=self.train_data['Y'].dtype) - for y in range(self.val_data['Y'].shape[0]): - yy[y, ...] = misc.imresize(self.val_data['Y'][y, ...], out_shape, interp='nearest') - self.val_data['Y'] = yy - - self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size - self.num_iterations_validation_per_epoch = ( - self.val_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) - print("Val-shape-y -- " + str(self.val_data['Y'].shape)) - print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) - print("Validation data is loaded") - - @timeit - def load_train_data_h5(self): - print("Loading Training data..") - self.train_data = h5py.File(self.args.data_dir + self.args.h5_train_file, 'r') - self.train_data_len = self.args.h5_train_len - self.num_iterations_training_per_epoch = ( - self.train_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Train-shape-x -- " + str(self.train_data['X'].shape) + " " + str(self.train_data_len)) - print("Train-shape-y -- " + str(self.train_data['Y'].shape)) - print("Num of iterations on training data in one epoch -- " + str(self.num_iterations_training_per_epoch)) - print("Training data is loaded") - - print("Loading Validation data..") - self.val_data = {'X': np.load(self.args.data_dir + "X_val.npy"), - 'Y': np.load(self.args.data_dir + "Y_val.npy")} - self.val_data_len = self.val_data['X'].shape[0] - self.val_data['X'].shape[0] % self.args.batch_size - self.num_iterations_validation_per_epoch = ( - self.val_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Val-shape-x -- " + str(self.val_data['X'].shape) + " " + str(self.val_data_len)) - print("Val-shape-y -- " + str(self.val_data['Y'].shape)) - print("Num of iterations on validation data in one epoch -- " + str(self.num_iterations_validation_per_epoch)) - print("Validation data is loaded") - - @timeit - def load_vid_data(self): - print("Loading Video data..") - self.test_data = {'X': np.load(self.args.data_dir + "X_vid.npy")} - self.test_data['Y'] = np.zeros(self.test_data['X'].shape[:3]) - self.test_data_len = self.test_data['X'].shape[0] - print("Vid-shape-x -- " + str(self.test_data['X'].shape)) - print("Vid-shape-y -- " + str(self.test_data['Y'].shape)) - self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Video data is loaded") - - @timeit - def 
load_val_data(self, v2=False): - print("Loading Validation data..") - self.test_data = {'X': np.load(self.args.data_dir + "X_val.npy"), - 'Y': np.load(self.args.data_dir + "Y_val.npy")} - self.test_data = self.resize(self.test_data) - self.test_data['Y_large'] = self.test_data['Y'] - if v2: - out_shape = (self.test_data['Y'].shape[1] // self.targets_resize, - self.test_data['Y'].shape[2] // self.targets_resize) - yy = np.zeros((self.test_data['Y'].shape[0], out_shape[0], out_shape[1]), dtype=self.test_data['Y'].dtype) - for y in range(self.test_data['Y'].shape[0]): - yy[y, ...] = misc.imresize(self.test_data['Y'][y, ...], out_shape, interp='nearest') - self.test_data['Y'] = yy - - self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size - print("Validation-shape-x -- " + str(self.test_data['X'].shape)) - print("Validation-shape-y -- " + str(self.test_data['Y'].shape)) - self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Validation data is loaded") - - @timeit - def load_test_data(self): - print("Loading Testing data..") - self.test_data = {'X': np.load(self.args.data_dir + "X_test.npy")} - self.names_mapper = {'X': np.load(self.args.data_dir + "xnames_test.npy"), - 'Y': np.load(self.args.data_dir + "ynames_test.npy")} - self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size - print("Test-shape-x -- " + str(self.test_data['X'].shape)) - self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // self.args.batch_size - print("Test data is loaded") - - def test_generator(self): - start = 0 - new_epoch_flag = True - idx = None - while True: - # init index array if it is a new_epoch - if new_epoch_flag: - if self.args.shuffle: - idx = np.random.choice(self.test_data_len, self.test_data_len, replace=False) - else: - idx = np.arange(self.test_data_len) - new_epoch_flag = False - - # select the mini_batches - mask = idx[start:start + self.args.batch_size] - x_batch = self.test_data['X'][mask] - y_batch = self.test_data['Y'][mask] - - # update start idx - start += self.args.batch_size - - if start >= self.test_data_len: - start = 0 - new_epoch_flag = True - - yield x_batch, y_batch - - def train_generator(self): - start = 0 - idx = np.random.choice(self.train_data_len, self.num_iterations_training_per_epoch * self.args.batch_size, - replace=True) - while True: - # select the mini_batches - mask = idx[start:start + self.args.batch_size] - x_batch = self.train_data['X'][mask] - y_batch = self.train_data['Y'][mask] - - # update start idx - start += self.args.batch_size - - yield x_batch, y_batch - - if start >= self.train_data_len: - return - - def train_tfdata_generator(self): - with tf.device('/cpu:0'): - while True: - x_batch, y_batch = self.data_session.run(self.train_next_batch) - yield x_batch, y_batch[:, :, :, 0] - - def train_h5_generator(self): - start = 0 - idx = np.random.choice(self.train_data_len, self.train_data_len, - replace=False) - while True: - # select the mini_batches - mask = idx[start:start + self.args.batch_size] - x_batch = self.train_data['X'][sorted(mask.tolist())] - y_batch = self.train_data['Y'][sorted(mask.tolist())] - - # update start idx - start += self.args.batch_size - - if start >= self.train_data_len: - return - - yield x_batch, y_batch - - def resize(self, data): - X = [] - Y = [] - for i in range(data['X'].shape[0]): - X.append(misc.imresize(data['X'][i, ...], 
(self.args.img_height, self.args.img_width))) - Y.append(misc.imresize(data['Y'][i, ...], (self.args.img_height, self.args.img_width), 'nearest')) - data['X'] = np.asarray(X) - data['Y'] = np.asarray(Y) - return data - - def train(self): - print("Training mode will begin NOW ..") - # curr_lr= self.model.args.learning_rate - for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1): - - # init tqdm and get the epoch value - tt = tqdm(self.generator(), total=self.num_iterations_training_per_epoch, - desc="epoch-" + str(cur_epoch) + "-") - - # init the current iterations - cur_iteration = 0 - - # init acc and loss lists - loss_list = [] - acc_list = [] - - # loop by the number of iterations - for x_batch, y_batch in tt: - - # get the cur_it for the summary - cur_it = self.model.global_step_tensor.eval(self.sess) - - # Feed this variables to the network - feed_dict = {self.model.x_pl: x_batch, - self.model.y_pl: y_batch, - self.model.is_training: True - # self.model.curr_learning_rate:curr_lr - } - - # Run the feed forward but the last iteration finalize what you want to do - if cur_iteration < self.num_iterations_training_per_epoch - 1: - - # run the feed_forward - _, loss, acc, summaries_merged = self.sess.run( - [self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries], - feed_dict=feed_dict) - # log loss and acc - loss_list += [loss] - acc_list += [acc] - # summarize - # self.add_summary(cur_it, summaries_merged=summaries_merged) - - else: - # run the feed_forward - if self.args.data_mode == 'experiment_v2': - _, loss, acc, summaries_merged = self.sess.run( - [self.model.train_op, self.model.loss, self.model.accuracy, - self.model.merged_summaries], - feed_dict=feed_dict) - else: - _, loss, acc, summaries_merged, segmented_imgs = self.sess.run( - [self.model.train_op, self.model.loss, self.model.accuracy, - self.model.merged_summaries, self.model.segmented_summary], - feed_dict=feed_dict) - - # log loss and acc - loss_list += [loss] - acc_list += [acc] - total_loss = np.mean(loss_list) - total_acc = np.mean(acc_list) - # summarize - summaries_dict = dict() - summaries_dict['train-loss-per-epoch'] = total_loss - summaries_dict['train-acc-per-epoch'] = total_acc - - if self.args.data_mode != 'experiment_v2': - summaries_dict['train_prediction_sample'] = segmented_imgs - # self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) - - # report - self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch), str(total_acc)) - self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch), str(total_loss)) - self.reporter.finalize() - - # Update the Global step - self.model.global_step_assign_op.eval(session=self.sess, - feed_dict={self.model.global_step_input: cur_it + 1}) - - # Update the Cur Epoch tensor - # it is the last thing because if it is interrupted it repeat this - self.model.global_epoch_assign_op.eval(session=self.sess, - feed_dict={self.model.global_epoch_input: cur_epoch + 1}) - - # print in console - tt.close() - print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[ - :6]) - - # Break the loop to finalize this epoch - break - - # Update the Global step - self.model.global_step_assign_op.eval(session=self.sess, - feed_dict={self.model.global_step_input: cur_it + 1}) - - # update the cur_iteration - cur_iteration += 1 - - # Save the current checkpoint - if cur_epoch % self.args.save_every 
== 0: - self.save_model() - - # Test the model on validation - if cur_epoch % self.args.test_every == 0: - self.test_per_epoch(step=self.model.global_step_tensor.eval(self.sess), - epoch=self.model.global_epoch_tensor.eval(self.sess)) - # if cur_epoch % self.args.learning_decay_every == 0: - # curr_lr= curr_lr*self.args.learning_decay - # print('Current learning rate is ', curr_lr) - - print("Training Finished") - - def test_per_epoch(self, step, epoch): - print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..") - - # init tqdm and get the epoch value - tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch, - desc="Val-epoch-" + str(epoch) + "-") - - # init acc and loss lists - loss_list = [] - acc_list = [] - inf_list = [] - - # idx of minibatch - idx = 0 - - # reset metrics - self.metrics.reset() - - # get the maximum iou to compare with and save the best model - max_iou = self.model.best_iou_tensor.eval(self.sess) - - # loop by the number of iterations - for cur_iteration in tt: - # load minibatches - x_batch = self.val_data['X'][idx:idx + self.args.batch_size] - y_batch = self.val_data['Y'][idx:idx + self.args.batch_size] - if self.args.data_mode == 'experiment_v2': - y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size] - - # update idx of minibatch - idx += self.args.batch_size - - # Feed this variables to the network - feed_dict = {self.model.x_pl: x_batch, - self.model.y_pl: y_batch, - self.model.is_training: False - } - - # Run the feed forward but the last iteration finalize what you want to do - if cur_iteration < self.num_iterations_validation_per_epoch - 1: - - start = time.time() - # run the feed_forward - - out_argmax, loss, acc, summaries_merged = self.sess.run( - [self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries], - feed_dict=feed_dict) - - end = time.time() - # log loss and acc - loss_list += [loss] - acc_list += [acc] - inf_list += [end - start] - if self.args.data_mode == 'experiment_v2': - yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]), - dtype=np.uint32) - out_argmax = np.asarray(out_argmax, dtype=np.uint8) - for y in range(out_argmax.shape[0]): - yy[y, ...] 
= misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest') - y_batch = y_batch_large - out_argmax = yy - - # log metrics - self.metrics.update_metrics_batch(out_argmax, y_batch) - - else: - start = time.time() - # run the feed_forward - if self.args.data_mode == 'experiment_v2': # Issues in concatenating gt and img with diff sizes now for segmented_imgs - out_argmax, acc = self.sess.run( - [self.test_model.out_argmax, self.test_model.accuracy], - feed_dict=feed_dict) - else: - out_argmax, acc, segmented_imgs = self.sess.run( - [self.test_model.out_argmax, self.test_model.accuracy, self.test_model.segmented_summary], - feed_dict=feed_dict) - - end = time.time() - # log loss and acc - acc_list += [acc] - inf_list += [end - start] - # log metrics - self.metrics.update_metrics_batch(out_argmax, y_batch) - # mean over batches - total_acc = np.mean(acc_list) - mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch) - mean_iou_arr = self.metrics.iou - mean_inference = str(np.mean(inf_list)) + '-seconds' - # summarize - summaries_dict = dict() - summaries_dict['val-acc-per-epoch'] = total_acc - summaries_dict['mean_iou_on_val'] = mean_iou - if self.args.data_mode != 'experiment_v2': # Issues in concatenating gt and img with diff sizes now for segmented_imgs - summaries_dict['val_prediction_sample'] = segmented_imgs - # self.add_summary(step, summaries_dict=summaries_dict, summaries_merged=summaries_merged) - - # report - self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc)) - self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch), - str(mean_inference)) - self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr) - self.reporter.finalize() - - # print in console - tt.close() - print("Val-epoch-" + str(epoch) + "-" + - "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou)) - print("Last_max_iou: " + str(max_iou)) - if mean_iou > max_iou: - print("This validation got a new best iou. 
so we will save this one") - # save the best model - self.save_best_model() - # Set the new maximum - self.model.best_iou_assign_op.eval(session=self.sess, - feed_dict={self.model.best_iou_input: mean_iou}) - else: - print("hmm not the best validation epoch :/..") - break - - # Break the loop to finalize this epoch - - def linknet_postprocess(self, gt): - gt2 = gt - 1 - gt2[gt == -1] = 19 - return gt2 - - def test(self, pkl=False): - print("Testing mode will begin NOW..") - - # load the best model checkpoint to test on it - if not pkl: - self.load_best_model() - - # init tqdm and get the epoch value - tt = tqdm(range(self.test_data_len)) - # naming = np.load(self.args.data_dir + 'names_train.npy') - - # init acc and loss lists - acc_list = [] - img_list = [] - - # idx of image - idx = 0 - - # reset metrics - self.metrics.reset() - - # loop by the number of iterations - for cur_iteration in tt: - # load mini_batches - x_batch = self.test_data['X'][idx:idx + 1] - y_batch = self.test_data['Y'][idx:idx + 1] - if self.args.data_mode == 'test_v2': - y_batch_large = self.test_data['Y_large'][idx:idx + 1] - - idx += 1 - - # Feed this variables to the network - if self.args.random_cropping: - feed_dict = {self.test_model.x_pl_before: x_batch, - self.test_model.y_pl_before: y_batch, - self.test_model.is_training: False, - } - else: - feed_dict = {self.test_model.x_pl: x_batch, - self.test_model.y_pl: y_batch, - self.test_model.is_training: False - } - - # run the feed_forward - if self.args.data_mode == 'test_v2': - out_argmax, acc = self.sess.run( - [self.test_model.out_argmax, self.test_model.accuracy], - feed_dict=feed_dict) - else: - out_argmax, acc, segmented_imgs = self.sess.run( - [self.test_model.out_argmax, self.test_model.accuracy, - # self.test_model.merged_summaries, self.test_model.segmented_summary], - self.test_model.segmented_summary], - feed_dict=feed_dict) - - if self.args.data_mode == 'test_v2': - yy = np.zeros((out_argmax.shape[0], y_batch_large.shape[1], y_batch_large.shape[2]), dtype=np.uint32) - out_argmax = np.asarray(out_argmax, dtype=np.uint8) - for y in range(out_argmax.shape[0]): - yy[y, ...] 
= misc.imresize(out_argmax[y, ...], y_batch_large.shape[1:], interp='nearest') - y_batch = y_batch_large - out_argmax = yy - - if pkl: - out_argmax[0] = self.linknet_postprocess(out_argmax[0]) - segmented_imgs = decode_labels(out_argmax, 20) - - # print('mean preds ', out_argmax.mean()) - # np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0]) - if self.args.data_mode == 'test': - plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0]) - - # log loss and acc - acc_list += [acc] - - # log metrics - if self.args.random_cropping: - y1 = np.expand_dims(y_batch[0, :, :512], axis=0) - y2 = np.expand_dims(y_batch[0, :, 512:], axis=0) - y_batch = np.concatenate((y1, y2), axis=0) - self.metrics.update_metrics(out_argmax, y_batch, 0, 0) - else: - self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0) - - # mean over batches - total_loss = 0 - total_acc = np.mean(acc_list) - mean_iou = self.metrics.compute_final_metrics(self.test_data_len) - - # print in console - tt.close() - print("Here the statistics") - print("Total_loss: " + str(total_loss)) - print("Total_acc: " + str(total_acc)[:6]) - print("mean_iou: " + str(mean_iou)) - - print("Plotting imgs") - for i in range(len(img_list)): - plt.imsave(self.args.imgs_dir + 'test_' + str(i) + '.png', img_list[i]) - - def test_eval(self, pkl=False): - print("Testing mode will begin NOW..") - - # load the best model checkpoint to test on it - if not pkl: - self.load_best_model() - - # init tqdm and get the epoch value - tt = tqdm(range(self.test_data_len)) - - # idx of image - idx = 0 - - # loop by the number of iterations - for cur_iteration in tt: - # load mini_batches - x_batch = self.test_data['X'][idx:idx + 1] - - # Feed this variables to the network - if self.args.random_cropping: - feed_dict = {self.test_model.x_pl_before: x_batch, - self.test_model.is_training: False, - } - else: - feed_dict = {self.test_model.x_pl: x_batch, - self.test_model.is_training: False - } - - # run the feed_forward - out_argmax, segmented_imgs = self.sess.run( - [self.test_model.out_argmax, - self.test_model.segmented_summary], - feed_dict=feed_dict) - - if pkl: - out_argmax[0] = self.linknet_postprocess(out_argmax[0]) - segmented_imgs = decode_labels(out_argmax, 20) - - # Colored results for visualization - colored_save_path = self.args.out_dir + 'imgs/' + str(self.names_mapper['Y'][idx]) - if not os.path.exists(os.path.dirname(colored_save_path)): - os.makedirs(os.path.dirname(colored_save_path)) - plt.imsave(colored_save_path, segmented_imgs[0]) - - # Results for official evaluation - save_path = self.args.out_dir + 'results/' + str(self.names_mapper['Y'][idx]) - if not os.path.exists(os.path.dirname(save_path)): - os.makedirs(os.path.dirname(save_path)) - output = postprocess(out_argmax[0]) - misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest')) - - idx += 1 - - # print in console - tt.close() - - def test_inference(self): - """ - Like the testing function but this one is for calculate the inference time - and measure the frame per second - """ - print("INFERENCE mode will begin NOW..") - - # load the best model checkpoint to test on it - self.load_best_model() - - # output_node: network/output/Argmax - # input_node: network/input/Placeholder - # for n in tf.get_default_graph().as_graph_def().node: - # if 'input' in n.name:#if 'Argmax' in n.name: - # import pdb; pdb.set_trace() - print("Saving graph...") - tf.train.write_graph(self.sess.graph_def, ".", 'graph.pb') - 
print("Graph saved successfully.\n\n") - exit(1) - - # init tqdm and get the epoch value - tt = tqdm(range(self.test_data_len)) - - # idx of image - idx = 0 - - # create the FPS Meter - fps_meter = FPSMeter() - - # loop by the number of iterations - for cur_iteration in tt: - # load mini_batches - x_batch = self.test_data['X'][idx:idx + 1] - y_batch = self.test_data['Y'][idx:idx + 1] - - # update idx of mini_batch - idx += 1 - - # Feed this variables to the network - if self.args.random_cropping: - feed_dict = {self.test_model.x_pl_before: x_batch, - self.test_model.y_pl_before: y_batch - # self.test_model.is_training: False, - } - else: - feed_dict = {self.test_model.x_pl: x_batch, - self.test_model.y_pl: y_batch - # self.test_model.is_training: False - } - - # calculate the time of one inference - start = time.time() - - # run the feed_forward - _ = self.sess.run( - [self.test_model.out_argmax], - feed_dict=feed_dict) - - # update the FPS meter - fps_meter.update(time.time() - start) - - fps_meter.print_statistics() - - def finalize(self): - self.reporter.finalize() - self.summary_writer.close() - self.save_model() - - def debug_layers(self): - """ - This function will be responsible for output all outputs of all layers and dump them in a pickle - - :return: - """ - print("Debugging mode will begin NOW..") - - layers = tf.get_collection('debug_layers') - print("ALL Layers in the collection that i wanna to run {} layer".format(len(layers))) - for layer in layers: - print(layer) - - # exit(0) - - # reset metrics - self.metrics.reset() - - print('mean image ', self.debug_x.mean()) - print('mean gt ', self.debug_y.mean()) - - self.debug_y = self.linknet_preprocess_gt(self.debug_y) - - feed_dict = {self.test_model.x_pl: self.debug_x, - self.test_model.y_pl: self.debug_y, - self.test_model.is_training: False - } - - # var = [v for v in tf.all_variables() if v.op.name == "network/decoder_block_4/deconv/deconv/weights"] - # conv_w= self.sess.run(var[0]) - # var = [v for v in tf.all_variables() if v.op.name == "network/decoder_block_4/deconv/deconv/biases"] - # bias= self.sess.run(var[0]) - - # run the feed_forward - out_layers = self.sess.run(layers, feed_dict=feed_dict) - for layer in out_layers: - print(layer.shape) - - # dict_out= torchfile.load('out_networks_layers/dict_out.t7') - ## init= tf.constant_initializer(conv_w) - ## conv_w1 = tf.get_variable('my_weights', [3,3,128,128], tf.float32, initializer=init, trainable=True) - # pp= tf.nn.relu(layers[39]) - # out_relu= self.sess.run(pp, feed_dict={self.test_model.x_pl: self.debug_x, - # self.test_model.y_pl: self.debug_y, - # self.test_model.is_training: False - # }) - ## pp = tf.nn.conv2d_transpose(layers[39], conv_w, (1,32,64,128), strides=(1,2,2,1), padding="SAME") - ## pp= tf.image.resize_images(layers[39], (32,64)) - ## pp = tf.nn.conv2d(pp, conv_w, strides=(1,1,1,1), padding="SAME") - ## bias1= tf.get_variable('my_bias', 128, tf.float32, tf.constant_initializer(bias)) - # pp = tf.nn.bias_add(pp, bias) - # #self.sess.run(conv_w1.initializer) - # #self.sess.run(bias1.initializer) - # out_deconv= self.sess.run(pp, feed_dict={self.test_model.x_pl: self.debug_x, - # self.test_model.y_pl: self.debug_y, - # self.test_model.is_training: False - # }) - # out_deconv_direct= self.sess.run(layers[40], feed_dict={self.test_model.x_pl: self.debug_x, - # self.test_model.y_pl: self.debug_y, - # self.test_model.is_training: False - # }) - # pdb.set_trace() - - # print(out_layers) - # exit(0) - - # dump them in a pickle - with 
open("out_networks_layers/out_linknet_layers.pkl", "wb") as f: - pickle.dump(out_layers, f, protocol=2) - - # run the feed_forward again to see argmax and segmented - out_argmax, segmented_imgs = self.sess.run( - [self.test_model.out_argmax, - self.test_model.segmented_summary], - feed_dict=feed_dict) - - print('mean preds ', out_argmax[0].mean()) - - plt.imsave(self.args.out_dir + 'imgs/' + 'debug.png', segmented_imgs[0]) - - self.metrics.update_metrics(out_argmax[0], self.debug_y, 0, 0) - - mean_iou = self.metrics.compute_final_metrics(1) - - print("mean_iou_of_debug: " + str(mean_iou)) diff --git a/train/train_psy.py b/train/train_psy.py new file mode 100644 index 0000000..7f250aa --- /dev/null +++ b/train/train_psy.py @@ -0,0 +1,311 @@ +""" +Trainer class to train Segmentation models +""" +import os +import h5py +import tensorflow as tf +import numpy as np +from train.basic_train import BasicTrain +from metrics.metrics import Metrics +from utils.reporter import Reporter +from utils.misc import timeit +from utils.average_meter import FPSMeter + +from tqdm import tqdm +from utils.augmentation import flip_randomly_left_right_image_with_annotation, \ + scale_randomly_image_with_annotation_with_fixed_size_output +import scipy.misc as misc + + +class TrainPsy(BasicTrain): + """ + Trainer class + """ + + + def __init__(self, args, sess, train_model, test_model): + """ + Call the constructor of the base class + init summaries + init loading data + :param args: + :param sess: + :param model: + :return: + """ + super().__init__(args, sess, train_model, test_model) + ################################################################################## + # Init summaries + + # Summary variables + self.scalar_summary_tags = ['mean_iou_on_val', + 'train-loss-per-epoch', 'val-loss-per-epoch', + 'train-acc-per-epoch', 'val-acc-per-epoch'] + self.images_summary_tags = [ + ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), + ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])] + self.summary_tags = [] + self.summary_placeholders = {} + self.summary_ops = {} + # init summaries and it's operators + self.init_summaries() + # Create summary writer + self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) + + self.num_iterations_training_per_epoch = 1000#self.train_data_len // self.args.batch_size + + ################################################################################## + # Init metrics class + self.metrics = Metrics(self.args.num_classes) + # Init reporter class + if self.args.mode == 'train' or 'overfit': + self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) + elif self.args.mode == 'test': + self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) + ################################################################################## + + + def init_summaries(self): + """ + Create the summary part of the graph + :return: + """ + with tf.variable_scope('train-summary-per-epoch'): + for tag in self.scalar_summary_tags: + self.summary_tags += tag + self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag) + self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag]) + for tag, shape in self.images_summary_tags: + self.summary_tags += tag + self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag) + self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag], max_outputs=10) + + def 
add_summary(self, step, summaries_dict=None, summaries_merged=None):
+        """
+        Add the summaries to tensorboard
+        :param step:
+        :param summaries_dict:
+        :param summaries_merged:
+        :return:
+        """
+        if summaries_dict is not None:
+            summary_list = self.sess.run([self.summary_ops[tag] for tag in summaries_dict.keys()],
+                                         {self.summary_placeholders[tag]: value for tag, value in
+                                          summaries_dict.items()})
+            for summary in summary_list:
+                self.summary_writer.add_summary(summary, step)
+        if summaries_merged is not None:
+            self.summary_writer.add_summary(summaries_merged, step)
+
+    def train(self):
+        print("Training mode will begin NOW ..")
+        # curr_lr= self.model.args.learning_rate
+        # Build the input pipeline once: the dataset repeats indefinitely, so a
+        # single one-shot iterator serves every epoch; creating a new iterator
+        # on every batch only adds graph nodes
+        iterator = self.train_dataset.make_one_shot_iterator()
+        next_x, next_y = iterator.get_next()
+        for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1):
+
+            tt = tqdm(range(self.num_iterations_training_per_epoch), total=self.num_iterations_training_per_epoch,
+                      desc="epoch-" + str(cur_epoch) + "-")
+            # init acc and loss lists
+            loss_list = []
+            acc_list = []
+            segmented_imgs = None
+            for cur_iteration in tt:
+                # get the cur_it for the summary
+                cur_it = self.model.global_step_tensor.eval(self.sess)
+                # pull the next minibatch out of the tf.data pipeline
+                x_batch, y_batch = self.sess.run([next_x, next_y])
+                # Feed this variables to the network
+                feed_dict = {self.model.x_pl: x_batch,
+                             self.model.y_pl: y_batch,
+                             self.model.is_training: True
+                             # self.model.curr_learning_rate: curr_lr
+                             }
+
+                # run the feed_forward; on the last iteration of the epoch also
+                # fetch a segmented sample so the image summary has data
+                if cur_iteration < self.num_iterations_training_per_epoch - 1:
+                    _, loss, acc, summaries_merged = self.sess.run(
+                        [self.model.train_op, self.model.loss, self.model.accuracy,
+                         self.model.merged_summaries],
+                        feed_dict=feed_dict)
+                else:
+                    _, loss, acc, summaries_merged, segmented_imgs = self.sess.run(
+                        [self.model.train_op, self.model.loss, self.model.accuracy,
+                         self.model.merged_summaries, self.model.segmented_summary],
+                        feed_dict=feed_dict)
+                # log loss and acc
+                loss_list += [loss]
+                acc_list += [acc]
+
+                # Update the Global step
+                self.model.global_step_assign_op.eval(session=self.sess,
+                                                      feed_dict={self.model.global_step_input: cur_it + 1})
+
+            total_loss = np.mean(loss_list)
+            total_acc = np.mean(acc_list)
+
+            # summarize
+            summaries_dict = dict()
+            summaries_dict['train-loss-per-epoch'] = total_loss
+            summaries_dict['train-acc-per-epoch'] = total_acc
+
+            if self.args.data_mode != 'experiment_v2' and segmented_imgs is not None:
+                summaries_dict['train_prediction_sample'] = segmented_imgs
+            # self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged)
+
+            # report
+            self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch), str(total_acc))
+            self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch), str(total_loss))
+            self.reporter.finalize()
+
+            # Update the Cur Epoch tensor
+            # it is the last thing because if it is interrupted it repeat this
+            self.model.global_epoch_assign_op.eval(session=self.sess,
+                                                   feed_dict={self.model.global_epoch_input: cur_epoch + 1})
+
+            # print in console
+            tt.close()
+            print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[:6])
+
+            # Save the current checkpoint
+            if cur_epoch % self.args.save_every == 0:
+                self.save_model()
+
+            # Test the model on validation
+            if cur_epoch % self.args.test_every == 0:
+                self.test_per_epoch(step=self.model.global_step_tensor.eval(self.sess),
+                                    epoch=self.model.global_epoch_tensor.eval(self.sess))
+
+        print("Training Finished")
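+
+    # NOTE: train() assumes `self.train_dataset` is wired up by the caller
+    # before training starts; nothing in this class creates it. A minimal
+    # sketch of that wiring, assuming the loader in data_load.py and a
+    # hypothetical `train_seq_folder` argument:
+    #
+    #     trainer = TrainPsy(args, sess, train_model, test_model)
+    #     trainer.train_dataset = load_datase(train_seq_folder, args.batch_size)
+    #     trainer.train()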
+    def test_per_epoch(self, step, epoch):
+        import time  # `time` is used below and is not imported at module level here
+        print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..")
+
+        # NOTE: assumes self.val_data and self.num_iterations_validation_per_epoch
+        # are set up by the caller; this trainer does not load them itself (WIP)
+
+        # init tqdm and get the epoch value
+        tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch,
+                  desc="Val-epoch-" + str(epoch) + "-")
+
+        # init acc and loss lists
+        loss_list = []
+        acc_list = []
+        inf_list = []
+
+        # idx of minibatch
+        idx = 0
+
+        # reset metrics
+        self.metrics.reset()
+
+        # get the maximum iou to compare with and save the best model
+        max_iou = self.model.best_iou_tensor.eval(self.sess)
+
+        # loop by the number of iterations
+        for _ in tt:
+            # load minibatches
+            x_batch = self.val_data['X'][idx:idx + self.args.batch_size]
+            y_batch = self.val_data['Y'][idx:idx + self.args.batch_size]
+            if self.args.data_mode == 'experiment_v2':
+                y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size]
+
+            # update idx of minibatch
+            idx += self.args.batch_size
+
+            # Feed this variables to the network
+            feed_dict = {self.model.x_pl: x_batch,
+                         self.model.y_pl: y_batch,
+                         self.model.is_training: False
+                         }
+
+            start = time.time()
+            # run the feed_forward
+            out_argmax, loss, acc, summaries_merged = self.sess.run(
+                [self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries],
+                feed_dict=feed_dict)
+
+            end = time.time()
+            # log loss and acc
+            loss_list += [loss]
+            acc_list += [acc]
+            inf_list += [end - start]
+
+            # log metrics
+            self.metrics.update_metrics_batch(out_argmax, y_batch)
+
+        # mean over batches
+        total_acc = np.mean(acc_list)
+        mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch)
+        mean_iou_arr = self.metrics.iou
+        mean_inference = str(np.mean(inf_list)) + '-seconds'
+        # summarize
+        summaries_dict = dict()
+        summaries_dict['val-acc-per-epoch'] = total_acc
+        summaries_dict['mean_iou_on_val'] = mean_iou
+
+        # report
+        self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc))
+        self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch),
+                                                   str(mean_inference))
+        self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr)
+        self.reporter.finalize()
+
+        # print in console
+        tt.close()
+        print("Val-epoch-" + str(epoch) + "-" +
+              "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou))
+        print("Last_max_iou: " + str(max_iou))
+        if mean_iou > max_iou:
+            print("This validation got a new best iou. so we will save this one")
+            # save the best model
+            self.save_best_model()
+            # Set the new maximum
+            self.model.best_iou_assign_op.eval(session=self.sess,
+                                               feed_dict={self.model.best_iou_input: mean_iou})
+        else:
+            print("hmm not the best validation epoch :/..")
+
+    def test_eval(self, pkl=False):
+        print("Testing mode will begin NOW..")
+
+        # load the best model checkpoint to test on it
+        if not pkl:
+            self.load_best_model()
+
+        # init tqdm and get the epoch value
+        tt = tqdm(range(self.test_data_len))
+
+        # idx of image
+        idx = 0
+
+        # loop by the number of iterations
+        for _ in tt:
+            # load mini_batches
+            x_batch = self.test_data['X'][idx:idx + 1]
+            idx += 1
+
+            # Feed this variables to the network
+            if self.args.random_cropping:
+                feed_dict = {self.test_model.x_pl_before: x_batch,
+                             self.test_model.is_training: False,
+                             }
+            else:
+                feed_dict = {self.test_model.x_pl: x_batch,
+                             self.test_model.is_training: False
+                             }
+
+            # run the feed_forward
+            out_argmax, segmented_imgs = self.sess.run(
+                [self.test_model.out_argmax,
+                 self.test_model.segmented_summary],
+                feed_dict=feed_dict)
+
+            # Colored results for visualization
+            #colored_save_path = self.args.out_dir + 'imgs/' + str(self.names_mapper['Y'][idx])
+            #if not os.path.exists(os.path.dirname(colored_save_path)):
+            #    os.makedirs(os.path.dirname(colored_save_path))
+            #plt.imsave(colored_save_path, segmented_imgs[0])
+
+            # Results for official evaluation
+            #save_path = self.args.out_dir + 'results/' + str(self.names_mapper['Y'][idx])
+            #if not os.path.exists(os.path.dirname(save_path)):
+            #    os.makedirs(os.path.dirname(save_path))
+            #output = postprocess(out_argmax[0])
+            #misc.imsave(save_path, misc.imresize(output, [1024, 2048], 'nearest'))
+
+        # print in console
+        tt.close()
+
+    def finalize(self):
+        self.reporter.finalize()
+        self.summary_writer.close()
+        self.save_model()
+
\ No newline at end of file

From 428b3815b1aecba4af6404f1b95a8d73674c70c2 Mon Sep 17 00:00:00 2001
From: limorhe
Date: Wed, 25 Jul 2018 16:47:49 -0500
Subject: [PATCH 4/6] connect input to train

---
 .gitignore                  |   1 +
 data_load.py                | 103 ++++----
 models/basic/basic_model.py |   7 +-
 models/encoders/depthnet.py |  26 +--
 train/basic_train.py        |   4 +-
 train/new_train.py          | 454 ------------------------------------
 train/train_psy.py          |  17 +-
 7 files changed, 70 insertions(+), 542 deletions(-)
 delete mode 100644 train/new_train.py

diff --git a/.gitignore b/.gitignore
index 8847311..b7640d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 __pycache__/
 *.py[cod]
 *$py.class
+*.npy
 
 # C extensions
 *.so
diff --git a/data_load.py b/data_load.py
index c3546b0..f6e55fc 100644
--- a/data_load.py
+++ b/data_load.py
@@ -1,8 +1,15 @@
 # coding: utf-8
 
-# In[21]:
-
+'''
+# usage example
+train_seq_folder = 'c:\\TFSegmentation\\data\\synthetic_seq\\train_seq'
+dataset = load_datase(train_seq_folder, 5)
+iterator = dataset.make_one_shot_iterator()
+next_example = iterator.get_next()
+sess = tf.Session()
+sess.run(next_example)
+'''
 
 import os
 import h5py
@@ -17,51 +24,9 @@
 MIN_DEPTH = 300
 MAX_DEPTH = 1500
 
-
-# In[22]:
-
-
-sess = tf.Session()
-
-
-# In[23]:
-
-
-train_seq_folder = 'c:\\seq\\train_seq\\'
-print(train_seq_folder)
-
-
-# In[24]:
-
-
-train_seq_files = []
-for (dirpath, dirnames, filenames) in os.walk(train_seq_folder):
-    train_seq_files.extend(os.path.join(dirpath, x) for x in filenames)
-print(train_seq_files)
-
-
-# In[25]:
-
-
-filenames = []
-for train_seq_name in train_seq_files:
-    train_seq = h5py.File(train_seq_name, "r")
-    num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]
-    num_frames = train_seq['INFO']['COUNT'].value[0]
-    train_seq.close()
-    for frame_idx in range(num_frames):
-        for cam_idx in range(num_cameras):
-            filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)
-            filenames.append(filename_str)
-
-
-# In[26]:
-
-
 def _read_hdf5_func(filename, label):
     filename_decoded = filename.decode("utf-8")
-    print(filename_decoded)
+    #print(filename_decoded)
     h5_file_name, group_name = filename_decoded.split('__')
     h5_file = h5py.File(h5_file_name, "r")
     #print(group_name)
@@ -76,6 +41,7 @@ def _read_hdf5_func(filename, label):
                     out=depth_image_scaled, casting='unsafe')
 
     depth_image_scaled = depth_image_scaled.astype(np.uint8)
+    depth_image_scaled = np.expand_dims(depth_image_scaled, 2)
 
     # Read labels
     label_image_path = group_name + 'LABEL'
@@ -83,24 +49,33 @@ def _read_hdf5_func(filename, label):
 
     h5_file.close()
     return depth_image_scaled, label_image
 
-labels = [0]*len(filenames)
-dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
-dataset = dataset.shuffle(buffer_size=10000)
-dataset = dataset.map(
-    lambda filename, label: tuple(tf.py_func(
-        _read_hdf5_func, [filename, labels], [tf.uint8, tf.uint8])), num_parallel_calls=1)
-
-
-dataset = dataset.batch(1)
-dataset = dataset.repeat()
-dataset = dataset.prefetch(1)
-
-
-# In[27]:
-
-
-iterator = dataset.make_one_shot_iterator()
-next_example = iterator.get_next()
-
-sess.run(next_example)
+def load_datase(train_seq_folder, batch_size):
+    train_seq_files = []
+    for (dirpath, dirnames, filenames) in os.walk(train_seq_folder):
+        train_seq_files.extend(os.path.join(dirpath, x) for x in filenames)
+    filenames = []
+    for train_seq_name in train_seq_files:
+        train_seq = h5py.File(train_seq_name, "r")
+        num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0]
+        num_frames = train_seq['INFO']['COUNT'].value[0]
+        train_seq.close()
+        for frame_idx in range(0, num_frames, 5):
+            for cam_idx in range(num_cameras):
+                filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx)
+                filenames.append(filename_str)
+
+    labels = [0]*len(filenames)
+    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
+    dataset = dataset.shuffle(buffer_size=10000)
+    # the dataset yields one (filename, label) pair at a time, so pass the
+    # per-element `label` into the py_func, not the whole `labels` list
+    dataset = dataset.map(
+        lambda filename, label: tuple(tf.py_func(
+            _read_hdf5_func, [filename, label], [tf.uint8, tf.uint8])), num_parallel_calls=1)
+
+    dataset = dataset.batch(batch_size)
+    dataset = dataset.repeat()
+    dataset = dataset.prefetch(1)
+
+    return dataset
diff --git a/models/basic/basic_model.py b/models/basic/basic_model.py
index d16a162..2eac4a0 100644
--- a/models/basic/basic_model.py
+++ b/models/basic/basic_model.py
@@ -36,7 +36,8 @@ def __init__(self, args):
         self.params.img_height = self.args.img_height
         self.params.num_channels = self.args.num_channels
         self.params.num_classes = self.args.num_classes
-        #self.params.class_weights = np.load(self.args.data_dir + 'weights.npy')
+        self.params.class_weights = np.ones(self.params.num_classes) * 1.0 / self.params.num_classes
+        #np.load(self.args.data_dir + 'weights.npy')
         self.params.weighted_loss = self.args.weighted_loss
         # Input
         self.x_pl = None
@@ -106,7 +107,9 @@ def build(self):
     def init_input(self):
         with tf.name_scope('input'):
             self.x_pl = tf.placeholder(tf.float32,
-                                       [self.args.batch_size, self.params.img_height, self.params.img_width, self.params.num_channels])
+                                       [self.args.batch_size,
+                                        self.params.img_height,
+                                        self.params.img_width, self.params.num_channels])
             self.y_pl = tf.placeholder(tf.int32,
                                        [self.args.batch_size, self.params.img_height, self.params.img_width])
            # self.curr_learning_rate= 
tf.placeholder(tf.float32) diff --git a/models/encoders/depthnet.py b/models/encoders/depthnet.py index 4b2c98c..394c07c 100644 --- a/models/encoders/depthnet.py +++ b/models/encoders/depthnet.py @@ -71,82 +71,82 @@ def encoder_build(self): is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv1_1) - self.conv2_1 = depthwise_separable_conv2d('conv_ds_2', self.conv1_1, width_multiplier=self.width_multiplier, + self.conv2_1 = depthwise_separable_conv2d('conv_2_1', self.conv1_1, width_multiplier=self.width_multiplier, num_filters=64, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd, activation=tf.nn.relu6) self._debug(self.conv2_1) - self.conv2_2 = depthwise_separable_conv2d('conv_ds_3', self.conv2_1, width_multiplier=self.width_multiplier, + self.conv2_2 = depthwise_separable_conv2d('conv_2_2', self.conv2_1, width_multiplier=self.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv2_2) - self.conv3_1 = depthwise_separable_conv2d('conv_ds_4', self.conv2_2, width_multiplier=self.width_multiplier, + self.conv3_1 = depthwise_separable_conv2d('conv_3_1', self.conv2_2, width_multiplier=self.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv3_1) - self.conv3_2 = depthwise_separable_conv2d('conv_ds_5', self.conv3_1, width_multiplier=self.width_multiplier, + self.conv3_2 = depthwise_separable_conv2d('conv_3_2', self.conv3_1, width_multiplier=self.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv3_2) - self.conv4_1 = depthwise_separable_conv2d('conv_ds_6', self.conv3_2, width_multiplier=self.width_multiplier, + self.conv4_1 = depthwise_separable_conv2d('conv_4_1', self.conv3_2, width_multiplier=self.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv4_1) - self.conv4_2 = depthwise_separable_conv2d('conv_ds_7', self.conv4_1, width_multiplier=self.width_multiplier, + self.conv4_2 = depthwise_separable_conv2d('conv_4_2', self.conv4_1, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv4_2) - self.conv5_1 = depthwise_separable_conv2d('conv_ds_8', self.conv4_2, width_multiplier=self.width_multiplier, + self.conv5_1 = depthwise_separable_conv2d('conv_5_1', self.conv4_2, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_1) - self.conv5_2 = depthwise_separable_conv2d('conv_ds_9', self.conv5_1, width_multiplier=self.width_multiplier, + self.conv5_2 = depthwise_separable_conv2d('conv_5_2', self.conv5_1, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, 
is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_2) - self.conv5_3 = depthwise_separable_conv2d('conv_ds_10', self.conv5_2, + self.conv5_3 = depthwise_separable_conv2d('conv_5_3', self.conv5_2, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_3) - self.conv5_4 = depthwise_separable_conv2d('conv_ds_11', self.conv5_3, + self.conv5_4 = depthwise_separable_conv2d('conv_5_4', self.conv5_3, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_4) - self.conv5_5 = depthwise_separable_conv2d('conv_ds_12', self.conv5_4, + self.conv5_5 = depthwise_separable_conv2d('conv_5_5', self.conv5_4, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_5) - self.conv5_6 = depthwise_separable_conv2d('conv_ds_13', self.conv5_5, + self.conv5_6 = depthwise_separable_conv2d('conv_5_6', self.conv5_5, width_multiplier=self.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_6) - self.conv6_1 = depthwise_separable_conv2d('conv_ds_14', self.conv5_6, + self.conv6_1 = depthwise_separable_conv2d('conv_6_1', self.conv5_6, width_multiplier=self.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, diff --git a/train/basic_train.py b/train/basic_train.py index 7b300b7..4111d0b 100644 --- a/train/basic_train.py +++ b/train/basic_train.py @@ -91,14 +91,14 @@ def load_model(self, model): Load the latest checkpoint :return: """ - + ''' try: # This is for loading the pretrained weights if they can't be loaded during initialization. 
model.encoder.load_pretrained_weights(self.sess) print("Pretrained weights of the encoder is loaded") except AttributeError: pass - + ''' print("Searching for a checkpoint") latest_checkpoint = tf.train.latest_checkpoint(self.args.checkpoint_dir) if latest_checkpoint: diff --git a/train/new_train.py b/train/new_train.py deleted file mode 100644 index 9fc0f66..0000000 --- a/train/new_train.py +++ /dev/null @@ -1,454 +0,0 @@ -""" -New trainer faster than ever -""" - -from metrics.metrics import Metrics -from utils.reporter import Reporter -from utils.misc import timeit - -from tqdm import tqdm -import numpy as np -import tensorflow as tf -import matplotlib -import time - -matplotlib.use('Agg') -import matplotlib.pyplot as plt - - -class NewTrain(object): - def __init__(self, args, sess, model): - print("\nTraining is initializing itself\n") - - self.args = args - self.sess = sess - self.model = model - - # shortcut for model params - self.params = self.model.params - - # To initialize all variables - self.init = None - self.init_model() - - # Create a saver object - self.saver = tf.train.Saver(max_to_keep=self.args.max_to_keep, - keep_checkpoint_every_n_hours=10, - save_relative_paths=True) - - self.saver_best = tf.train.Saver(max_to_keep=1, - save_relative_paths=True) - - # Load from latest checkpoint if found - self.load_model() - - ################################################################################## - # Init summaries - - # Summary variables - self.scalar_summary_tags = ['mean_iou_on_val', - 'train-loss-per-epoch', 'val-loss-per-epoch', - 'train-acc-per-epoch', 'val-acc-per-epoch'] - self.images_summary_tags = [ - ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), - ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])] - - self.summary_tags = [] - self.summary_placeholders = {} - self.summary_ops = {} - # init summaries and it's operators - self.init_summaries() - # Create summary writer - self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) - ################################################################################## - if self.args.mode == 'train': - self.num_iterations_training_per_epoch = self.args.tfrecord_train_len // self.args.batch_size - self.num_iterations_validation_per_epoch = self.args.tfrecord_val_len // self.args.batch_size - else: - self.test_data = None - self.test_data_len = None - self.num_iterations_testing_per_epoch = None - self.load_test_data() - ################################################################################## - # Init metrics class - self.metrics = Metrics(self.args.num_classes) - # Init reporter class - if self.args.mode == 'train' or 'overfit': - self.reporter = Reporter(self.args.out_dir + 'report_train.json', self.args) - elif self.args.mode == 'test': - self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) - ################################################################################## - - @timeit - def load_test_data(self): - print("Loading Testing data..") - self.test_data = {'X': np.load(self.args.data_dir + "X_val.npy"), - 'Y': np.load(self.args.data_dir + "Y_val.npy")} - self.test_data_len = self.test_data['X'].shape[0] - self.test_data['X'].shape[0] % self.args.batch_size - print("Test-shape-x -- " + str(self.test_data['X'].shape)) - print("Test-shape-y -- " + str(self.test_data['Y'].shape)) - self.num_iterations_testing_per_epoch = (self.test_data_len + self.args.batch_size - 1) // 
self.args.batch_size - print("Test data is loaded") - - @timeit - def init_model(self): - print("Initializing the variables of the model") - self.init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) - self.sess.run(self.init) - print("Initialization finished") - - def save_model(self): - """ - Save Model Checkpoint - :return: - """ - print("saving a checkpoint") - self.saver.save(self.sess, self.args.checkpoint_dir, self.model.global_step_tensor) - print("Saved a checkpoint") - - def save_best_model(self): - """ - Save BEST Model Checkpoint - :return: - """ - print("saving a checkpoint for the best model") - self.saver_best.save(self.sess, self.args.checkpoint_best_dir, self.model.global_step_tensor) - print("Saved a checkpoint for the best model") - - def load_best_model(self): - """ - Load the best model checkpoint - :return: - """ - print("loading a checkpoint for BEST ONE") - latest_checkpoint = tf.train.latest_checkpoint(self.args.checkpoint_best_dir) - if latest_checkpoint: - print("Loading model checkpoint {} ...\n".format(latest_checkpoint)) - self.saver_best.restore(self.sess, latest_checkpoint) - else: - print("ERROR NO best checkpoint found") - exit(-1) - print("BEST MODEL LOADED..") - - def init_summaries(self): - """ - Create the summary part of the graph - :return: - """ - with tf.variable_scope('train-summary-per-epoch'): - for tag in self.scalar_summary_tags: - self.summary_tags += tag - self.summary_placeholders[tag] = tf.placeholder('float32', None, name=tag) - self.summary_ops[tag] = tf.summary.scalar(tag, self.summary_placeholders[tag]) - for tag, shape in self.images_summary_tags: - self.summary_tags += tag - self.summary_placeholders[tag] = tf.placeholder('float32', shape, name=tag) - self.summary_ops[tag] = tf.summary.image(tag, self.summary_placeholders[tag], max_outputs=10) - - def add_summary(self, step, summaries_dict=None, summaries_merged=None): - """ - Add the summaries to tensorboard - :param step: - :param summaries_dict: - :param summaries_merged: - :return: - """ - if summaries_dict is not None: - summary_list = self.sess.run([self.summary_ops[tag] for tag in summaries_dict.keys()], - {self.summary_placeholders[tag]: value for tag, value in - summaries_dict.items()}) - for summary in summary_list: - self.summary_writer.add_summary(summary, step) - if summaries_merged is not None: - self.summary_writer.add_summary(summaries_merged, step) - - @timeit - def load_model(self): - """ - Load the latest checkpoint - :return: - """ - try: - # This is for loading the pretrained weights if they can't be loaded during initialization. - self.model.encoder.load_pretrained_weights(self.sess) - except AttributeError: - pass - - print("Searching for a checkpoint") - latest_checkpoint = tf.train.latest_checkpoint(self.args.checkpoint_dir) - if latest_checkpoint: - print("Loading model checkpoint {} ...\n".format(latest_checkpoint)) - self.saver.restore(self.sess, latest_checkpoint) - print("Model loaded from the latest checkpoint\n") - else: - print("\n.. 
No ckpt, SO First time to train :D ..\n") - - def train(self): - print("Training mode will begin NOW ..") - tf.train.start_queue_runners(sess=self.sess) - curr_lr = self.model.args.learning_rate - for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1): - - # init tqdm and get the epoch value - tt = tqdm(range(self.num_iterations_training_per_epoch), total=self.num_iterations_training_per_epoch, - desc="epoch-" + str(cur_epoch) + "-") - - # init acc and loss lists - loss_list = [] - acc_list = [] - - # loop by the number of iterations - for cur_iteration in tt: - - # get the cur_it for the summary - cur_it = self.model.global_step_tensor.eval(self.sess) - - # Feed this variables to the network - feed_dict = { - self.model.handle: self.model.training_handle, - self.model.is_training: True, - self.model.curr_learning_rate: curr_lr - } - - # Run the feed forward but the last iteration finalize what you want to do - if cur_iteration < self.num_iterations_training_per_epoch - 1: - - # run the feed_forward - _, loss, acc, summaries_merged = self.sess.run( - [self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries], - feed_dict=feed_dict) - # log loss and acc - loss_list += [loss] - acc_list += [acc] - # summarize - self.add_summary(cur_it, summaries_merged=summaries_merged) - - else: - # run the feed_forward - _, loss, acc, summaries_merged, segmented_imgs = self.sess.run( - [self.model.train_op, self.model.loss, self.model.accuracy, - self.model.merged_summaries, self.model.segmented_summary], - feed_dict=feed_dict) - # log loss and acc - loss_list += [loss] - acc_list += [acc] - total_loss = np.mean(loss_list) - total_acc = np.mean(acc_list) - # summarize - summaries_dict = dict() - summaries_dict['train-loss-per-epoch'] = total_loss - summaries_dict['train-acc-per-epoch'] = total_acc - summaries_dict['train_prediction_sample'] = segmented_imgs - self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) - - # report - self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch), str(total_acc)) - self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch), str(total_loss)) - self.reporter.finalize() - - # Update the Global step - self.model.global_step_assign_op.eval(session=self.sess, - feed_dict={self.model.global_step_input: cur_it + 1}) - - # Update the Cur Epoch tensor - # it is the last thing because if it is interrupted it repeat this - self.model.global_epoch_assign_op.eval(session=self.sess, - feed_dict={self.model.global_epoch_input: cur_epoch + 1}) - - # print in console - tt.close() - print("epoch-" + str(cur_epoch) + "-" + "loss:" + str(total_loss) + "-" + " acc:" + str(total_acc)[ - :6]) - - # Break the loop to finalize this epoch - break - - # Update the Global step - self.model.global_step_assign_op.eval(session=self.sess, - feed_dict={self.model.global_step_input: cur_it + 1}) - - # Save the current checkpoint - if cur_epoch % self.args.save_every == 0: - self.save_model() - - # Test the model on validation - if cur_epoch % self.args.test_every == 0: - self.test_per_epoch(step=self.model.global_step_tensor.eval(self.sess), - epoch=self.model.global_epoch_tensor.eval(self.sess)) - - if cur_epoch % self.args.learning_decay_every == 0: - curr_lr = curr_lr * self.args.learning_decay - print('Current learning rate is ', curr_lr) - - print("Training Finished") - - def test_per_epoch(self, step, epoch): - print("Validation at 
step:" + str(step) + " at epoch:" + str(epoch) + " ..") - - # init tqdm and get the epoch value - tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch, - desc="Val-epoch-" + str(epoch) + "-") - - # init acc and loss lists - loss_list = [] - acc_list = [] - inf_list = [] - - # reset metrics - self.metrics.reset() - - # get the maximum iou to compare with and save the best model - max_iou = self.model.best_iou_tensor.eval(self.sess) - - # init dataset to validation - self.sess.run(self.model.validation_iterator.initializer) - - # loop by the number of iterations - for cur_iteration in tt: - # Feed this variables to the network - feed_dict = { - self.model.handle: self.model.validation_handle, - self.model.is_training: False - } - - # Run the feed forward but the last iteration finalize what you want to do - if cur_iteration < self.num_iterations_validation_per_epoch - 1: - - start = time.time() - # run the feed_forward - next_img, out_argmax, loss, acc = self.sess.run( - [self.model.next_img, self.model.out_argmax, self.model.loss, self.model.accuracy], - feed_dict=feed_dict) - end = time.time() - # log loss and acc - loss_list += [loss] - acc_list += [acc] - inf_list += [end - start] - # log metrics - self.metrics.update_metrics_batch(out_argmax, next_img[1]) - - else: - start = time.time() - # run the feed_forward - next_img, out_argmax, loss, acc, segmented_imgs = self.sess.run( - [self.model.next_img, self.model.out_argmax, self.model.loss, self.model.accuracy, - self.model.segmented_summary], - feed_dict=feed_dict) - end = time.time() - # log loss and acc - loss_list += [loss] - acc_list += [acc] - inf_list += [end - start] - # log metrics - self.metrics.update_metrics_batch(out_argmax, next_img[1]) - # mean over batches - total_loss = np.mean(loss_list) - total_acc = np.mean(acc_list) - mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch) - mean_iou_arr = self.metrics.iou - mean_inference = str(np.mean(inf_list)) + '-seconds' - # summarize - summaries_dict = dict() - summaries_dict['val-loss-per-epoch'] = total_loss - summaries_dict['val-acc-per-epoch'] = total_acc - summaries_dict['mean_iou_on_val'] = mean_iou - summaries_dict['val_prediction_sample'] = segmented_imgs - self.add_summary(step, summaries_dict=summaries_dict) - self.summary_writer.flush() - - # report - self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc)) - self.reporter.report_experiment_statistics('validation-loss', 'epoch-' + str(epoch), str(total_loss)) - self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch), - str(mean_inference)) - self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr) - self.reporter.finalize() - - # print in console - tt.close() - print("Val-epoch-" + str(epoch) + "-" + "loss:" + str(total_loss) + "-" + - "acc:" + str(total_acc)[:6] + "-mean_iou:" + str(mean_iou)) - print("Last_max_iou: " + str(max_iou)) - if mean_iou > max_iou: - print("This validation got a new best iou. 
so we will save this one") - # save the best model - self.save_best_model() - # Set the new maximum - self.model.best_iou_assign_op.eval(session=self.sess, - feed_dict={self.model.best_iou_input: mean_iou}) - else: - print("hmm not the best validation epoch :/..") - - # Break the loop to finalize this epoch - break - - def test(self): - print("Testing mode will begin NOW..") - - # load the best model checkpoint to test on it - self.load_best_model() - - # init tqdm and get the epoch value - tt = tqdm(range(self.test_data_len)) - naming = np.load(self.args.data_dir + 'names_train.npy') - - # init acc and loss lists - loss_list = [] - acc_list = [] - img_list = [] - - # idx of image - idx = 0 - - # reset metrics - self.metrics.reset() - - # loop by the number of iterations - for cur_iteration in tt: - # load mini_batches - x_batch = self.test_data['X'][idx:idx + 1] - y_batch = self.test_data['Y'][idx:idx + 1] - - # update idx of mini_batch - idx += 1 - - # Feed this variables to the network - feed_dict = {self.model.x_pl: x_batch, - self.model.y_pl: y_batch, - self.model.is_training: False - } - - # run the feed_forward - out_argmax, loss, acc, summaries_merged, segmented_imgs = self.sess.run( - [self.model.out_argmax, self.model.loss, self.model.accuracy, - self.model.merged_summaries, self.model.segmented_summary], - feed_dict=feed_dict) - - np.save(self.args.out_dir + 'npy/' + str(cur_iteration) + '.npy', out_argmax[0]) - plt.imsave(self.args.out_dir + 'imgs/' + 'test_' + str(cur_iteration) + '.png', segmented_imgs[0]) - - # log loss and acc - loss_list += [loss] - acc_list += [acc] - - # log metrics - self.metrics.update_metrics(out_argmax[0], y_batch[0], 0, 0) - - # mean over batches - total_loss = np.mean(loss_list) - total_acc = np.mean(acc_list) - mean_iou = self.metrics.compute_final_metrics(self.test_data_len) - - # print in console - tt.close() - print("Here the statistics") - print("Total_loss: " + str(total_loss)) - print("Total_acc: " + str(total_acc)[:6]) - print("mean_iou: " + str(mean_iou)) - - print("Plotting imgs") - - def finalize(self): - self.reporter.finalize() - self.summary_writer.close() - self.save_model() diff --git a/train/train_psy.py b/train/train_psy.py index 7f250aa..8b33fa3 100644 --- a/train/train_psy.py +++ b/train/train_psy.py @@ -5,6 +5,7 @@ import h5py import tensorflow as tf import numpy as np +import data_load as dl from train.basic_train import BasicTrain from metrics.metrics import Metrics from utils.reporter import Reporter @@ -42,8 +43,8 @@ def __init__(self, args, sess, train_model, test_model): 'train-loss-per-epoch', 'val-loss-per-epoch', 'train-acc-per-epoch', 'val-acc-per-epoch'] self.images_summary_tags = [ - ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3]), - ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 3])] + ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, self.args.num_channels]), + ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, self.args.num_channels])] self.summary_tags = [] self.summary_placeholders = {} self.summary_ops = {} @@ -63,7 +64,9 @@ def __init__(self, args, sess, train_model, test_model): elif self.args.mode == 'test': self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) ################################################################################## - + train_seq_folder = self.args.data_dir + 'train_seq' + self.train_dataset = 
dl.load_datase(train_seq_folder, self.args.batch_size) + self.dataset_iterator = self.train_dataset.make_one_shot_iterator() def init_summaries(self): """ @@ -110,15 +113,15 @@ def train(self): for _ in tt: # get the cur_it for the summary cur_it = self.model.global_step_tensor.eval(self.sess) - iterator = self.train_dataset.make_one_shot_iterator() - next_x, next_y = iterator.get_next() - self.sess.run(next_x, next_y) + next_batch = self.dataset_iterator.get_next() + x_batch, y_batch = self.sess.run(next_batch) + # Feed this variables to the network feed_dict = {self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: True #self.model.curr_learning_rate:curr_lr - # } + } # run the feed_forward _, loss, acc, summaries_merged = self.sess.run( From f45c7ef3a91a16c948f8acb43da24470fd762bcf Mon Sep 17 00:00:00 2001 From: limorhe Date: Tue, 25 Sep 2018 17:24:04 -0500 Subject: [PATCH 5/6] work with smaller images in data set (avoid oom) set sample output for visualization run validation on the same input as last training- give different results --- .../unet_depthnet_train.yaml | 13 +- data_load.py | 30 +++-- models/basic/basic_model.py | 17 ++- models/encoders/mobilenet.py | 32 ++--- models/encoders/resnet_18.py | 4 +- train/train_psy.py | 115 +++++++++++------- utils/img_utils.py | 106 ++++++++-------- utils/seg_dataloader.py | 3 +- 8 files changed, 183 insertions(+), 137 deletions(-) diff --git a/config/experiments_config/unet_depthnet_train.yaml b/config/experiments_config/unet_depthnet_train.yaml index e3aa6e9..21df4da 100644 --- a/config/experiments_config/unet_depthnet_train.yaml +++ b/config/experiments_config/unet_depthnet_train.yaml @@ -1,13 +1,14 @@ # Directories arguments -data_dir: "seq" +data_dir: "synthetic_seq" exp_dir: "unet_depthnet" out_dir: "unet_depthnet" # Data arguments -img_height: 720 -img_width: 1280 +# image dimensions are reduced to enable run on my pc +img_height: 360 +img_width: 640 num_channels: 1 -num_classes: 26 +num_classes: 28 # Train arguments num_epochs: 200 @@ -15,9 +16,9 @@ batch_size: 1 shuffle: True data_mode: "experiment" save_every: 5 -test_every: 5 +test_every: 1 #5 max_to_keep: 2 -weighted_loss: False +weighted_loss: False #consider this random_cropping: False freeze_encoder: False diff --git a/data_load.py b/data_load.py index f6e55fc..322d55b 100644 --- a/data_load.py +++ b/data_load.py @@ -13,8 +13,10 @@ import os import h5py +import warnings import tensorflow as tf import numpy as np +import skimage.transform as st from train.basic_train import BasicTrain from metrics.metrics import Metrics from utils.reporter import Reporter @@ -24,7 +26,9 @@ MIN_DEPTH = 300 MAX_DEPTH = 1500 -def _read_hdf5_func(filename, label): +def _read_hdf5_func(filename, label, h, w): + out_size = (h, w); + #print('file: ' + str(filename)) filename_decoded = filename.decode("utf-8") #print(filename_decoded) h5_file_name, group_name = filename_decoded.split('__') @@ -39,18 +43,26 @@ def _read_hdf5_func(filename, label): depth_image_scaled -= MIN_DEPTH np.floor_divide(depth_image_scaled, (MAX_DEPTH - MIN_DEPTH + 1) / 256, out=depth_image_scaled, casting='unsafe') - - depth_image_scaled = depth_image_scaled.astype(np.uint8) - depth_image_scaled = np.expand_dims(depth_image_scaled,2) - # Read labels label_image_path = group_name + 'LABEL' label_image = h5_file[label_image_path].value h5_file.close() - return depth_image_scaled, label_image + #resize output + depth_resize = depth_image_scaled + if(depth_image_scaled.shape != out_size): + with 
warnings.catch_warnings(): + warnings.simplefilter("ignore") + depth_resize = st.resize(depth_resize, out_size, + order = 0, preserve_range = True) + label_image = st.resize(label_image, out_size, + order = 0, preserve_range = True) + label_image = label_image.astype('uint8') + depth_image = depth_resize.astype(np.uint8) + depth_image = np.expand_dims(depth_image,2) + return depth_image, label_image + -# In[27]: -def load_datase(train_seq_folder, batch_size): +def load_dataset(train_seq_folder, batch_size, h, w): train_seq_files = [] for (dirpath, dirnames, filenames) in os.walk(train_seq_folder): train_seq_files.extend(os.path.join(dirpath, x) for x in filenames) @@ -70,7 +82,7 @@ def load_datase(train_seq_folder, batch_size): dataset = dataset.shuffle(buffer_size=10000) dataset = dataset.map( lambda filename, label: tuple(tf.py_func( - _read_hdf5_func, [filename, labels], [tf.uint8, tf.uint8])), num_parallel_calls=1) + _read_hdf5_func, [filename, labels, h, w], [tf.uint8, tf.uint8])), num_parallel_calls=1) dataset = dataset.batch(batch_size) diff --git a/models/basic/basic_model.py b/models/basic/basic_model.py index 2eac4a0..9ce440f 100644 --- a/models/basic/basic_model.py +++ b/models/basic/basic_model.py @@ -4,7 +4,7 @@ You can override any function you want """ -from utils.img_utils import decode_labels +import utils.img_utils as imu import numpy as np import tensorflow as tf @@ -125,6 +125,7 @@ def init_output(self): with tf.name_scope('output'): self.out_softmax = tf.nn.softmax(self.logits) self.out_argmax = tf.argmax(self.out_softmax, axis=3, output_type=tf.int32) + self.out_conf = tf.reduce_max(self.out_softmax, axis = 3) def get_class_weighting(self): self.wghts = tf.one_hot(self.y_pl, dtype='float32', depth=self.params.num_classes) * \ @@ -171,11 +172,15 @@ def init_summaries(self): self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.y_pl, self.out_argmax), tf.float32)) with tf.name_scope('segmented_output'): - input_summary = tf.cast(self.x_pl, tf.uint8) - # labels_summary = tf.py_func(decode_labels, [self.y_pl, self.params.num_classes], tf.uint8) - preds_summary = tf.py_func(decode_labels, [self.out_argmax, self.params.num_classes], tf.uint8) - self.segmented_summary = tf.concat(axis=2, values=[input_summary, - preds_summary]) # Concatenate row-wise + #input_summary = tf.cast(self.x_pl, tf.uint8) + input_summary = tf.py_func(imu.decode_input, [self.x_pl], tf.uint8) + labels_summary = tf.py_func(imu.decode_labels, [self.y_pl, self.params.num_classes], tf.uint8) + preds_summary = tf.py_func(imu.decode_labels, [self.out_argmax, self.params.num_classes], tf.uint8) + conf_summary = tf.py_func(imu.decode_conf, [self.out_argmax], tf.uint8) + img_1 = tf.concat(axis=1, values=[input_summary,labels_summary]) + img_2 = tf.concat(axis=1, values=[preds_summary,conf_summary]) + self.segmented_summary = tf.concat(axis=2, values = [img_1, img_2]) + self.test_segmented_summary = tf.concat(axis = 1, values = [preds_summary, conf_summary]) # Every step evaluate these summaries with tf.name_scope('train-summary'): diff --git a/models/encoders/mobilenet.py b/models/encoders/mobilenet.py index e35f4d6..c2fb290 100644 --- a/models/encoders/mobilenet.py +++ b/models/encoders/mobilenet.py @@ -10,7 +10,8 @@ class MobileNet: """ # MEAN = [103.939, 116.779, 123.68] - MEAN = [73.29132098, 83.04442645, 72.5238962] +# MEAN = [73.29132098, 83.04442645, 72.5238962] + MEAN = 128.0 #TODO def __init__(self, x_input, num_classes, pretrained_path, @@ -65,94 +66,97 @@ def encoder_build(self): print("Building the 
MobileNet..") with tf.variable_scope('mobilenet_encoder'): with tf.name_scope('Pre_Processing'): + preprocessed_input = (self.x_input - MobileNet.MEAN) / 255.0 + ''' red, green, blue = tf.split(self.x_input, num_or_size_splits=3, axis=3) preprocessed_input = tf.concat([ (blue - MobileNet.MEAN[0]) / 255.0, (green - MobileNet.MEAN[1]) / 255.0, (red - MobileNet.MEAN[2]) / 255.0, ], 3) + ''' self.conv1_1 = conv2d('conv_1', preprocessed_input, num_filters=int(round(32 * self.width_multiplier)), kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv1_1) - self.conv2_1 = depthwise_separable_conv2d('conv_ds_2', self.conv1_1, width_multiplier=self.width_multiplier, + self.conv2_1 = depthwise_separable_conv2d('conv_2_1', self.conv1_1, width_multiplier=self.width_multiplier, num_filters=64, kernel_size=(3, 3), padding='SAME', stride=(1, 1), batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd, activation=tf.nn.relu6) self._debug(self.conv2_1) - self.conv2_2 = depthwise_separable_conv2d('conv_ds_3', self.conv2_1, width_multiplier=self.width_multiplier, + self.conv2_2 = depthwise_separable_conv2d('conv_2_2', self.conv2_1, width_multiplier=self.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv2_2) - self.conv3_1 = depthwise_separable_conv2d('conv_ds_4', self.conv2_2, width_multiplier=self.width_multiplier, + self.conv3_1 = depthwise_separable_conv2d('conv_3_1', self.conv2_2, width_multiplier=self.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv3_1) - self.conv3_2 = depthwise_separable_conv2d('conv_ds_5', self.conv3_1, width_multiplier=self.width_multiplier, + self.conv3_2 = depthwise_separable_conv2d('conv_3_2', self.conv3_1, width_multiplier=self.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv3_2) - self.conv4_1 = depthwise_separable_conv2d('conv_ds_6', self.conv3_2, width_multiplier=self.width_multiplier, + self.conv4_1 = depthwise_separable_conv2d('conv_4_1', self.conv3_2, width_multiplier=self.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv4_1) - self.conv4_2 = depthwise_separable_conv2d('conv_ds_7', self.conv4_1, width_multiplier=self.width_multiplier, + self.conv4_2 = depthwise_separable_conv2d('conv_4_2', self.conv4_1, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv4_2) - self.conv5_1 = depthwise_separable_conv2d('conv_ds_8', self.conv4_2, width_multiplier=self.width_multiplier, + self.conv5_1 = depthwise_separable_conv2d('conv_5_1', self.conv4_2, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) 
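# A depthwise-separable block, as used throughout this encoder, factors a 3x3
# convolution into a per-channel 3x3 depthwise step plus a 1x1 pointwise step.
# A minimal TF 1.x sketch (relu6 assumed, batch norm omitted for brevity):
#     in_ch = x.shape.as_list()[3]
#     dw = tf.get_variable('dw', [3, 3, in_ch, 1])            # one 3x3 filter per input channel
#     pw = tf.get_variable('pw', [1, 1, in_ch, num_filters])  # 1x1 channel mixing
#     y = tf.nn.relu6(tf.nn.depthwise_conv2d(x, dw, [1, stride, stride, 1], 'SAME'))
#     y = tf.nn.relu6(tf.nn.conv2d(y, pw, [1, 1, 1, 1], 'SAME'))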
self._debug(self.conv5_1) - self.conv5_2 = depthwise_separable_conv2d('conv_ds_9', self.conv5_1, width_multiplier=self.width_multiplier, + self.conv5_2 = depthwise_separable_conv2d('conv_5_2', self.conv5_1, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_2) - self.conv5_3 = depthwise_separable_conv2d('conv_ds_10', self.conv5_2, + self.conv5_3 = depthwise_separable_conv2d('conv_5_3', self.conv5_2, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_3) - self.conv5_4 = depthwise_separable_conv2d('conv_ds_11', self.conv5_3, + self.conv5_4 = depthwise_separable_conv2d('conv_5_4', self.conv5_3, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_4) - self.conv5_5 = depthwise_separable_conv2d('conv_ds_12', self.conv5_4, + self.conv5_5 = depthwise_separable_conv2d('conv_5_5', self.conv5_4, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_5) - self.conv5_6 = depthwise_separable_conv2d('conv_ds_13', self.conv5_5, + self.conv5_6 = depthwise_separable_conv2d('conv_5_6', self.conv5_5, width_multiplier=self.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, is_training=self.train_flag, l2_strength=self.wd) self._debug(self.conv5_6) - self.conv6_1 = depthwise_separable_conv2d('conv_ds_14', self.conv5_6, + self.conv6_1 = depthwise_separable_conv2d('conv_6_1', self.conv5_6, width_multiplier=self.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, diff --git a/models/encoders/resnet_18.py b/models/encoders/resnet_18.py index 80a76e6..ab422ef 100644 --- a/models/encoders/resnet_18.py +++ b/models/encoders/resnet_18.py @@ -81,9 +81,9 @@ def build(self): self.x_preprocessed = self.x_input * (1.0 / 255.0) # self.x_preprocessed= self.x_input stat= torchfile.load('stat.t7') - self.resnet_mean= stat.transpose(1,2,0) + #self.resnet_mean= stat.transpose(1,2,0) # self.resnet_mean = tf.constant([0.2869, 0.3251, 0.2839], dtype=tf.float32) - self.x_preprocessed = (self.x_preprocessed - self.resnet_mean) #/ self.resnet_std + #self.x_preprocessed = (self.x_preprocessed - self.resnet_mean) #/ self.resnet_std # red, green, blue = tf.split(self.x_preprocessed, num_or_size_splits=3, axis=3) # self.x_preprocessed = tf.concat([blue,green,red], 3) diff --git a/train/train_psy.py b/train/train_psy.py index 8b33fa3..385b0cd 100644 --- a/train/train_psy.py +++ b/train/train_psy.py @@ -43,8 +43,10 @@ def __init__(self, args, sess, train_model, test_model): 'train-loss-per-epoch', 'val-loss-per-epoch', 'train-acc-per-epoch', 'val-acc-per-epoch'] self.images_summary_tags = [ - ('train_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, self.args.num_channels]), - ('val_prediction_sample', [None, self.params.img_height, self.params.img_width * 2, 
self.args.num_channels])] + ('train_prediction_sample', [None, self.params.img_height * 2, + self.params.img_width * 2, 3]), + ('val_prediction_sample', [None, self.params.img_height * 2, + self.params.img_width, 3])] self.summary_tags = [] self.summary_placeholders = {} self.summary_ops = {} @@ -52,9 +54,10 @@ def __init__(self, args, sess, train_model, test_model): self.init_summaries() # Create summary writer self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) - - self.num_iterations_training_per_epoch = 1000#self.train_data_len // self.args.batch_size - + #TODO + #!!! self.num_iterations_training_per_epoch should be larger to be meaninfull !!! + self.num_iterations_training_per_epoch = 5#self.train_data_len // self.args.batch_size + self.num_iterations_validation_per_epoch = 1 ################################################################################## # Init metrics class self.metrics = Metrics(self.args.num_classes) @@ -65,9 +68,20 @@ def __init__(self, args, sess, train_model, test_model): self.reporter = Reporter(self.args.out_dir + 'report_test.json', self.args) ################################################################################## train_seq_folder = self.args.data_dir + 'train_seq' - self.train_dataset = dl.load_datase(train_seq_folder, self.args.batch_size) - self.dataset_iterator = self.train_dataset.make_one_shot_iterator() - + test_seq_folder = self.args.data_dir + 'test_seq' + valid_seq_folder = test_seq_folder #TODO create validation folder + self.train_dataset = dl.load_dataset(train_seq_folder, + self.args.batch_size, + self.args.img_height, + self.args.img_width) + self.valid_dataset = dl.load_dataset(valid_seq_folder, + self.args.batch_size, + self.args.img_height, + self.args.img_width) + + self.dataset_train_iterator = self.train_dataset.make_one_shot_iterator() + self.dataset_valid_iterator = self.valid_dataset.make_one_shot_iterator() + def init_summaries(self): """ Create the summary part of the graph @@ -103,6 +117,7 @@ def add_summary(self, step, summaries_dict=None, summaries_merged=None): def train(self): print("Training mode will begin NOW ..") # curr_lr= self.model.args.learning_rate + next_element = self.dataset_train_iterator.get_next() for cur_epoch in range(self.model.global_epoch_tensor.eval(self.sess) + 1, self.args.num_epochs + 1, 1): tt = tqdm(range(self.num_iterations_training_per_epoch), total=self.num_iterations_training_per_epoch, @@ -110,10 +125,10 @@ def train(self): # init acc and loss lists loss_list = [] acc_list = [] - for _ in tt: + for cur_iteration in tt: # get the cur_it for the summary cur_it = self.model.global_step_tensor.eval(self.sess) - next_batch = self.dataset_iterator.get_next() + next_batch = next_element x_batch, y_batch = self.sess.run(next_batch) # Feed this variables to the network @@ -122,23 +137,29 @@ def train(self): self.model.is_training: True #self.model.curr_learning_rate:curr_lr } + if cur_iteration < self.num_iterations_training_per_epoch - 1: + # run the feed_forward + _, loss, acc, summaries_merged = self.sess.run( + [self.model.train_op, self.model.loss, self.model.accuracy, + self.model.merged_summaries], + feed_dict=feed_dict) + self.add_summary(cur_it, summaries_merged=summaries_merged) + else: + #also get images + _, loss, acc, summaries_merged, segmented_imgs = self.sess.run( + [self.model.train_op, self.model.loss, self.model.accuracy, + self.model.merged_summaries, self.model.segmented_summary], + feed_dict=feed_dict) + #TODO remove this + self.last_input 
= [x_batch, y_batch] - # run the feed_forward - _, loss, acc, summaries_merged = self.sess.run( - [self.model.train_op, self.model.loss, self.model.accuracy, - self.model.merged_summaries], - feed_dict=feed_dict) # log loss and acc loss_list += [loss] acc_list += [acc] - - # Update the Global step - self.model.global_step_assign_op.eval(session=self.sess, - feed_dict={self.model.global_step_input: cur_it + 1}) total_loss = np.mean(loss_list) total_acc = np.mean(acc_list) - + # summarize summaries_dict = dict() summaries_dict['train-loss-per-epoch'] = total_loss @@ -146,13 +167,17 @@ def train(self): if self.args.data_mode != 'experiment_v2': summaries_dict['train_prediction_sample'] = segmented_imgs - # self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) + self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) # report self.reporter.report_experiment_statistics('train-acc', 'epoch-' + str(cur_epoch), str(total_acc)) self.reporter.report_experiment_statistics('train-loss', 'epoch-' + str(cur_epoch), str(total_loss)) self.reporter.finalize() + # Update the Global step + self.model.global_step_assign_op.eval(session=self.sess, + feed_dict={self.model.global_step_input: cur_it + 1}) + # Update the Cur Epoch tensor # it is the last thing because if it is interrupted it repeat this self.model.global_epoch_assign_op.eval(session=self.sess, @@ -176,7 +201,6 @@ def train(self): def test_per_epoch(self, step, epoch): print("Validation at step:" + str(step) + " at epoch:" + str(epoch) + " ..") - # init tqdm and get the epoch value tt = tqdm(range(self.num_iterations_validation_per_epoch), total=self.num_iterations_validation_per_epoch, desc="Val-epoch-" + str(epoch) + "-") @@ -184,62 +208,63 @@ def test_per_epoch(self, step, epoch): # init acc and loss lists loss_list = [] acc_list = [] - inf_list = [] # reset metrics self.metrics.reset() # get the maximum iou to compare with and save the best model max_iou = self.model.best_iou_tensor.eval(self.sess) - + next_element = self.dataset_valid_iterator.get_next() # loop by the number of iterations - for _ in tt: - # load minibatches - x_batch = self.val_data['X'][idx:idx + self.args.batch_size] - y_batch = self.val_data['Y'][idx:idx + self.args.batch_size] - if self.args.data_mode == 'experiment_v2': - y_batch_large = self.val_data['Y_large'][idx:idx + self.args.batch_size] - - # update idx of minibatch - idx += self.args.batch_size - + for cur_iteration in tt: # Feed this variables to the network + #next_batch = next_element + #x_batch, y_batch = self.sess.run(next_batch) + x_batch, y_batch = self.last_input #TODO replace with prev lines feed_dict = {self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False } - start = time.time() # run the feed_forward + if cur_iteration < self.num_iterations_validation_per_epoch - 1: + out_argmax, loss, acc, summaries_merged = self.sess.run( + [self.model.out_argmax, + self.model.loss, self.model.accuracy, + self.model.merged_summaries], + feed_dict=feed_dict) + else: + out_argmax, loss, acc, summaries_merged, segmented_imgs = self.sess.run( + [self.test_model.out_argmax, + self.test_model.loss, self.model.accuracy, + self.test_model.merged_summaries, + self.test_model.test_segmented_summary], + feed_dict=feed_dict) - out_argmax, loss, acc, summaries_merged = self.sess.run( - [self.model.out_argmax, self.model.loss, self.model.accuracy, self.model.merged_summaries], - feed_dict=feed_dict) - - end = time.time() # log loss 
and acc loss_list += [loss] acc_list += [acc] - inf_list += [end - start] # log metrics self.metrics.update_metrics_batch(out_argmax, y_batch) - # mean over batches total_acc = np.mean(acc_list) mean_iou = self.metrics.compute_final_metrics(self.num_iterations_validation_per_epoch) mean_iou_arr = self.metrics.iou - mean_inference = str(np.mean(inf_list)) + '-seconds' + # summarize summaries_dict = dict() summaries_dict['val-acc-per-epoch'] = total_acc summaries_dict['mean_iou_on_val'] = mean_iou + summaries_dict['val_prediction_sample'] = segmented_imgs + cur_it = self.model.global_step_tensor.eval(self.sess) + self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) # report self.reporter.report_experiment_statistics('validation-acc', 'epoch-' + str(epoch), str(total_acc)) - self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch), - str(mean_inference)) +# self.reporter.report_experiment_statistics('avg_inference_time_on_validation', 'epoch-' + str(epoch), +# str(mean_inference)) self.reporter.report_experiment_validation_iou('epoch-' + str(epoch), str(mean_iou), mean_iou_arr) self.reporter.finalize() diff --git a/utils/img_utils.py b/utils/img_utils.py index 6d58ccf..3102a55 100755 --- a/utils/img_utils.py +++ b/utils/img_utils.py @@ -1,51 +1,42 @@ from PIL import Image import numpy as np +import matplotlib.cm as cm # colour map -label_colours_global = [(128, 64, 128), # 'road' - (244, 35, 232), # 'sidewalk' - (70, 70, 70), # 'building' - (102, 102, 156), # 'wall' - (190, 153, 153), # 'fence' - (153, 153, 153), # 'pole' - (250, 170, 30), # 'traffic light' - (220, 220, 0), # 'traffic sign' - (107, 142, 35), # 'vegetation' - (152, 251, 152), # 'terrain' - (70, 130, 180), # 'sky' - (220, 20, 60), # 'person' - (255, 0, 0), # 'rider' - (0, 0, 142), # 'car' - (0, 0, 70), # 'truck' - (0, 60, 100), # 'bus' - (0, 80, 100), # 'train' - (0, 0, 230), # 'motorcycle' - (119, 11, 32), # 'bicycle' - (0, 0, 0), ] # None -#label_colours_global = [(0,0,0), -# (0,0,0), -# (128, 64, 128), # 'road' -# (244, 35, 232), # 'sidewalk' -# (70, 70, 70), # 'building' -# (102, 102, 156), # 'wall' -# (190, 153, 153), # 'fence' -# (153, 153, 153), # 'pole' -# (250, 170, 30), # 'traffic light' -# (220, 220, 0), # 'traffic sign' -# (107, 142, 35), # 'vegetation' -# (152, 251, 152), # 'terrain' -# (70, 130, 180), # 'sky' -# (220, 20, 60), # 'person' -# (255, 0, 0), # 'rider' -# (0, 0, 142), # 'car' -# (0, 0, 70), # 'truck' -# (0, 60, 100), # 'bus' -# (0, 80, 100), # 'train' -# (0, 0, 230), # 'motorcycle' -# (119, 11, 32), # 'bicycle' -# ] # None - - +label_colours_global = [(255,255,255), # 'nal' + (255, 106, 0), # 'body' + (255, 0, 0), # 'neck' + #right arm + (255, 178, 127), + (255, 127, 127), + (182, 255, 0), + (218, 255, 127), + (255, 216, 0), + (107, 63, 127), + #left arm + (255, 233, 127), + (0, 148, 255), + (255, 0, 110), + (48, 48, 48), + (76, 255, 0), + (63, 73, 127), + #head, hips + (0, 255, 33), + (0, 255, 255), + #right leg + (0, 255, 144), + (127, 116, 63), + (127, 201, 255), + (165, 255, 127), + (214, 127, 255), + #left leg + (178, 0, 255), + (127, 63, 63), + (127, 255, 255), + (127, 255, 197), + (161, 127, 255), + #not a user + (72, 0, 255),] def decode_labels(mask, num_classes): """Decode batch of segmentation masks. 
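The per-pixel Python loop in decode_labels below is easy to follow but slow on full frames; since the palette above is just a (num_classes, 3) table, an equivalent vectorized form is a single fancy-indexing lookup. A minimal sketch, assuming mask values lie in [0, num_classes):

    import numpy as np

    palette = np.array(label_colours_global, dtype=np.uint8)  # (num_classes, 3) lookup table

    def decode_labels_fast(mask):
        # (n, h, w) integer mask -> (n, h, w, 3) uint8 RGB image batch
        return palette[mask]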
@@ -61,16 +52,6 @@ def decode_labels(mask, num_classes): # init colours array colours = label_colours_global - # if num_classes == 7: - # colours = label_colours_scala_7 - # elif num_classes == 6: - # colours = label_colours_scala_6 - # elif num_classes == 5: - # colours = label_colours_scala_5 - # else: - # print("ERROR this number of classes don't have a defined colours") - # exit(-1) - # Check the length of the colours with num_classes assert (num_classes == len(colours)), 'num_classes %d should be equal the number colours %d.' % (num_classes, len(colours)) # Get the shape of the mask @@ -87,3 +68,20 @@ def decode_labels(mask, num_classes): pixels[k_, j_] = colours[k] outputs[i] = np.array(img) return outputs + +def decode_input(imm): + n, h, w, _ = imm.shape + outputs = np.zeros((n, h, w, 3), dtype=np.uint8) + for i in range(n): + for c in range(3): + outputs[i,:,:,c] = np.array(imm[i, :, :, 0]) + return outputs + +def decode_conf(imm): + n, h, w = imm.shape + outputs = np.zeros((n, h, w, 3), dtype=np.uint8) + cmap = cm.get_cmap('jet') + for i in range(n): + colored = np.round(256 * cmap(np.array(imm[i, :, :]))) + outputs[i,:,:,:] = colored[:,:,:3] + return outputs diff --git a/utils/seg_dataloader.py b/utils/seg_dataloader.py index aa8d1dc..cdc62ba 100644 --- a/utils/seg_dataloader.py +++ b/utils/seg_dataloader.py @@ -5,7 +5,8 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework.ops import convert_to_tensor -from tensorflow.contrib.data import Iterator +#from tensorflow.data import Iterator +Iterator = tf.data.Iterator #import cv2 #import matplotlib.pyplot as plt import scipy From 972af8b41743b2f273ded429558ce1a12058c69d Mon Sep 17 00:00:00 2001 From: limorhe Date: Wed, 17 Oct 2018 16:00:34 -0500 Subject: [PATCH 6/6] more updates more flexible configuration options make batch normalization depend on batch size change summary images --- agent.py | 1 + .../unet_depthnet_train.yaml | 14 ++-- .../unet_depthnet_train_small.yaml | 36 ++++++++++ data_load.py => data/data_load_psy.py | 9 ++- layers/convolution.py | 4 +- models/basic/basic_model.py | 11 ++- models/encoders/depthnet.py | 63 ++++++++-------- models/unet_depthnet.py | 71 +++++++++++-------- train/train_psy.py | 28 ++++---- utils/img_utils.py | 3 + 10 files changed, 148 insertions(+), 92 deletions(-) create mode 100644 config/experiments_config/unet_depthnet_train_small.yaml rename data_load.py => data/data_load_psy.py (92%) diff --git a/agent.py b/agent.py index 1327c86..86d31a9 100644 --- a/agent.py +++ b/agent.py @@ -42,6 +42,7 @@ def __init__(self, args): def build_model(self): if self.mode == 'train' or self.mode == 'overfit': # validation phase + print('Building Train Network') with tf.variable_scope('network') as scope: self.model = self.model(self.args) self.model.build() diff --git a/config/experiments_config/unet_depthnet_train.yaml b/config/experiments_config/unet_depthnet_train.yaml index 21df4da..04fe5b6 100644 --- a/config/experiments_config/unet_depthnet_train.yaml +++ b/config/experiments_config/unet_depthnet_train.yaml @@ -5,20 +5,22 @@ out_dir: "unet_depthnet" # Data arguments # image dimensions are reduced to enable run on my pc -img_height: 360 -img_width: 640 +img_height: 740 +img_width: 1280 num_channels: 1 num_classes: 28 # Train arguments num_epochs: 200 -batch_size: 1 +batch_size: 20 +train_data_len: 2000 shuffle: True +dropout_keep_prob: 0.95 data_mode: "experiment" -save_every: 5 -test_every: 1 #5 +save_every: 10 +test_every: 10 max_to_keep: 2 -weighted_loss: False #consider 
this +weighted_loss: False random_cropping: False freeze_encoder: False diff --git a/config/experiments_config/unet_depthnet_train_small.yaml b/config/experiments_config/unet_depthnet_train_small.yaml new file mode 100644 index 0000000..9a3b822 --- /dev/null +++ b/config/experiments_config/unet_depthnet_train_small.yaml @@ -0,0 +1,36 @@ +# Directories arguments +data_dir: "synthetic_seq" +exp_dir: "unet_depthnet_small" +out_dir: "unet_depthnet_small" + +# Data arguments +# image dimensions are reduced to enable run on my pc +img_height: 125 +img_width: 125 +num_channels: 1 +num_classes: 28 + +# Train arguments +num_epochs: 200 +batch_size: 20 +train_data_len: 2000 +shuffle: True +dropout_keep_prob: 0.95 +data_mode: "experiment" +save_every: 10 +test_every: 10 +max_to_keep: 2 +weighted_loss: False +random_cropping: False +freeze_encoder: False + +# Models arguments +learning_rate: 0.0001 +weight_decay: 0.0005 +bias : 0.0 +batchnorm_enabled: True +#pretrained_path: "pretrained_weights/mobilenet_v1.pkl" + +# Misc arguments +verbose: False + diff --git a/data_load.py b/data/data_load_psy.py similarity index 92% rename from data_load.py rename to data/data_load_psy.py index 322d55b..75329a3 100644 --- a/data_load.py +++ b/data/data_load_psy.py @@ -70,13 +70,18 @@ def load_dataset(train_seq_folder, batch_size, h, w): for train_seq_name in train_seq_files: train_seq = h5py.File(train_seq_name, "r") num_cameras = train_seq['INFO']['NUM_CAMERAS'].value[0] - num_frames = train_seq['INFO']['COUNT'].value[0] + num_frames = train_seq['INFO']['COUNT'].value[0] * 5 train_seq.close() for frame_idx in range(0, num_frames, 5): for cam_idx in range(num_cameras): filename_str = train_seq_name + '__' + 'FRAME{:04d}/RAW/CAM{:d}/'.format(frame_idx, cam_idx) filenames.append(filename_str) - + + num_images = len(filenames) + int_num_images = int(np.floor(num_images / batch_size) * batch_size) + if num_images != int_num_images : + del filenames[int_num_images:] + labels = [0]*len(filenames) dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) dataset = dataset.shuffle(buffer_size=10000) diff --git a/layers/convolution.py b/layers/convolution.py index 3edc630..6c9ed72 100644 --- a/layers/convolution.py +++ b/layers/convolution.py @@ -370,7 +370,7 @@ def depthwise_separable_conv2d(name, x, w_depthwise=None, w_pointwise=None, widt kernel_size=(3, 3), padding='SAME', stride=(1, 1), initializer=tf.contrib.layers.xavier_initializer(), l2_strength=0.0, biases=(0.0, 0.0), - activation=None, batchnorm_enabled=True, + dropout_keep_prob = -1, activation=None, batchnorm_enabled=True, is_training=True): total_num_filters = int(round(num_filters * width_multiplier)) with tf.variable_scope(name) as scope: @@ -382,7 +382,7 @@ def depthwise_separable_conv2d(name, x, w_depthwise=None, w_pointwise=None, widt conv_o = conv2d('pointwise', x=conv_a, w=w_pointwise, num_filters=total_num_filters, kernel_size=(1, 1), initializer=initializer, l2_strength=l2_strength, bias=biases[1], activation=activation, - batchnorm_enabled=batchnorm_enabled, is_training=is_training) + batchnorm_enabled=batchnorm_enabled, is_training=is_training, dropout_keep_prob = dropout_keep_prob) return conv_o diff --git a/models/basic/basic_model.py b/models/basic/basic_model.py index 9ce440f..cb4c721 100644 --- a/models/basic/basic_model.py +++ b/models/basic/basic_model.py @@ -172,15 +172,12 @@ def init_summaries(self): self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.y_pl, self.out_argmax), tf.float32)) with tf.name_scope('segmented_output'): - 
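# The decoders below run as numpy code wrapped in tf.py_func, so the summary
# panels are assembled as uint8 images; tf.concat(axis=1, ...) stacks panels
# vertically (along height), while axis=2 places them side by side. A sketch of
# the assumed pattern:
#     rgb_in   = tf.py_func(imu.decode_input,  [self.x_pl], tf.uint8)
#     rgb_pred = tf.py_func(imu.decode_labels, [self.out_argmax, self.params.num_classes], tf.uint8)
#     panel    = tf.concat(axis=1, values=[rgb_pred, rgb_in])   # (n, 2h, w, 3)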
#input_summary = tf.cast(self.x_pl, tf.uint8) input_summary = tf.py_func(imu.decode_input, [self.x_pl], tf.uint8) - labels_summary = tf.py_func(imu.decode_labels, [self.y_pl, self.params.num_classes], tf.uint8) +# labels_summary = tf.py_func(imu.decode_labels, [self.y_pl, self.params.num_classes], tf.uint8) preds_summary = tf.py_func(imu.decode_labels, [self.out_argmax, self.params.num_classes], tf.uint8) - conf_summary = tf.py_func(imu.decode_conf, [self.out_argmax], tf.uint8) - img_1 = tf.concat(axis=1, values=[input_summary,labels_summary]) - img_2 = tf.concat(axis=1, values=[preds_summary,conf_summary]) - self.segmented_summary = tf.concat(axis=2, values = [img_1, img_2]) - self.test_segmented_summary = tf.concat(axis = 1, values = [preds_summary, conf_summary]) +# conf_summary = tf.py_func(imu.decode_conf, [self.out_argmax], tf.uint8) + self.segmented_summary = tf.concat(axis=1, values=[preds_summary,input_summary]) + self.test_segmented_summary = tf.concat(axis = 1, values = [preds_summary, input_summary]) # Every step evaluate these summaries with tf.name_scope('train-summary'): diff --git a/models/encoders/depthnet.py b/models/encoders/depthnet.py index 394c07c..dc195f3 100644 --- a/models/encoders/depthnet.py +++ b/models/encoders/depthnet.py @@ -13,6 +13,8 @@ def __init__(self, x_input, num_classes, pretrained_path, train_flag, + batchnorm_enabled, + dropout_keep_prob, width_multipler=1.0, weight_decay=5e-4): @@ -20,6 +22,8 @@ def __init__(self, x_input, self.x_input = x_input self.num_classes = num_classes self.train_flag = train_flag + self.batchnorm_enabled = batchnorm_enabled + self.dropout_keep_prob=dropout_keep_prob self.wd = weight_decay self.pretrained_path = os.path.realpath(os.getcwd()) + "/" + pretrained_path self.width_multiplier = width_multipler @@ -67,91 +71,94 @@ def encoder_build(self): self.conv1_1 = conv2d('conv_1', preprocessed_input, num_filters=int(round(32 * self.width_multiplier)), kernel_size=(3, 3), - padding='SAME', stride=(2, 2), activation=tf.nn.relu6, batchnorm_enabled=True, - is_training=self.train_flag, l2_strength=self.wd) + padding='SAME', stride=(2, 2), activation=tf.nn.relu6, + batchnorm_enabled=self.batchnorm_enabled, + is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob,l2_strength=self.wd) self._debug(self.conv1_1) self.conv2_1 = depthwise_separable_conv2d('conv_2_1', self.conv1_1, width_multiplier=self.width_multiplier, num_filters=64, kernel_size=(3, 3), padding='SAME', stride=(1, 1), - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd, activation=tf.nn.relu6) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + l2_strength=self.wd, dropout_keep_prob=self.dropout_keep_prob, + activation=tf.nn.relu6) self._debug(self.conv2_1) self.conv2_2 = depthwise_separable_conv2d('conv_2_2', self.conv2_1, width_multiplier=self.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv2_2) self.conv3_1 = depthwise_separable_conv2d('conv_3_1', self.conv2_2, width_multiplier=self.width_multiplier, num_filters=128, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, 
is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv3_1) self.conv3_2 = depthwise_separable_conv2d('conv_3_2', self.conv3_1, width_multiplier=self.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv3_2) self.conv4_1 = depthwise_separable_conv2d('conv_4_1', self.conv3_2, width_multiplier=self.width_multiplier, num_filters=256, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv4_1) self.conv4_2 = depthwise_separable_conv2d('conv_4_2', self.conv4_1, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv4_2) self.conv5_1 = depthwise_separable_conv2d('conv_5_1', self.conv4_2, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv5_1) self.conv5_2 = depthwise_separable_conv2d('conv_5_2', self.conv5_1, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv5_2) self.conv5_3 = depthwise_separable_conv2d('conv_5_3', self.conv5_2, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv5_3) self.conv5_4 = depthwise_separable_conv2d('conv_5_4', self.conv5_3, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv5_4) self.conv5_5 = depthwise_separable_conv2d('conv_5_5', self.conv5_4, width_multiplier=self.width_multiplier, num_filters=512, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, 
is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv5_5) self.conv5_6 = depthwise_separable_conv2d('conv_5_6', self.conv5_5, width_multiplier=self.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(2, 2), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv5_6) self.conv6_1 = depthwise_separable_conv2d('conv_6_1', self.conv5_6, width_multiplier=self.width_multiplier, num_filters=1024, kernel_size=(3, 3), padding='SAME', stride=(1, 1), activation=tf.nn.relu6, - batchnorm_enabled=True, is_training=self.train_flag, - l2_strength=self.wd) + batchnorm_enabled=self.batchnorm_enabled, is_training=self.train_flag, + dropout_keep_prob=self.dropout_keep_prob, l2_strength=self.wd) self._debug(self.conv6_1) # Pooling is removed. self.score_fr = conv2d('conv_1c_1x1', self.conv6_1, num_filters=self.num_classes, l2_strength=self.wd, diff --git a/models/unet_depthnet.py b/models/unet_depthnet.py index c499e8e..1d1a5e3 100644 --- a/models/unet_depthnet.py +++ b/models/unet_depthnet.py @@ -29,95 +29,104 @@ def init_network(self): Building the Network here :return: """ - + batch_size = self.x_pl.shape[0] + #TODO test this size once running on a strong enough cmputer + batchnorm_enabled = batch_size > 10 # Init DepthNet as an encoder self.encoder = DepthNet(x_input=self.x_pl, num_classes=self.params.num_classes, - pretrained_path=self.args.pretrained_path, - train_flag=self.is_training, width_multipler=1.0, weight_decay=self.args.weight_decay) + pretrained_path=self.args.pretrained_path, batchnorm_enabled = batchnorm_enabled, + train_flag=self.is_training, width_multipler=1.0, weight_decay=self.args.weight_decay, + dropout_keep_prob=self.args.dropout_keep_prob,) # Build Encoding part self.encoder.build() # Build Decoding part with tf.name_scope('upscale_1'): - self.expand11 = conv2d('expand1_1', x=self.encoder.conv5_6, batchnorm_enabled=True, is_training= self.is_training, + self.expand11 = conv2d('expand1_1', x=self.encoder.conv5_6, batchnorm_enabled=batchnorm_enabled, is_training= self.is_training, num_filters=self.encoder.conv5_5.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand11) self.upscale1 = conv2d_transpose('upscale1', x=self.expand11,is_training= self.is_training, - output_shape=self.encoder.conv5_5.shape.as_list(), batchnorm_enabled=True, - kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + output_shape=self.encoder.conv5_5.shape.as_list(), batchnorm_enabled=batchnorm_enabled, + kernel_size=(4, 4), stride=(2, 2), + dropout_keep_prob=self.args.dropout_keep_prob,l2_strength=self.encoder.wd) self._debug(self.upscale1) self.add1 = tf.add(self.upscale1, self.encoder.conv5_5) self._debug(self.add1) - self.expand12 = conv2d('expand1_2', x=self.add1, batchnorm_enabled=True,is_training= self.is_training, + self.expand12 = conv2d('expand1_2', x=self.add1, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv5_5.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand12) with tf.name_scope('upscale_2'): - self.expand21 = 
conv2d('expand2_1', x=self.expand12, batchnorm_enabled=True,is_training= self.is_training, + self.expand21 = conv2d('expand2_1', x=self.expand12, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv4_1.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand21) self.upscale2 = conv2d_transpose('upscale2', x=self.expand21,is_training= self.is_training, - output_shape=self.encoder.conv4_1.shape.as_list(),batchnorm_enabled=True, - kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + output_shape=self.encoder.conv4_1.shape.as_list(),batchnorm_enabled=batchnorm_enabled, + kernel_size=(4, 4), stride=(2, 2), + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.upscale2) self.add2 = tf.add(self.upscale2, self.encoder.conv4_1) self._debug(self.add2) - self.expand22 = conv2d('expand2_2', x=self.add2, batchnorm_enabled=True,is_training= self.is_training, + self.expand22 = conv2d('expand2_2', x=self.add2, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv4_1.shape.as_list()[3], kernel_size=(1, 1), l2_strength=self.encoder.wd) self._debug(self.expand22) with tf.name_scope('upscale_3'): - self.expand31 = conv2d('expand3_1', x=self.expand22, batchnorm_enabled=True,is_training= self.is_training, + self.expand31 = conv2d('expand3_1', x=self.expand22, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv3_1.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand31) - self.upscale3 = conv2d_transpose('upscale3', x=self.expand31, batchnorm_enabled=True,is_training= self.is_training, + self.upscale3 = conv2d_transpose('upscale3', x=self.expand31, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, output_shape=self.encoder.conv3_1.shape.as_list(), - kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + kernel_size=(4, 4), stride=(2, 2), + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.upscale3) self.add3 = tf.add(self.upscale3, self.encoder.conv3_1) self._debug(self.add3) - self.expand32 = conv2d('expand3_2', x=self.add3, batchnorm_enabled=True,is_training= self.is_training, + self.expand32 = conv2d('expand3_2', x=self.add3, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv3_1.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand32) with tf.name_scope('upscale_4'): - self.expand41 = conv2d('expand4_1', x=self.expand32, batchnorm_enabled=True,is_training= self.is_training, + self.expand41 = conv2d('expand4_1', x=self.expand32, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv2_1.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand41) - self.upscale4 = conv2d_transpose('upscale4', x=self.expand41, batchnorm_enabled=True,is_training= self.is_training, + self.upscale4 = conv2d_transpose('upscale4', x=self.expand41, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, 
output_shape=self.encoder.conv2_1.shape.as_list(), - kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) + kernel_size=(4, 4), stride=(2, 2), + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.upscale4) self.add4 = tf.add(self.upscale4, self.encoder.conv2_1) self._debug(self.add4) - self.expand42 = conv2d('expand4_2', x=self.add4, batchnorm_enabled=True,is_training= self.is_training, + self.expand42 = conv2d('expand4_2', x=self.add4, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, num_filters=self.encoder.conv2_1.shape.as_list()[3], kernel_size=(1, 1), - l2_strength=self.encoder.wd) + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand42) with tf.name_scope('upscale_5'): - self.upscale5 = conv2d_transpose('upscale5', x=self.expand42, batchnorm_enabled=True,is_training= self.is_training, + self.upscale5 = conv2d_transpose('upscale5', x=self.expand42, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, output_shape=self.x_pl.shape.as_list()[0:3] + [ self.encoder.conv2_1.shape.as_list()[3]], + dropout_keep_prob=self.args.dropout_keep_prob, kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd) self._debug(self.upscale5) - self.expand5 = conv2d('expand5', x=self.upscale5, batchnorm_enabled=True,is_training= self.is_training, - num_filters=self.encoder.conv1_1.shape.as_list()[3], kernel_size=(1, 1),dropout_keep_prob=0.5, + self.expand5 = conv2d('expand5', x=self.upscale5, batchnorm_enabled=batchnorm_enabled,is_training= self.is_training, + num_filters=self.encoder.conv1_1.shape.as_list()[3], kernel_size=(1, 1), + dropout_keep_prob=self.args.dropout_keep_prob, l2_strength=self.encoder.wd) self._debug(self.expand5) with tf.name_scope('final_score'): self.fscore = conv2d('fscore', x=self.expand5, - num_filters=self.params.num_classes, kernel_size=(1, 1), - l2_strength=self.encoder.wd) + num_filters=self.params.num_classes, + kernel_size=(1, 1), l2_strength=self.encoder.wd) self._debug(self.fscore) self.logits = self.fscore diff --git a/train/train_psy.py b/train/train_psy.py index 385b0cd..a3b3010 100644 --- a/train/train_psy.py +++ b/train/train_psy.py @@ -5,8 +5,8 @@ import h5py import tensorflow as tf import numpy as np -import data_load as dl from train.basic_train import BasicTrain +from data.data_load_psy import load_dataset from metrics.metrics import Metrics from utils.reporter import Reporter from utils.misc import timeit @@ -44,7 +44,7 @@ def __init__(self, args, sess, train_model, test_model): 'train-acc-per-epoch', 'val-acc-per-epoch'] self.images_summary_tags = [ ('train_prediction_sample', [None, self.params.img_height * 2, - self.params.img_width * 2, 3]), + self.params.img_width, 3]), ('val_prediction_sample', [None, self.params.img_height * 2, self.params.img_width, 3])] self.summary_tags = [] @@ -54,9 +54,7 @@ def __init__(self, args, sess, train_model, test_model): self.init_summaries() # Create summary writer self.summary_writer = tf.summary.FileWriter(self.args.summary_dir, self.sess.graph) - #TODO - #!!! self.num_iterations_training_per_epoch should be larger to be meaninfull !!! 
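# The '+' line below replaces the hard-coded 5 iterations with a ceiling
# division over the configured dataset length; e.g. with train_data_len=2000
# and batch_size=20 (the values set in unet_depthnet_train.yaml above):
#     int((2000 - 1) / 20) + 1  ==  100  ==  ceil(2000 / 20)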
- self.num_iterations_training_per_epoch = 5#self.train_data_len // self.args.batch_size + self.num_iterations_training_per_epoch = int((self.args.train_data_len - 1) / self.args.batch_size) + 1 self.num_iterations_validation_per_epoch = 1 ################################################################################## # Init metrics class @@ -70,11 +68,11 @@ def __init__(self, args, sess, train_model, test_model): train_seq_folder = self.args.data_dir + 'train_seq' test_seq_folder = self.args.data_dir + 'test_seq' valid_seq_folder = test_seq_folder #TODO create validation folder - self.train_dataset = dl.load_dataset(train_seq_folder, + self.train_dataset = load_dataset(train_seq_folder, self.args.batch_size, self.args.img_height, self.args.img_width) - self.valid_dataset = dl.load_dataset(valid_seq_folder, + self.valid_dataset = load_dataset(valid_seq_folder, self.args.batch_size, self.args.img_height, self.args.img_width) @@ -137,7 +135,9 @@ def train(self): self.model.is_training: True #self.model.curr_learning_rate:curr_lr } - if cur_iteration < self.num_iterations_training_per_epoch - 1: + save_image = (cur_iteration == self.num_iterations_training_per_epoch - 1) and \ + (cur_epoch % self.args.save_every == 0) + if not save_image: # run the feed_forward _, loss, acc, summaries_merged = self.sess.run( [self.model.train_op, self.model.loss, self.model.accuracy, @@ -150,8 +150,6 @@ def train(self): [self.model.train_op, self.model.loss, self.model.accuracy, self.model.merged_summaries, self.model.segmented_summary], feed_dict=feed_dict) - #TODO remove this - self.last_input = [x_batch, y_batch] # log loss and acc loss_list += [loss] @@ -164,8 +162,7 @@ def train(self): summaries_dict = dict() summaries_dict['train-loss-per-epoch'] = total_loss summaries_dict['train-acc-per-epoch'] = total_acc - - if self.args.data_mode != 'experiment_v2': + if cur_epoch % self.args.save_every == 0: summaries_dict['train_prediction_sample'] = segmented_imgs self.add_summary(cur_it, summaries_dict=summaries_dict, summaries_merged=summaries_merged) @@ -218,9 +215,8 @@ def test_per_epoch(self, step, epoch): # loop by the number of iterations for cur_iteration in tt: # Feed this variables to the network - #next_batch = next_element - #x_batch, y_batch = self.sess.run(next_batch) - x_batch, y_batch = self.last_input #TODO replace with prev lines + next_batch = next_element + x_batch, y_batch = self.sess.run(next_batch) feed_dict = {self.model.x_pl: x_batch, self.model.y_pl: y_batch, self.model.is_training: False @@ -336,4 +332,4 @@ def finalize(self): self.summary_writer.close() self.save_model() - \ No newline at end of file + diff --git a/utils/img_utils.py b/utils/img_utils.py index 3102a55..d564411 100755 --- a/utils/img_utils.py +++ b/utils/img_utils.py @@ -56,6 +56,7 @@ def decode_labels(mask, num_classes): assert (num_classes == len(colours)), 'num_classes %d should be equal the number colours %d.' % (num_classes, len(colours)) # Get the shape of the mask n, h, w = mask.shape + n = min(n,3) # Create the output numpy array outputs = np.zeros((n, h, w, 3), dtype=np.uint8) # Loop on images @@ -71,6 +72,7 @@ def decode_labels(mask, num_classes): def decode_input(imm): n, h, w, _ = imm.shape + n = min(n,3) outputs = np.zeros((n, h, w, 3), dtype=np.uint8) for i in range(n): for c in range(3): @@ -79,6 +81,7 @@ def decode_input(imm): def decode_conf(imm): n, h, w = imm.shape + n = min(n,3) outputs = np.zeros((n, h, w, 3), dtype=np.uint8) cmap = cm.get_cmap('jet') for i in range(n):
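One caveat worth flagging in decode_conf as written: matplotlib colormaps return RGBA floats in [0, 1], so np.round(256 * cmap(...)) can reach 256, which wraps around to 0 when stored into a uint8 array; scaling by 255 avoids the wrap. Relatedly, the earlier summary code fed self.out_argmax into decode_conf, where the self.out_conf tensor added alongside it appears to have been intended. A minimal corrected sketch, assuming float confidences in [0, 1]:

    import numpy as np
    import matplotlib.cm as cm

    def decode_conf_safe(conf):
        # (n, h, w) float confidences in [0, 1] -> (n, h, w, 3) uint8 heatmap ('jet')
        cmap = cm.get_cmap('jet')
        colored = np.uint8(np.round(255 * cmap(conf)))  # RGBA scaled to [0, 255], no wrap
        return colored[..., :3]                         # drop the alpha channel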