Skip to content
Open
17 changes: 9 additions & 8 deletions keras_frcnn/FixedBatchNormalization.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from keras.engine import Layer, InputSpec
from keras import initializers, regularizers
from keras import backend as K
#from keras.engine import Layer, InputSpec
from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras import initializers, regularizers
from tensorflow.keras import backend as K


class FixedBatchNormalization(Layer):
Expand All @@ -23,20 +24,20 @@ def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
shape = (input_shape[self.axis],)

self.gamma = self.add_weight(shape,
self.gamma = self.add_weight(shape=shape,
initializer=self.gamma_init,
regularizer=self.gamma_regularizer,
name='{}_gamma'.format(self.name),
trainable=False)
self.beta = self.add_weight(shape,
self.beta = self.add_weight(shape=shape,
initializer=self.beta_init,
regularizer=self.beta_regularizer,
name='{}_beta'.format(self.name),
trainable=False)
self.running_mean = self.add_weight(shape, initializer='zero',
self.running_mean = self.add_weight(shape=shape, initializer='zero',
name='{}_running_mean'.format(self.name),
trainable=False)
self.running_std = self.add_weight(shape, initializer='one',
self.running_std = self.add_weight(shape=shape, initializer='one',
name='{}_running_std'.format(self.name),
trainable=False)

Expand Down Expand Up @@ -80,4 +81,4 @@ def get_config(self):
'gamma_regularizer': self.gamma_regularizer.get_config() if self.gamma_regularizer else None,
'beta_regularizer': self.beta_regularizer.get_config() if self.beta_regularizer else None}
base_config = super(FixedBatchNormalization, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
return dict(list(base_config.items()) + list(config.items()))
26 changes: 14 additions & 12 deletions keras_frcnn/RoiPoolingConv.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from keras.engine.topology import Layer
import keras.backend as K
#from keras.engine.topology import Layer
from tensorflow.keras.layers import Layer, InputSpec
from tensorflow.keras import backend as K

if K.backend() == 'tensorflow':
import tensorflow as tf
Expand All @@ -26,22 +27,22 @@ class RoiPoolingConv(Layer):
'''
def __init__(self, pool_size, num_rois, **kwargs):

self.dim_ordering = K.image_dim_ordering()
assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

self.dim_ordering = K.image_data_format()
#assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
assert self.dim_ordering in {'channels_first', 'channels_last'}, 'dim_ordering must be in {channels_first, channels_last}'
self.pool_size = pool_size
self.num_rois = num_rois

super(RoiPoolingConv, self).__init__(**kwargs)

def build(self, input_shape):
if self.dim_ordering == 'th':
if self.dim_ordering == 'channels_first':
self.nb_channels = input_shape[0][1]
elif self.dim_ordering == 'tf':
elif self.dim_ordering == 'channels_last':
self.nb_channels = input_shape[0][3]

def compute_output_shape(self, input_shape):
if self.dim_ordering == 'th':
if self.dim_ordering == 'channels_first':
return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size
else:
return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels
Expand Down Expand Up @@ -72,7 +73,7 @@ def call(self, x, mask=None):
# NOTE: the RoiPooling implementation differs between Theano and TensorFlow because Theano lacks a resize op.
# The Theano implementation is much less efficient and leads to long compile times.

if self.dim_ordering == 'th':
if self.dim_ordering == 'channels_first':
for jy in range(num_pool_regions):
for ix in range(num_pool_regions):
x1 = x + ix * row_length
Expand All @@ -96,19 +97,20 @@ def call(self, x, mask=None):
pooled_val = K.max(xm, axis=(2, 3))
outputs.append(pooled_val)

elif self.dim_ordering == 'tf':
elif self.dim_ordering == 'channels_last':
x = K.cast(x, 'int32')
y = K.cast(y, 'int32')
w = K.cast(w, 'int32')
h = K.cast(h, 'int32')

rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
#rs = tf.image.resize_images(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
rs = tf.image.resize(img[:, y:y+h, x:x+w, :], (self.pool_size, self.pool_size))
outputs.append(rs)

final_output = K.concatenate(outputs, axis=0)
final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

if self.dim_ordering == 'th':
if self.dim_ordering == 'channels_first':
final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
else:
final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))
Expand Down
2 changes: 1 addition & 1 deletion keras_frcnn/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from keras import backend as K
from tensorflow.keras import backend as K
import math

class Config:
Expand Down
26 changes: 17 additions & 9 deletions keras_frcnn/resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@
from __future__ import print_function
from __future__ import absolute_import

from keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \
from tensorflow.keras.layers import Input, Add, Dense, Activation, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, \
AveragePooling2D, TimeDistributed

from keras import backend as K
from tensorflow.keras import backend as K

from keras_frcnn.RoiPoolingConv import RoiPoolingConv
from keras_frcnn.FixedBatchNormalization import FixedBatchNormalization

def get_weight_path():
if K.image_dim_ordering() == 'th':
#if K.image_dim_ordering() == 'th':
if K.image_data_format() == 'channels_first':
return 'resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
else:
return 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
Expand All @@ -39,7 +40,8 @@ def identity_block(input_tensor, kernel_size, filters, stage, block, trainable=T

nb_filter1, nb_filter2, nb_filter3 = filters

if K.image_dim_ordering() == 'tf':
#if K.image_dim_ordering() == 'tf':
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
Expand Down Expand Up @@ -68,7 +70,8 @@ def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainabl
# identity block time distributed

nb_filter1, nb_filter2, nb_filter3 = filters
if K.image_dim_ordering() == 'tf':
#if K.image_dim_ordering() == 'tf':
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
Expand All @@ -95,7 +98,8 @@ def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainabl
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2), trainable=True):

nb_filter1, nb_filter2, nb_filter3 = filters
if K.image_dim_ordering() == 'tf':
#if K.image_dim_ordering() == 'tf':
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
Expand Down Expand Up @@ -127,7 +131,8 @@ def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape,
# conv block time distributed

nb_filter1, nb_filter2, nb_filter3 = filters
if K.image_dim_ordering() == 'tf':
#if K.image_dim_ordering() == 'tf':
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1
Expand Down Expand Up @@ -156,7 +161,8 @@ def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape,
def nn_base(input_tensor=None, trainable=False):

# Determine proper input shape
if K.image_dim_ordering() == 'th':
#if K.image_dim_ordering() == 'th':
if K.image_data_format() == 'channels_first':
input_shape = (3, None, None)
else:
input_shape = (None, None, 3)
Expand All @@ -169,12 +175,14 @@ def nn_base(input_tensor=None, trainable=False):
else:
img_input = input_tensor

if K.image_dim_ordering() == 'tf':
#if K.image_dim_ordering() == 'tf':
if K.image_data_format() == 'channels_last':
bn_axis = 3
else:
bn_axis = 1

x = ZeroPadding2D((3, 3))(img_input)
print(x)

x = Convolution2D(64, (7, 7), strides=(2, 2), name='conv1', trainable = trainable)(x)
x = FixedBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
Expand Down
12 changes: 6 additions & 6 deletions keras_frcnn/roi_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,30 +229,30 @@ def rpn_to_roi(rpn_layer, regr_layer, C, dim_ordering, use_regr=True, max_boxes=

assert rpn_layer.shape[0] == 1

if dim_ordering == 'th':
if dim_ordering == 'channels_first':
(rows,cols) = rpn_layer.shape[2:]

elif dim_ordering == 'tf':
elif dim_ordering == 'channels_last':
(rows, cols) = rpn_layer.shape[1:3]

curr_layer = 0
if dim_ordering == 'tf':
if dim_ordering == 'channels_last':
A = np.zeros((4, rpn_layer.shape[1], rpn_layer.shape[2], rpn_layer.shape[3]))
elif dim_ordering == 'th':
elif dim_ordering == 'channels_first':
A = np.zeros((4, rpn_layer.shape[2], rpn_layer.shape[3], rpn_layer.shape[1]))

for anchor_size in anchor_sizes:
for anchor_ratio in anchor_ratios:

anchor_x = (anchor_size * anchor_ratio[0])/C.rpn_stride
anchor_y = (anchor_size * anchor_ratio[1])/C.rpn_stride
if dim_ordering == 'th':
if dim_ordering == 'channels_first':
regr = regr_layer[0, 4 * curr_layer:4 * curr_layer + 4, :, :]
else:
regr = regr_layer[0, :, :, 4 * curr_layer:4 * curr_layer + 4]
regr = np.transpose(regr, (2, 0, 1))

X, Y = np.meshgrid(np.arange(cols),np. arange(rows))
X, Y = np.meshgrid(np.arange(cols),np.arange(rows))

A[0, :, :, curr_layer] = X - anchor_x/2
A[1, :, :, curr_layer] = Y - anchor_y/2
Expand Down
20 changes: 11 additions & 9 deletions test_frcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
from optparse import OptionParser
import time
from keras_frcnn import config
from keras import backend as K
from keras.layers import Input
from keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from keras_frcnn import roi_helpers
import matplotlib.pyplot as plt

sys.setrecursionlimit(40000)

Expand Down Expand Up @@ -106,7 +107,8 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):
elif C.network == 'vgg':
num_features = 512

if K.image_dim_ordering() == 'th':
#if K.image_dim_ordering() == 'th':
if K.image_data_format() == 'channels_first':
input_shape_img = (3, None, None)
input_shape_features = (num_features, None, None)
else:
Expand Down Expand Up @@ -158,14 +160,15 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):

X, ratio = format_img(img, C)

if K.image_dim_ordering() == 'tf':
#if K.image_dim_ordering() == 'tf':
if K.image_data_format() == 'channels_last':
X = np.transpose(X, (0, 2, 3, 1))

# get the feature maps and output from the RPN
[Y1, Y2, F] = model_rpn.predict(X)


R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)
R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_data_format(), overlap_thresh=0.7)

# convert from (x1,y1,x2,y2) to (x,y,w,h)
R[:, 2] -= R[:, 0]
Expand Down Expand Up @@ -242,6 +245,5 @@ def get_real_coordinates(ratio, x1, y1, x2, y2):

print('Elapsed time = {}'.format(time.time() - st))
print(all_dets)
cv2.imshow('img', img)
cv2.waitKey(0)
# cv2.imwrite('./results_imgs/{}.png'.format(idx),img)

cv2.imwrite('./result_images/{}_treated.png'.format(img_name),img)