From b73c367f86cbd254459aa060d8002a8748dba87f Mon Sep 17 00:00:00 2001 From: Niklas H Date: Tue, 23 Mar 2021 12:16:51 +0100 Subject: [PATCH] Changes to make it work --- components/agent.py | 64 ++++-- components/autoencoder.py | 58 +++-- components/state_builder.py | 63 ++++- cross_circle_gym/envs/cross_circle_base.py | 141 ++++++++---- .../envs/cross_circle_mixed_rand.py | 5 +- main.py | 216 ++++++++++-------- scripts/hyperparameter.sh | 52 +++++ scripts/training.sh | 43 ++++ utils.py | 116 ++++++++++ 9 files changed, 568 insertions(+), 190 deletions(-) create mode 100644 scripts/hyperparameter.sh create mode 100644 scripts/training.sh create mode 100644 utils.py diff --git a/components/agent.py b/components/agent.py index 8b1edeb..798792f 100644 --- a/components/agent.py +++ b/components/agent.py @@ -70,10 +70,13 @@ def update_target_model(self): def remember(self, state, action, reward, next_state, done): self.memory.append((state, action, reward, next_state, done)) - def act(self, state): - if np.random.rand() <= self.epsilon: - return random.randrange(self.action_size) - act_values = self.model.predict(state) + def act(self, state,random=True): + if random: + if np.random.rand() <= self.epsilon: + return random.randrange(self.action_size) + act_values = self.model.predict(state) + else: + act_values = self.model.predict(state) return np.argmax(act_values[0]) # returns action def replay(self, batch_size): @@ -98,24 +101,30 @@ def save(self, name): class TabularAgent: '''RL agent as described in the DSRL paper''' - def __init__(self, action_size, neighbor_radius=25): + def __init__(self, action_size,alpha,epsilon_decay,neighbor_radius=25): self.action_size = action_size + self.alpha = alpha self.epsilon = 1 - self.epsilon_decay = 0.999 + self.epsilon_decay = epsilon_decay self.epsilon_min = 0.1 self.gamma = 0.95 self.neighbor_radius=neighbor_radius + self.offset = neighbor_radius*2 self.tables = {} - def act(self, state): + def act(self, state,random_act=True): 
''' Determines action to take based on given state State: Array of interactions (entities in each interaction are presorted by type for consistency) Returns: action to take, chosen e-greedily ''' + if not random_act: + return np.argmax(self._total_rewards(state)) if np.random.rand() <= self.epsilon: - print('random action, e:', self.epsilon) + #print('random action, e:', self.epsilon) + if self.epsilon > self.epsilon_min: + self.epsilon *= self.epsilon_decay return random.randrange(self.action_size) if self.epsilon > self.epsilon_min: @@ -125,20 +134,37 @@ def act(self, state): def update(self, state, action, reward, next_state, done): '''Update tables based on reward and action taken''' - curr_tr = self._total_rewards(state) - next_tr = self._total_rewards(next_state) - print('Reward for action {}: {}. Current total rewards: {}'.format(action, reward, curr_tr)) - print('Next Total Reward:', next_tr) + + for interaction in state: type_1, type_2 = interaction['types_after'] # TODO resolve: should this too be types_before? 
table = self.tables.setdefault(type_1, {}).setdefault(type_2, self._make_table()) - - if done: - table[interaction['loc_difference']][action] = reward + id1,id2 = interaction['interaction'] + interaction_next_state = [inter for inter in next_state if inter['interaction']==(id1,id2)] + if len(interaction_next_state)==0: + continue + elif len(interaction_next_state)>1: + raise ValueError('This should not happen') else: - table[interaction['loc_difference']][action] = \ - reward + self.gamma * (np.max(next_tr) - curr_tr[action]) + #print('Now we should update the Q-values') + #print(f'The current reward is {reward}') + interaction_next_state = interaction_next_state[0] + interaction['loc_difference'] = (interaction['loc_difference'][0]+self.offset,interaction['loc_difference'][1]+self.offset) + interaction_next_state['loc_difference'] = (interaction_next_state['loc_difference'][0]+self.offset,interaction_next_state['loc_difference'][1]+self.offset) + #print(interaction_next_state['loc_difference']) + #print(interaction['loc_difference']) + next_action_value = table[interaction_next_state['loc_difference']] + #print(f'The next action value {next_action_value}') + if done: + table[interaction['loc_difference']][action] = reward + else: + #print(f'Q-value before update {table[interaction["loc_difference"]][action]}') + #print(f'Location {interaction["loc_difference"]}') + #print(f"The new value should be {table[interaction['loc_difference']][action] + self.alpha*(reward + self.gamma * np.max(next_action_value) - table[interaction['loc_difference']][action])}") + #print(interaction['loc_difference']) + table[interaction['loc_difference']][action] = table[interaction['loc_difference']][action] + self.alpha*(reward + self.gamma * np.max(next_action_value) - table[interaction['loc_difference']][action]) + #print(f'Q-value after update {table[interaction["loc_difference"]][action]}') def _total_rewards(self, interactions): action_rewards = np.zeros(self.action_size) @@ -154,8 
+180,8 @@ def _make_table(self): 3-D table: rows = loc_difference_x, cols = loc_difference_y, z = q-values for actions Rows and cols added to as needed ''' - return np.zeros((self.neighbor_radius * 2, self.neighbor_radius * 2, self.action_size), - dtype=int) + return np.zeros((self.neighbor_radius * 8, self.neighbor_radius * 8, self.action_size), + dtype=float) def save(self, filename): '''Save agent's tables''' diff --git a/components/autoencoder.py b/components/autoencoder.py index d316494..889c64b 100644 --- a/components/autoencoder.py +++ b/components/autoencoder.py @@ -15,9 +15,9 @@ class SymbolAutoencoder(): '''Implements the DSRL paper section 3.1. Extract entities from raw image''' - def __init__(self, input_shape, neighbor_radius=25): + def __init__(self, input_shape,filter_size,neighbor_radius=25): self.neighbor_radius = neighbor_radius - + self.filter_size = filter_size input_img = Input(shape=input_shape) encoded = Conv2D(16, (5, 5), activation='relu', padding='same')(input_img) encoded = MaxPooling2D((POOL_SIZE, POOL_SIZE), padding='same')(encoded) @@ -30,6 +30,8 @@ def __init__(self, input_shape, neighbor_radius=25): self.autoencoder = Model(input_img, decoded) self.autoencoder.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + self.repr_entity_activations = [] + def train(self, train_data, epochs=50, batch_size=128, shuffle=True, validation=None, tensorboard=False): '''Train the autoencoder on provided images''' @@ -62,21 +64,22 @@ def _extract_positions(self, encoded_image): features -= background_value #apply the local maximum filter; all pixel of maximal value #in their neighborhood are set to 1 - filtered = maximum_filter(features, size=(4, 4)) #TODO: Abstract size - filtered = np.asarray(filtered == features, dtype=int) - np.asarray(filtered == 0, - dtype=int) + filtered = maximum_filter(features, size=(self.filter_size, self.filter_size)) #TODO: Abstract size + filtered = np.asarray(filtered == features, dtype=int) - 
np.asarray(filtered == 0,dtype=int) filtered.reshape(encoded_image.shape[:-1]) filtered *= POOL_SIZE # Pooling = downsampling = everything is scaled down by POOL_SIZE #2d image of the positions, and just the indices return filtered, np.transpose(np.nonzero(filtered)) - def visualize(self, images): + def visualize(self, images,show=False): '''Visualize autoencoder processing steps''' if len(images) > 20: raise Exception('Too many visualization images, please provide <= 20') logger.info('Visualizing...') + encoded_imgs = self.encode(images) + print(f'Encoded Image {encoded_imgs.shape}') position_maps = [self._extract_positions(x)[0] for x in encoded_imgs] decoded_imgs = self.predict(images) @@ -90,14 +93,14 @@ def flatten_to_img(array): plt_i = i+1 # display original axis = plt.subplot(4, n_plots, plt_i) - plt.imshow(flatten_to_img(images[i])) + plt.imshow(images[i]) plt.gray() axis.get_xaxis().set_visible(False) axis.get_yaxis().set_visible(False) # display reconstruction axis = plt.subplot(4, n_plots, plt_i + n_plots) - plt.imshow(flatten_to_img(decoded_imgs[i])) + plt.imshow(decoded_imgs[i]) plt.gray() axis.get_xaxis().set_visible(False) axis.get_yaxis().set_visible(False) @@ -117,8 +120,10 @@ def flatten_to_img(array): axis.get_xaxis().set_visible(False) axis.get_yaxis().set_visible(False) - print('\nPlot visible, close it to proceed') - plt.show() + if show: + plt.show() + #print('\nPlot visible, close it to proceed') + return plt.gcf() def get_entities(self, image): ''' @@ -128,31 +133,35 @@ def get_entities(self, image): etc. 
} ''' - + #print('Inside the get entities function') encoded = self.encode(image.reshape((1,) + image.shape))[0] pos_map, entities = self._extract_positions(encoded) - repr_entity_activations = [] # Representative depth slice for a certain type + #print(f'Number of identified entities: {len(entities)}') + #print(f'Number of identified entities: {entities.shape}') + #print(entities) + + typed_entities = [] # Actual Entity() array found_types = [] # TODO: Enhancements: knn classifier instead of this caveman shit for entity_coords in entities: activations = encoded[entity_coords[0], entity_coords[1], :] - if not repr_entity_activations: - repr_entity_activations.append(activations) + if not self.repr_entity_activations: + self.repr_entity_activations.append(activations) e_type = 'type0' else: - for i, e_activations in enumerate(repr_entity_activations): + for i, e_activations in enumerate(self.repr_entity_activations): dist = sqeuclidean(activations, e_activations) if dist < ENTITY_DIST_THRESHOLD: # Same type - repr_entity_activations[i] = (e_activations + activations) / 2 + self.repr_entity_activations[i] = (e_activations + activations) / 2 e_type = 'type' + str(i) break else: # No type match, make new type - repr_entity_activations.append(activations) - new_type_idx = len(repr_entity_activations) - 1 + self.repr_entity_activations.append(activations) + new_type_idx = len(self.repr_entity_activations) - 1 e_type = 'type' + str(new_type_idx) min_coords = entity_coords-self.neighbor_radius @@ -170,12 +179,12 @@ def get_entities(self, image): return typed_entities, found_types @staticmethod - def from_saved(filename, input_shape, neighbor_radius=None): + def from_saved(filename, input_shape, filter_size, neighbor_radius=None): '''Load autoencoder weights from filename, given input shape''' if neighbor_radius is not None: - ret = SymbolAutoencoder(input_shape, neighbor_radius=neighbor_radius) + ret = SymbolAutoencoder(input_shape,filter_size, 
neighbor_radius=neighbor_radius) else: - ret = SymbolAutoencoder(input_shape) + ret = SymbolAutoencoder(input_shape,filter_size) ret.autoencoder.load_weights(filename) return ret @@ -214,3 +223,10 @@ def disappeared(self): def _transition(self, from_type, to_type): self.last_transition = [from_type, to_type] self.entity_type = to_type + + def __repr__(self): + text = '' + text += f'Entity ID {self.id} \n' + text += f'Entitiy Type {self.entity_type} \n' + text += f'Position {self.position} \n' + return text diff --git a/components/state_builder.py b/components/state_builder.py index 748007c..0d2f99a 100644 --- a/components/state_builder.py +++ b/components/state_builder.py @@ -15,9 +15,12 @@ def __init__(self, neighbor_radius=25): self.do_not_exist = [] # entities to be removed as they no longer exist self.sim_weights = [2, 1, 1] self.neighbor_radius = neighbor_radius + self.offset = 0 def build_state(self, entities, found_types): '''Tag entities across time, build interactions''' + #print(f'Entities found:') + if not self.tracked_entities and self.next_free_entity_id == 0: # Init type transition matrix self.type_transition_matx = DataFrame(0, @@ -30,21 +33,28 @@ def build_state(self, entities, found_types): # init tracking for objects self._init_tracking(entities) + #print(f'Inititlaized Transition Matrix {self.type_transition_matx}') + #print(f'Inititliazed tracked entities {self.tracked_entities}') + #print(f'Next free entitiy ID {self.next_free_entity_id}') else: # Update type transition matrix if there are new types num_current_types = self.type_transition_matx.shape[0] for e_type in found_types: + # Wrong! 
if e_type not in self.type_transition_matx.index: # New, never before seen entity type, make new entry in trans matrix # make column self.type_transition_matx.insert(num_current_types, e_type, 0) + num_current_types += 1 # make row self.type_transition_matx.loc[e_type] = np.zeros(num_current_types, dtype=int) # set initial transition to 0 because assumption: objects tend to stay the same self.type_transition_matx.at[e_type, e_type] = 1 + #print(f'Updated transition matrix {self.type_transition_matx}') + # print(self.type_transition_matx) # Update tracking for objects @@ -59,6 +69,7 @@ def restart(self): self.tracked_entities = [] self.next_free_entity_id = 0 self.do_not_exist = [] + self.offset = 0 def _init_tracking(self, entities): '''Set up tags for all existing entities''' @@ -87,7 +98,7 @@ def _is_same_entity(self, old_e, new_e): similarity = self.sim_weights[0] * l_dist + \ self.sim_weights[1] * l_trans + \ self.sim_weights[2] * l_neighbors - similarity = similarity/3 + similarity = similarity/4 # print(l_dist, l_trans, l_neighbors, 'similarity:', similarity) @@ -95,7 +106,7 @@ def _is_same_entity(self, old_e, new_e): def _update_tracking(self, new_entities): '''Track entities across time, using their last state''' - + #print('We are inside the update tracking function') # if an entity is not matched with any in new entities, # place it in possibly_disappeared, and remove it if encountered # If there are any in possibly_disappeared by the time the @@ -103,16 +114,24 @@ def _update_tracking(self, new_entities): possibly_disappeared = [] newly_nonexistent = [] + + #print('We go through the tracked entities') + old_number_of_tracked_entities = len(self.tracked_entities) for i, tracked_e in enumerate(self.tracked_entities): # print(tracked_e.__dict__) if not tracked_e.exists: - print('Marked for deletion next loop', tracked_e.__dict__) - print('---') + #print('Marked for deletion next loop', tracked_e.__dict__) + #print('---') newly_nonexistent.append(i) continue 
+ for new_e_i, new_e in enumerate(new_entities): # print('comparing', new_e.__dict__) if self._is_same_entity(tracked_e, new_e): + #print(f'We found a match for entity {tracked_e.id}') + + #print(f'The position of the new entity {new_e.position} ') + #print(f'The postion of the old entity {tracked_e.position}') # print('same entity') # Update transition matrix # (even if not transitioned, how often the type stays the same is important) @@ -126,8 +145,8 @@ def _update_tracking(self, new_entities): break else: # new entity, and/or tracked_e disappeared - print('match not found', tracked_e.__dict__) - print('---') + #print('match not found', tracked_e.__dict__) + #print('---') possibly_disappeared.append(i) for disapp_idx in possibly_disappeared: # well, they definitely disappeared @@ -139,12 +158,21 @@ def _update_tracking(self, new_entities): # that the entity disappeared self.tracked_entities[disapp_idx].disappeared() + #print(f'Self-Do-Exist: {self.do_not_exist}') + #print(f'len {len(self.tracked_entities)}') + #print(f'These are the entities to be removed {self.do_not_exist}') + #print(f'These are the tracked entities {self.tracked_entities}') self.do_not_exist.reverse() for dne_idx in self.do_not_exist: - print('DNE', dne_idx) - del self.tracked_entities[dne_idx] + #print('DNE', dne_idx) + if self.offset<0: + del self.tracked_entities[dne_idx+self.offset] + else: + del self.tracked_entities[dne_idx] self.do_not_exist = newly_nonexistent # to be removed next time + #print(f'These entities must be removed in the next timestep {self.do_not_exist} ') + #print(f'These are the current tracked entities {self.tracked_entities}') for entity_to_add in new_entities: entity_to_add.id = self.next_free_entity_id @@ -157,6 +185,10 @@ def _update_tracking(self, new_entities): # increment id for next appearing object self.next_free_entity_id += 1 + new_number_of_tracked_entities = len(self.tracked_entities) + self.offset = 
new_number_of_tracked_entities-old_number_of_tracked_entities + + def _build_representation(self): '''Build time-abstracted representation + object interactions''' @@ -173,8 +205,12 @@ def interaction(el_1, el_2, loc_diff, types_before, types_after): interactions_built = [] # pairs of entities for which interaction has already been built # Build interactions for entity in self.tracked_entities: + + #print([(np.abs((x.position - entity.position)),(x.prev_state['position']-entity.prev_state['position'])-(x.position-entity.position)) for x in self.tracked_entities if np.all(np.abs((x.position - entity.position)) < self.neighbor_radius*2)]) + within_radius = [x for x in self.tracked_entities - if np.all((x.position - entity.position) < self.neighbor_radius*2)] + if np.all(np.abs((x.position - entity.position)) < self.neighbor_radius*2)] + for w_r in within_radius: sorted_e = (entity, w_r) if entity.entity_type < w_r.entity_type else (w_r, entity) interact_ids = (sorted_e[0].id, sorted_e[1].id) @@ -183,8 +219,8 @@ def interaction(el_1, el_2, loc_diff, types_before, types_after): continue # position change - loc_diff = (sorted_e[0].position - sorted_e[0].prev_state['position']) - \ - (sorted_e[1].position - sorted_e[1].prev_state['position']) + loc_diff = (sorted_e[0].prev_state['position'] - sorted_e[1].prev_state['position']) - (sorted_e[0].position - sorted_e[1].position) + #print(f'The loc_diff is {loc_diff}') types_before = (sorted_e[0].prev_state['entity_type'], sorted_e[1].prev_state['entity_type']) types_after = (sorted_e[0].entity_type, sorted_e[1].entity_type) if np.array_equal(loc_diff, (0, 0)) and np.array_equal(types_before, types_after): @@ -193,7 +229,10 @@ def interaction(el_1, el_2, loc_diff, types_before, types_after): interactions.append(interaction(sorted_e[0], sorted_e[1], loc_diff, types_before, types_after)) interactions_built.append(interact_ids) - + #print(f'We now print the calculated interactions') + #for interaction in interactions: + # 
print(interaction) + #print(f'Number of Interactions {len(interactions)}') return interactions def _mark_transition(self, from_type, to_type): diff --git a/cross_circle_gym/envs/cross_circle_base.py b/cross_circle_gym/envs/cross_circle_base.py index c252923..380af15 100644 --- a/cross_circle_gym/envs/cross_circle_base.py +++ b/cross_circle_gym/envs/cross_circle_base.py @@ -1,15 +1,53 @@ -'''Base class for the DSRL paper toy game''' +'''Base class for the DSRL paper toy game - adapted from the Lua environment here: https://github.com/Kaixhin/rlenvs/blob/master/rlenvs/XOWorld.lua''' + + import gym from gym import spaces from gym.utils import seeding import numpy as np from matplotlib import pyplot as plt from matplotlib.colors import to_rgb -import imageio -import os from skimage.transform import resize + + +AGENT_MASK = np.expand_dims(np.array([[0, 0, 0, 1, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 0, 0, 0], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 0, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 0, 0, 0]]),axis=2) + +CROSS_MASK = np.expand_dims(np.array([[1, 1, 1, 1, 0, 0, 0, 1, 1, 1], + [0, 1, 1, 1, 1, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 0, 1, 1, 1, 1], + [1, 1, 1, 1, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]),axis=2) + +CIRCLE_MASK = np.expand_dims(np.array([[0, 0, 0, 1, 1, 1, 1, 1, 0, 0], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 0, 0, 0, 1, 1, 1], + [1, 1, 1, 0, 0, 0, 0, 0, 1, 1], + [1, 1, 1, 0, 0, 0, 0, 0, 1, 1], + [1, 1, 1, 0, 0, 0, 0, 0, 1, 1], + [1, 1, 1, 1, 0, 0, 0, 1, 1, 1], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [0, 0, 1, 1, 1, 1, 1, 1, 1, 0]]),axis=2) + +MASKS = 
{'circle':CIRCLE_MASK,'cross':CROSS_MASK,'agent':AGENT_MASK} + + class Entity(object): def __init__(self, y, x, h, w, kind, center=False, z=None): if center: @@ -66,13 +104,14 @@ class CrossCircleBase(gym.Env): } def __init__( - self, field_dim=100, background_colour='white', shape_colours="white white white", - entity_size=10, min_entities=25, max_entities=50, max_overlap_factor=0.2, overlap_factor=0.25, step_size=10): + self, field_dim=84, background_colour='white', shape_colours="white white white", + entity_size=10, min_entities=16, max_entities=16, max_overlap_factor=0.0, overlap_factor=0.2, step_size=1, color_state=False): self.field_dim = field_dim self.background_colour = background_colour self.shape_colours = shape_colours self.entity_size = entity_size + self.color_state = color_state self.min_entities = min_entities self.max_entities = max_entities @@ -82,18 +121,19 @@ def __init__( self.step_size = step_size self.action_space = spaces.Discrete(4) - self.observation_space = spaces.Box(0, 1, shape=(self.field_dim, self.field_dim, 3)) + self.observation_space = spaces.Box(0, 1, shape=(self.field_dim, self.field_dim, 1)) self.reward_range = (-1, 1) self.entities = {'cross': [], 'circle': []} self.agent = None self.masks = {} + for entity_type in 'circle cross agent'.split(): - f = os.path.join(os.path.dirname(__file__), "images", "{}.png".format(entity_type)) - mask = imageio.imread(f) + mask = MASKS[entity_type] mask = resize(mask, (self.entity_size, self.entity_size), mode='edge', preserve_range=True) - self.masks[entity_type] = np.tile(mask[..., 3:], (1, 1, 3)) / 255. 
+ self.masks[entity_type] = mask + self.background_colour = None if background_colour: @@ -117,7 +157,10 @@ def __init__( @property def combined_state(self): '''Add state layers into one array''' - image = np.zeros((self.field_dim, self.field_dim, 3)) * self.background_colour + if self.color_state: + image = np.zeros((self.field_dim, self.field_dim, 3)) * self.background_colour + else: + image = np.zeros((self.field_dim, self.field_dim, 1)) all_entities = [] for entity_type, entities in self.entities.items(): @@ -131,10 +174,12 @@ def combined_state(self): continue _alpha = self.masks[entity.kind] - if self.shape_colours is None: - _image = np.random.rand(self.entity_size, self.entity_size, 3) - else: - _image = np.tile(self.shape_colours[entity.kind], (self.entity_size, self.entity_size, 1)) + + if self.color_state: + if self.shape_colours is None: + _image = np.random.rand(self.entity_size, self.entity_size, 3) + else: + _image = np.tile(self.shape_colours[entity.kind], (self.entity_size, self.entity_size, 1)) top = int(entity.top) bottom = top + int(entity.h) @@ -142,7 +187,10 @@ def combined_state(self): left = int(entity.left) right = left + int(entity.w) - image[top:bottom, left:right, ...] = _alpha * _image + (1 - _alpha) * image[top:bottom, left:right, ...] + if self.color_state: + image[top:bottom, left:right, ...] = _alpha * _image + (1 - _alpha) * image[top:bottom, left:right, ...] 
+ else: + image[top:bottom, left:right] = _alpha return image @@ -193,8 +241,13 @@ def layout(self, random=True, mixed=True, min_entities=None, max_entities=None, if random: sub_image_shapes = [(self.entity_size, self.entity_size) for i in range(n_entities)] + + entities = self._sample_entities(sub_image_shapes, self.max_overlap_factor) + if entities==0: + return 0 + for i, e in enumerate(entities): if mixed and i % 2 == 0: entity_type = 'cross' @@ -237,40 +290,44 @@ def _sample_entities(self, patch_shapes, max_overlap_factor=None, size_std=None) rects = [] for i in range(n_rects): - n_tries = 0 - while True: - if size_std is None: - shape_multipliers = 1. - else: - shape_multipliers = np.maximum(np.random.randn(2) * size_std + 1.0, 0.5) + try: + n_tries = 0 + while True: + if size_std is None: + shape_multipliers = 1. + else: + shape_multipliers = np.maximum(np.random.randn(2) * size_std + 1.0, 0.5) - m, n = np.ceil(shape_multipliers * patch_shapes[i, :2]).astype('i') + m, n = np.ceil(shape_multipliers * patch_shapes[i, :2]).astype('i') - rect = Entity( - np.random.randint(0, self.field_dim-m+1), - np.random.randint(0, self.field_dim-n+1), m, n, kind=None) + rect = Entity( + np.random.randint(0, self.field_dim-m+1), + np.random.randint(0, self.field_dim-n+1), m, n, kind=None) - if max_overlap_factor is None: - rects.append(rect) - break - else: - violation = False - for r in rects: - if rect.overlap_area(r) / (self.entity_size**2) > max_overlap_factor: - violation = True - break - - if not violation: + if max_overlap_factor is None: rects.append(rect) break + else: + violation = False + for r in rects: + if rect.overlap_area(r) / (self.entity_size**2) > max_overlap_factor: + violation = True + break + + if not violation: + rects.append(rect) + break + + n_tries += 1 - n_tries += 1 + if n_tries > 10000: + raise Exception( + "Could not fit rectangles. 
" + "(n_rects: {}, field_dim: {}, max_overlap_factor: {})".format( + n_rects, self.field_dim, max_overlap_factor)) + except: + return 0 - if n_tries > 10000: - raise Exception( - "Could not fit rectangles. " - "(n_rects: {}, field_dim: {}, max_overlap_factor: {})".format( - n_rects, self.field_dim, max_overlap_factor)) return rects diff --git a/cross_circle_gym/envs/cross_circle_mixed_rand.py b/cross_circle_gym/envs/cross_circle_mixed_rand.py index 1135939..5544573 100644 --- a/cross_circle_gym/envs/cross_circle_mixed_rand.py +++ b/cross_circle_gym/envs/cross_circle_mixed_rand.py @@ -16,9 +16,12 @@ def make_random_state(self, min_entities=1, max_entities=30): 'cross': np.zeros((self.field_dim, self.field_dim)), 'agent': np.zeros((self.field_dim, self.field_dim)) } - self.layout(random=True, + error = self.layout(random=True, mixed=True, min_entities=min_entities, max_entities=max_entities, random_agent=True) + if error==0: + return [] + return self.combined_state diff --git a/main.py b/main.py index 16d87be..032b55c 100644 --- a/main.py +++ b/main.py @@ -1,25 +1,29 @@ '''Main module for the paper's algorithm''' -#pylint:disable=C0103,R0913 + import argparse -import os.path -import pickle +import os + +from collections import deque +from datetime import datetime + + import numpy as np +import tensorflow as tf +import tqdm -import gym from gym import logger -from sklearn.model_selection import train_test_split -#pylint:disable=W0611 import cross_circle_gym -#pylint:enable=W0611 -from components.autoencoder import SymbolAutoencoder + from components.state_builder import StateRepresentationBuilder -from components.agent import TabularAgent #, DDQNAgent +from components.agent import TabularAgent +from utils import prepare_training +# Experiment Parameters parser = argparse.ArgumentParser(description=None) -parser.add_argument('env_id', nargs='?', default='CrossCircle-MixedRand-v0', - help='Select the environment to run') +parser.add_argument('--experiment_name', 
type=str, default='default', help='Name of the experiment') parser.add_argument('--load', type=str, help='load existing model from filename provided') +parser.add_argument('--image_dir', type=str, help='laod images from directory provided') parser.add_argument('--episodes', '-e', type=int, default=1000, help='number of DQN training episodes') parser.add_argument('--load-train', action='store_true', @@ -29,109 +33,131 @@ help='activate own improvements over original paper') parser.add_argument('--visualize', '--vis', action='store_true', help='plot autoencoder input & output') -parser.add_argument('--save', type=str, help='save model to filename provided') - -args = parser.parse_args() - -TRAIN_IMAGES_FILE = 'train_images.pkl' -NEIGHBOR_RADIUS = 25 # 1/2 side of square in which to search for neighbors - -# You can set the level to logger.DEBUG or logger.WARN if you -# want to change the amount of output. -logger.setLevel(logger.INFO) - - - -env = gym.make(args.env_id) -seed = env.seed(1)[0] +parser.add_argument('--save', type=str, help='save model to directory provided') +parser.add_argument('--logdir',type=str,default='./logs', help='Log directory') +parser.add_argument('--log_level',type=str,default='warn',help='Detail of logging output') +parser.add_argument('--evaluation_frequency', type=int, default=100, + help='How often to evaluate the agent') +parser.add_argument('--tensorboard', action='store_true', default=False, + help='Switch on tensorboard for the autoencoder training') +parser.add_argument('--play', action='store_true', default=False, + help='Choose the agents action for 20 timesteps to see what the autoencoder does') + +# Environment +parser.add_argument('--random', action='store_true', default=False, + help='Should the position of the entities be random') +parser.add_argument('--double', action='store_true', default=False, + help='Only negative objects (circles) or also positive ones (cross)') +parser.add_argument('--n_entities', type=int, 
default=16, + help='Number of entities in the environment') +parser.add_argument('--entity_size', type=int, default=10, help='Size of the entities') +parser.add_argument('--neighborhood_size', type=int, default=10, + help='Size of the neighborhood') +parser.add_argument('--step_size', type=float, default=1.0, help='Step-Size') +parser.add_argument('--overlap_factor', type=float, default=0.01, + help='How much must an gent overlap with an entitiy to collect it') +parser.add_argument('--colour_state', action='store_true', default=False, + help='Whether to use the colour image as a state or a one-channel black and white image') + +# Training parameters +parser.add_argument('--alpha', type=float, default=0.01, help='Learning Rate') +parser.add_argument('--epsilon_decay', type=float, default=0.99995, + help='Decay rate of epsilon') +parser.add_argument('--timesteps', type=int, default=100, help='Length of a training episode') + +# Autoencdoer +parser.add_argument('--filter_size', default=10, type=int, help='Size of the filter') +args = parser.parse_args() -def make_autoencoder_train_data(num, min_entities=1, max_entities=30): - '''Make training images for the autoencoder''' - temp_env = gym.make('CrossCircle-MixedRand-v0') - temp_env.seed(0) - states = [] - for _ in range(num): - states.append(temp_env.make_random_state(min_entities, max_entities)) - return np.asarray(states) +now = datetime.now().strftime("%d_%m_%Y_%H_%M_%S") +args.logdir = os.path.join(args.logdir,args.experiment_name,now) -if not os.path.exists(TRAIN_IMAGES_FILE) or args.new_images: - logger.info('Making test images...') - images = make_autoencoder_train_data(5000, max_entities=30) - with open(TRAIN_IMAGES_FILE, 'wb') as f: - pickle.dump(images, f) +# Choose environment +if args.random and args.double: + env_id = 'CrossCircle-MixedRand-v0' +elif args.random and not args.double: + env_id = 'CrossCircle-NegRand-v0' +elif not args.random and args.double: + env_id = 'CrossCircle-MixedGrid-v0' else: - 
logger.info('Loading test images...') - with open(TRAIN_IMAGES_FILE, 'rb') as f: - images = pickle.load(f) - -#input_shape = images[0].shape + (1,) -input_shape = images[0].shape -if args.load: - autoencoder = SymbolAutoencoder.from_saved(args.load, - images[0].shape, - neighbor_radius=NEIGHBOR_RADIUS) + env_id = 'CrossCircle-NegGrid-v0' +args.env_id = env_id + +# Set logger +if args.log_level=='warn': + logger.setLevel(logger.WARN) +elif args.log_level=='info': + logger.setLevel(logger.INFO) else: - autoencoder = SymbolAutoencoder(images[0].shape, neighbor_radius=NEIGHBOR_RADIUS) + raise NotImplementedError('Log-level not implemented') +args.logger = logger -if args.load_train or args.visualize or not args.load: - logger.info('Splitting sets...') - X_train, X_test = train_test_split(images, test_size=0.2, random_state=seed) - X_train, X_val = train_test_split(X_train, test_size=0.2, random_state=seed) - - if args.load_train or not args.load: - logger.info('Training...') - autoencoder.train(X_train, epochs=10, validation=X_val) - - if args.visualize: - #Visualize autoencoder - vis_imgs = X_test[:10] - autoencoder.visualize(vis_imgs) - -if args.save: - autoencoder.save_weights(args.save) +autoencoder,env = prepare_training(args) +state_builder = StateRepresentationBuilder(neighbor_radius=args.neighborhood_size) +action_size = env.action_space.n +agent = TabularAgent(action_size,args.alpha,args.epsilon_decay,args.neighborhood_size) -# entities, found_types = autoencoder.get_entities(X_test[0]) +done = False +time_steps = args.timesteps -state_builder = StateRepresentationBuilder() -# state = state_builder.build_state(entities, found_types) -# print(state) +number_of_evaluations = 0 +buffered_rewards = deque(maxlen=200) -# state_size = None # TODO -action_size = env.action_space.n -# if args.enhancements: -# agent = DDQNAgent(state_size, action_size) -# else: -agent = TabularAgent(action_size) -# # agent.load('./save/cartpole-ddqn.h5') -done = False -batch_size = 32 
-time_steps = 100 +summary_writer = tf.summary.create_file_writer(args.logdir) -for e in range(args.episodes): +for e in tqdm.tqdm(range(args.episodes)): state_builder.restart() state = env.reset() - state = np.reshape(state, input_shape) state = state_builder.build_state(*autoencoder.get_entities(state)) - for time in range(time_steps): - env.render(wait=1) + total_reward = 0 + + for t in range(time_steps): action = agent.act(state) next_state, reward, done, _ = env.step(action) - next_state = np.reshape(next_state, input_shape) + total_reward += reward next_state = state_builder.build_state(*autoencoder.get_entities(next_state)) - # next_state = np.reshape(next_state, [1, state_size]) agent.update(state, action, reward, next_state, done) state = next_state if done: break - # if args.enhancements: - # agent.update_target_model() - print('episode: {}/{}, e: {:.2}' - .format(e, args.episodes, agent.epsilon)) - - # if len(agent.memory) > batch_size: - # agent.replay(batch_size) - if e % 10 == 0: - agent.save('tab_agent.h5') + + buffered_rewards.append(total_reward) + + with summary_writer.as_default(): + tf.summary.scalar('Averaged Reward',np.mean(buffered_rewards),e) + tf.summary.scalar('Epsilon',agent.epsilon,e) + + + if e % args.evaluation_frequency == 0: + number_of_evaluations += 1 + agent.save(os.path.join(args.logdir,'tab_agent.h5')) + evaluation_reward = [] + with summary_writer.as_default(): + for i in range(10): + done = False + state_builder.restart() + image = env.reset() + state = state_builder.build_state(*autoencoder.get_entities(image)) + total_reward = 0 + for t in range(time_steps): + action = agent.act(state,random_act=False) + next_image, reward, done, _ = env.step(action) + if i==0: + tf.summary.image(f'Agent Behaviour {number_of_evaluations}',np.reshape(image,(1,)+image.shape),t) + total_reward += reward + next_state = state_builder.build_state(*autoencoder.get_entities(next_image)) + state = next_state + image = next_image + 
evaluation_reward.append(total_reward)
+
+                tf.summary.scalar('Evaluation Reward',np.mean(evaluation_reward),number_of_evaluations)
+
+
+
+
+
+
diff --git a/scripts/hyperparameter.sh b/scripts/hyperparameter.sh
new file mode 100644
index 0000000..7d7b33c
--- /dev/null
+++ b/scripts/hyperparameter.sh
@@ -0,0 +1,52 @@
+#! /bin/bash
+
+# Script to perform Hyperparameter Search
+
+load='../autoencoder_models/gray_10_model.h5' # If pretrained autoencoder exist here is the file-path of the model
+image_dir='../'
+logdir='../logs'
+log_level='info' # info, warn
+
+evaluation_frequency=50
+
+
+# Environment
+n_entities=16
+entity_size=10
+neighborhood_size=10
+step_size=1.0
+overlap_factor=0.01
+
+# Training parameters
+epsilon_decay=0.99999
+
+
+# Autoencoder
+filter_size=7
+
+for alpha in 0.1 0.01 0.001
+do
+    for neighborhood_size in 10 20 50
+    do
+        for step_size in 1 2 5 10; do
+        experiment_name="Alpha_{$alpha}_neighborhood_size_{$neighborhood_size}_Step_{$step_size}"
+        echo "Experiment {$experiment_name} starts"
+        python ../main.py --experiment_name $experiment_name \
+            --load $load \
+            --logdir $logdir \
+            --image_dir $image_dir \
+            --log_level $log_level \
+            --evaluation_frequency $evaluation_frequency \
+            --n_entities $n_entities \
+            --entity_size $entity_size \
+            --neighborhood_size $neighborhood_size \
+            --step_size $step_size \
+            --overlap_factor $overlap_factor \
+            --alpha $alpha \
+            --epsilon_decay $epsilon_decay \
+            --filter_size $filter_size
+        done
+    done
+done
+
+
diff --git a/scripts/training.sh b/scripts/training.sh
new file mode 100644
index 0000000..2cc3c81
--- /dev/null
+++ b/scripts/training.sh
@@ -0,0 +1,43 @@
+#!
/bin/bash
+
+experiment_name='default'
+
+load='../autoencoder_models/gray_10_model.h5' # If pretrained autoencoder exist here is the file-path of the model
+image_dir='../'
+logdir='../logs'
+log_level='info' # info, warn
+
+evaluation_frequency=50
+
+
+# Environment
+n_entities=16
+entity_size=10
+neighborhood_size=10
+step_size=2.0
+overlap_factor=0.01
+
+# Training parameters
+alpha=0.01
+epsilon_decay=0.99999
+
+
+# Autoencoder
+filter_size=7
+
+python ../main.py --experiment_name $experiment_name \
+    --load $load \
+    --logdir $logdir \
+    --image_dir $image_dir \
+    --log_level $log_level \
+    --evaluation_frequency $evaluation_frequency \
+    --n_entities $n_entities \
+    --entity_size $entity_size \
+    --neighborhood_size $neighborhood_size \
+    --step_size $step_size \
+    --overlap_factor $overlap_factor \
+    --alpha $alpha \
+    --epsilon_decay $epsilon_decay \
+    --filter_size $filter_size
+
+
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..91a3fce
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,116 @@
+import os
+
+import gym
+import numpy as np
+import pickle
+from sklearn.model_selection import train_test_split
+
+from components.autoencoder import SymbolAutoencoder
+
+def make_autoencoder_train_data(env_parameters, num, args, min_entities=1, max_entities=30):
+    '''
+    Make training images for the autoencoder
+
+    env_parameters: (dict) dictionary that specifies the properties of the environment
+    num: (int) number of samples the data should consist of
+    min_entities, max_entities: (int) min/max number of entities that can appear \
+        in a single environment frame
+
+    return: (np.array) BxWxHxC dataset of environment images
+    '''
+
+    temp_env = gym.make('CrossCircle-MixedRand-v0',**env_parameters)
+    temp_env.seed(0)
+    states = []
+    for i in range(num):
+        state = temp_env.make_random_state(min_entities, max_entities)
+        if len(state)==0:
+            continue
+        states.append(state)
+    args.logger.info(f'Final number of states collected in the current configuration 
{len(states)}') + + if (len(states)/num)<0.8: + raise Exception('With the current environment configuration entities do /' + 'not fit onto the grid without overlapping too much') + return np.asarray(states) + +def prepare_training(args): + ''' + (1) Creates environment + (2) Checks whether training images for the autoencoder exist, if not creates them + (3) Creates the autoencoder + (4) Trains or loads the weights of the autoencoder + + return: trained autoencoder, environment + ''' + + # Create the environment + env_parameters = {'entity_size': args.entity_size, + 'min_entities': args.n_entities, + 'max_entities': args.n_entities, + 'step_size': args.step_size, + 'overlap_factor': args.overlap_factor} + env = gym.make(args.env_id, **env_parameters) + seed = env.seed(1)[0] + + # Load or create images + if args.colour_state: + GRAY = 'colour' + else: + GRAY = 'gray' + + TRAIN_IMAGES_FILE = f'train_images_{GRAY}.pkl' + print(os.path.join(args.image_dir,TRAIN_IMAGES_FILE)) + if not os.path.exists(os.path.join(args.image_dir,TRAIN_IMAGES_FILE)) or args.new_images: + args.logger.info('Making test images...') + images = make_autoencoder_train_data(env_parameters, 5000, args, max_entities=20) + with open(os.path.join(args.image_dir,TRAIN_IMAGES_FILE), 'wb') as f: + pickle.dump(images, f) + else: + args.logger.info('Loading test images...') + with open(os.path.join(args.image_dir,TRAIN_IMAGES_FILE), 'rb') as f: + images = pickle.load(f) + + # Create the autoencoder + input_shape = images[0].shape + if args.load: + autoencoder = SymbolAutoencoder.from_saved(args.load, + input_shape, + args.filter_size, + neighbor_radius=args.neighborhood_size) + else: + autoencoder = SymbolAutoencoder(input_shape, args.filter_size, neighbor_radius=args.neighborhood_size) + + + # Train or load autoencoder + if args.load_train or args.visualize or not args.load: + args.logger.info('Splitting sets...') + X_train, X_test = train_test_split(images, test_size=0.2, random_state=seed) + X_train, 
X_val = train_test_split(X_train, test_size=0.2, random_state=seed) + + if args.load_train or not args.load: + args.logger.info('Training...') + autoencoder.train(X_train, epochs=10, validation=X_val,tensorboard=args.tensorboard) + + if args.visualize: + # Visualize autoencoder + vis_imgs = X_test[:10] + autoencoder.visualize(vis_imgs) + + if args.save: + autoencoder.save_weights(os.path.join(args.save, f'{GRAY}_{args.entity_size}_model.h5')) + + + # Visualize the results of the autoencoder + if args.play: + # Visualize your own moves for 10 steps + state = env.reset() + for i in range(20): + state = np.reshape(state, (1,) + input_shape) + autoencoder.visualize(state,show=True) + action = int(input('Next action: ')) + state, reward, _, _ = env.step(action) + print(f'The overall reward is {reward}') + + return autoencoder, env +