From 4076cd519c88a94315dc2206b5c86c62eec81952 Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Thu, 22 Oct 2020 12:42:25 -0600 Subject: [PATCH] inconsistency fix for stochastic deep sea --- bsuite/environments/deep_sea.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bsuite/environments/deep_sea.py b/bsuite/environments/deep_sea.py index 5d7ff7d7..634f787d 100644 --- a/bsuite/environments/deep_sea.py +++ b/bsuite/environments/deep_sea.py @@ -118,11 +118,12 @@ def _step(self, action: int) -> dm_env.TimeStep: action_right = action == self._action_mapping[self._row, self._column] # Reward calculation - if self._column == self._size - 1 and action_right: + if self._column == self._size - 1 and action_right \ + and (self._rng.rand() > 1 / self._size or self._deterministic): reward += 1. self._denoised_return += 1. if not self._deterministic: # Noisy rewards on the 'end' of chain. - if self._row == self._size - 1 and self._column in [0, self._size - 1]: + if self._row == self._size - 1 and 0 <= self._column <= self._size - 1: reward += self._rng.randn() # Transition dynamics