modified training step display
This commit is contained in:
@@ -32,13 +32,16 @@ class MemoryMaze:
|
||||
spaces = self._env.observation_space.spaces.copy()
|
||||
else:
|
||||
spaces = {self._obs_key: self._env.observation_space}
|
||||
return gym.spaces.Dict({
|
||||
**spaces,
|
||||
"reward": gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32),
|
||||
"is_first": gym.spaces.Box(0, 1, (), dtype=bool),
|
||||
"is_last": gym.spaces.Box(0, 1, (), dtype=bool),
|
||||
"is_terminal": gym.spaces.Box(0, 1, (), dtype=bool),
|
||||
})
|
||||
return gym.spaces.Dict(
|
||||
{
|
||||
**spaces,
|
||||
"reward": gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32),
|
||||
"is_first": gym.spaces.Box(0, 1, (), dtype=bool),
|
||||
"is_last": gym.spaces.Box(0, 1, (), dtype=bool),
|
||||
"is_terminal": gym.spaces.Box(0, 1, (), dtype=bool),
|
||||
}
|
||||
)
|
||||
|
||||
@property
|
||||
def action_space(self):
|
||||
space = self._env.action_space
|
||||
@@ -49,7 +52,7 @@ class MemoryMaze:
|
||||
obs, reward, done, info = self._env.step(action)
|
||||
if not self._obs_is_dict:
|
||||
obs = {self._obs_key: obs}
|
||||
obs['reward'] = reward
|
||||
obs["reward"] = reward
|
||||
obs["is_first"] = False
|
||||
obs["is_last"] = done
|
||||
obs["is_terminal"] = info.get("is_terminal", False)
|
||||
|
||||
Reference in New Issue
Block a user