modified training step display

This commit is contained in:
NM512
2023-06-24 23:05:45 +09:00
parent f3fe3a872e
commit 34a44916f7
2 changed files with 22 additions and 16 deletions

View File

@@ -32,13 +32,16 @@ class MemoryMaze:
spaces = self._env.observation_space.spaces.copy()
else:
spaces = {self._obs_key: self._env.observation_space}
return gym.spaces.Dict({
**spaces,
"reward": gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32),
"is_first": gym.spaces.Box(0, 1, (), dtype=bool),
"is_last": gym.spaces.Box(0, 1, (), dtype=bool),
"is_terminal": gym.spaces.Box(0, 1, (), dtype=bool),
})
return gym.spaces.Dict(
{
**spaces,
"reward": gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32),
"is_first": gym.spaces.Box(0, 1, (), dtype=bool),
"is_last": gym.spaces.Box(0, 1, (), dtype=bool),
"is_terminal": gym.spaces.Box(0, 1, (), dtype=bool),
}
)
@property
def action_space(self):
space = self._env.action_space
@@ -49,7 +52,7 @@ class MemoryMaze:
obs, reward, done, info = self._env.step(action)
if not self._obs_is_dict:
obs = {self._obs_key: obs}
obs['reward'] = reward
obs["reward"] = reward
obs["is_first"] = False
obs["is_last"] = done
obs["is_terminal"] = info.get("is_terminal", False)