modified training step display

2023-06-24 23:05:45 +09:00
parent f3fe3a872e
commit 34a44916f7
2 changed files with 22 additions and 16 deletions
--- a/envs/memorymaze.py
+++ b/envs/memorymaze.py
@@ -32,13 +32,16 @@ class MemoryMaze:
            spaces = self._env.observation_space.spaces.copy()
        else:
            spaces = {self._obs_key: self._env.observation_space}
-        return gym.spaces.Dict({
-            **spaces,
-            "reward": gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32),
-            "is_first": gym.spaces.Box(0, 1, (), dtype=bool),
-            "is_last": gym.spaces.Box(0, 1, (), dtype=bool),
-            "is_terminal": gym.spaces.Box(0, 1, (), dtype=bool),
-        })
+        return gym.spaces.Dict(
+            {
+                **spaces,
+                "reward": gym.spaces.Box(-np.inf, np.inf, (), dtype=np.float32),
+                "is_first": gym.spaces.Box(0, 1, (), dtype=bool),
+                "is_last": gym.spaces.Box(0, 1, (), dtype=bool),
+                "is_terminal": gym.spaces.Box(0, 1, (), dtype=bool),
+            }
+        )
+
    @property
    def action_space(self):
        space = self._env.action_space
@@ -49,7 +52,7 @@ class MemoryMaze:
        obs, reward, done, info = self._env.step(action)
        if not self._obs_is_dict:
            obs = {self._obs_key: obs}
-        obs['reward'] = reward
+        obs["reward"] = reward
        obs["is_first"] = False
        obs["is_last"] = done
        obs["is_terminal"] = info.get("is_terminal", False)