From 43e1b2ab889094e5bf4f1939a279cbb81486a90e Mon Sep 17 00:00:00 2001 From: NM512 Date: Mon, 24 Jul 2023 22:26:21 +0900 Subject: [PATCH] fix bug when resetting envs at different time --- tools.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools.py b/tools.py index c13e438..fb34c60 100644 --- a/tools.py +++ b/tools.py @@ -150,15 +150,15 @@ def simulate( indices = [index for index, d in enumerate(done) if d] results = [envs[i].reset() for i in indices] results = [r() for r in results] - for i in indices: - t = results[i].copy() + for index, result in zip(indices, results): + t = result.copy() t = {k: convert(v) for k, v in t.items()} # action will be added to transition in add_to_cache t["reward"] = 0.0 t["discount"] = 1.0 # initial state should be added to cache - add_to_cache(cache, envs[i].id, t) - for index, result in zip(indices, results): + add_to_cache(cache, envs[index].id, t) + # replace obs with done by initial state obs[index] = result # step agents obs = {k: np.stack([o[k] for o in obs]) for k in obs[0]}