Fix missing advantage computation when reward_EMA is disabled
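This PR fixes an issue where setting `reward_EMA=False` left `adv` undefined in `_compute_actor_loss`. Previously, `adv` was computed only inside the `reward_EMA` branch, so disabling the option caused a runtime error when `adv` was later referenced before assignment (e.g. in `actor_target = adv`). The fix adds an `else` branch that computes the advantage as `adv = target - base`.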
This commit is contained in:
@@ -409,6 +409,8 @@ class ImagBehavior(nn.Module):
|
|||||||
metrics.update(tools.tensorstats(normed_target, "normed_target"))
|
metrics.update(tools.tensorstats(normed_target, "normed_target"))
|
||||||
metrics["EMA_005"] = to_np(self.ema_vals[0])
|
metrics["EMA_005"] = to_np(self.ema_vals[0])
|
||||||
metrics["EMA_095"] = to_np(self.ema_vals[1])
|
metrics["EMA_095"] = to_np(self.ema_vals[1])
|
||||||
|
else:
|
||||||
|
adv = target - base
|
||||||
|
|
||||||
if self._config.imag_gradient == "dynamics":
|
if self._config.imag_gradient == "dynamics":
|
||||||
actor_target = adv
|
actor_target = adv
|
||||||
|
|||||||
Reference in New Issue
Block a user