merged action head into MLP and modified configs

2024-01-05 10:26:48 +09:00
parent e0f2017e28
commit e0487f8206
5 changed files with 133 additions and 231 deletions
--- a/configs.yaml
+++ b/configs.yaml
@@ -47,26 +47,25 @@ defaults:
  dyn_temp_post: True
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
-  reward_layers: 2
-  cont_layers: 2
-  value_layers: 2
-  actor_layers: 2
  act: 'SiLU'
  norm: 'LayerNorm'
  encoder:
-    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: 'LayerNorm', cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, symlog_inputs: True}
+    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, symlog_inputs: True}
  decoder:
-    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: 'LayerNorm', cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse}
-  value_head: 'symlog_disc'
-  reward_head: 'symlog_disc'
+    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
+  actor:  # grouped actor settings; formerly flat actor_* keys, action_unimix_ratio, ac_opt_eps
+    {layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
+  critic:  # grouped critic settings; formerly flat value_* keys and the slow-target keys
+    {layers: 2, dist: 'symlog_disc', slow_target: True, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
+  reward_head:  # formerly reward_layers, reward_head (now 'dist') and reward_scale (now 'scale')
+    {layers: 2, dist: 'symlog_disc', scale: 1.0, outscale: 0.0}
+  cont_head:  # formerly cont_layers and cont_scale (now 'scale')
+    {layers: 2, scale: 1.0, outscale: 1.0}
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
-  cont_scale: 1.0
-  reward_scale: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
-  action_unimix_ratio: 0.01
  initial: 'learned'

  # Training
@@ -77,15 +76,7 @@ defaults:
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
-  value_lr: 3e-5
-  actor_lr: 3e-5
-  ac_opt_eps: 1e-5
-  value_grad_clip: 100
-  actor_grad_clip: 100
  dataset_size: 1000000
-  slow_value_target: True
-  slow_target_update: 1
-  slow_target_fraction: 0.02
  opt: 'adam'

  # Behavior.
@@ -95,18 +86,10 @@ defaults:
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  imag_sample: True
-  actor_dist: 'normal'
-  actor_entropy: 3e-4
-  actor_state_entropy: 0.0
-  actor_init_std: 1.0
-  actor_min_std: 0.1
-  actor_max_std: 1.0
-  actor_temp: 0.1
-  expl_amount: 0.0
+  expl_amount: 0
  eval_state_mean: False
  collect_dyn_sample: True
  behavior_stop_grad: True
-  value_decay: 0.0
  future_entropy: False

  # Exploration
@@ -150,13 +133,12 @@ crafter:
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
-  reward_layers: 5
-  cont_layers: 5
-  value_layers: 5
-  actor_layers: 5
  encoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  decoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
-  actor_dist: 'onehot'
+  actor: {layers: 5, dist: 'onehot'}
+  critic: {layers: 5}  # key must match the 'critic' section in defaults (was value_layers)
+  reward_head: {layers: 5}
+  cont_head: {layers: 5}
  imag_gradient: 'reinforce'

 atari100k:
@@ -166,7 +148,7 @@ atari100k:
  train_ratio: 1024
  video_pred_log: true
  eval_episode_num: 100
-  actor_dist: 'onehot'
+  actor: {dist: 'onehot'}
  imag_gradient: 'reinforce'
  stickey: False
  lives: unused
@@ -189,13 +171,12 @@ minecraft:
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
-  reward_layers: 5
-  cont_layers: 5
-  value_layers: 5
-  actor_layers: 5
-  encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
+  encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
-  actor_dist: 'onehot'
+  actor: {layers: 5, dist: 'onehot'}
+  critic: {layers: 5}  # key must match the 'critic' section in defaults (was value_layers)
+  reward_head: {layers: 5}
+  cont_head: {layers: 5}
  imag_gradient: 'reinforce'
  break_speed: 100.0
  time_limit: 36000
@@ -203,7 +184,7 @@ minecraft:
 memorymaze:
  steps: 1e8
  action_repeat: 2
-  actor_dist: 'onehot'
+  actor: {dist: 'onehot'}
  imag_gradient: 'reinforce'
  task: 'memorymaze_9x9'