merged action head into MLP and modified configs
This commit is contained in:
63
configs.yaml
63
configs.yaml
@@ -47,26 +47,25 @@ defaults:
|
||||
dyn_temp_post: True
|
||||
grad_heads: ['decoder', 'reward', 'cont']
|
||||
units: 512
|
||||
reward_layers: 2
|
||||
cont_layers: 2
|
||||
value_layers: 2
|
||||
actor_layers: 2
|
||||
act: 'SiLU'
|
||||
norm: 'LayerNorm'
|
||||
encoder:
|
||||
{mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: 'LayerNorm', cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, symlog_inputs: True}
|
||||
{mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, symlog_inputs: True}
|
||||
decoder:
|
||||
{mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: 'LayerNorm', cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse}
|
||||
value_head: 'symlog_disc'
|
||||
reward_head: 'symlog_disc'
|
||||
{mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
|
||||
actor:
|
||||
{layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
|
||||
critic:
|
||||
{layers: 2, dist: 'symlog_disc', slow_target: True, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
|
||||
reward_head:
|
||||
{layers: 2, dist: 'symlog_disc', scale: 1.0, outscale: 0.0}
|
||||
cont_head:
|
||||
{layers: 2, scale: 1.0, outscale: 1.0}
|
||||
dyn_scale: 0.5
|
||||
rep_scale: 0.1
|
||||
kl_free: 1.0
|
||||
cont_scale: 1.0
|
||||
reward_scale: 1.0
|
||||
weight_decay: 0.0
|
||||
unimix_ratio: 0.01
|
||||
action_unimix_ratio: 0.01
|
||||
initial: 'learned'
|
||||
|
||||
# Training
|
||||
@@ -77,15 +76,7 @@ defaults:
|
||||
model_lr: 1e-4
|
||||
opt_eps: 1e-8
|
||||
grad_clip: 1000
|
||||
value_lr: 3e-5
|
||||
actor_lr: 3e-5
|
||||
ac_opt_eps: 1e-5
|
||||
value_grad_clip: 100
|
||||
actor_grad_clip: 100
|
||||
dataset_size: 1000000
|
||||
slow_value_target: True
|
||||
slow_target_update: 1
|
||||
slow_target_fraction: 0.02
|
||||
opt: 'adam'
|
||||
|
||||
# Behavior.
|
||||
@@ -95,18 +86,10 @@ defaults:
|
||||
imag_gradient: 'dynamics'
|
||||
imag_gradient_mix: 0.0
|
||||
imag_sample: True
|
||||
actor_dist: 'normal'
|
||||
actor_entropy: 3e-4
|
||||
actor_state_entropy: 0.0
|
||||
actor_init_std: 1.0
|
||||
actor_min_std: 0.1
|
||||
actor_max_std: 1.0
|
||||
actor_temp: 0.1
|
||||
expl_amount: 0.0
|
||||
expl_amount: 0
|
||||
eval_state_mean: False
|
||||
collect_dyn_sample: True
|
||||
behavior_stop_grad: True
|
||||
value_decay: 0.0
|
||||
future_entropy: False
|
||||
|
||||
# Exploration
|
||||
@@ -150,13 +133,12 @@ crafter:
|
||||
dyn_hidden: 1024
|
||||
dyn_deter: 4096
|
||||
units: 1024
|
||||
reward_layers: 5
|
||||
cont_layers: 5
|
||||
value_layers: 5
|
||||
actor_layers: 5
|
||||
encoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
|
||||
decoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
|
||||
actor_dist: 'onehot'
|
||||
actor: {layers: 5, dist: 'onehot'}
|
||||
value: {layers: 5}
|
||||
reward_head: {layers: 5}
|
||||
cont_head: {layers: 5}
|
||||
imag_gradient: 'reinforce'
|
||||
|
||||
atari100k:
|
||||
@@ -166,7 +148,7 @@ atari100k:
|
||||
train_ratio: 1024
|
||||
video_pred_log: true
|
||||
eval_episode_num: 100
|
||||
actor_dist: 'onehot'
|
||||
actor: {dist: 'onehot'}
|
||||
imag_gradient: 'reinforce'
|
||||
stickey: False
|
||||
lives: unused
|
||||
@@ -189,13 +171,12 @@ minecraft:
|
||||
dyn_hidden: 1024
|
||||
dyn_deter: 4096
|
||||
units: 1024
|
||||
reward_layers: 5
|
||||
cont_layers: 5
|
||||
value_layers: 5
|
||||
actor_layers: 5
|
||||
encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
|
||||
encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
|
||||
decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
|
||||
actor_dist: 'onehot'
|
||||
actor: {layers: 5, dist: 'onehot'}
|
||||
value: {layers: 5}
|
||||
reward_head: {layers: 5}
|
||||
cont_head: {layers: 5}
|
||||
imag_gradient: 'reinforce'
|
||||
break_speed: 100.0
|
||||
time_limit: 36000
|
||||
@@ -203,7 +184,7 @@ minecraft:
|
||||
memorymaze:
|
||||
steps: 1e8
|
||||
action_repeat: 2
|
||||
actor_dist: 'onehot'
|
||||
actor: {dist: 'onehot'}
|
||||
imag_gradient: 'reinforce'
|
||||
task: 'memorymaze_9x9'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user