commit b67b21c5c638b48b1351864572ee834127717e98
Author: Nicklas Hansen <hello@nicklashansen.com>
Date:   Wed Oct 25 18:26:00 2023 -0700

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100755
index 0000000..313b476
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,125 @@
+# logging
+tdmpc2/logs/
+tdmpc2/outputs/
+tdmpc2/wandb/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..ef47324
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,20 @@
+# Contributing to TD-MPC2
+We want to make contributing to this repository as easy and transparent as
+possible.
+
+## Pull requests
+We actively welcome your pull requests.
+
+1. Fork the repo and create your branch from `main`.
+2. If you have added code that should be tested, add tests.
+3. If you have changed APIs, update the documentation.
+4. Make sure your code lints.
+5. Issue that pull request!
+
+## Issues
+We use GitHub issues to track public bugs. Please ensure your description is
+clear and has sufficient instructions to be able to reproduce the issue.
+
+## License
+By contributing to TD-MPC2, you agree that your contributions will be licensed
+under the `LICENSE` file in the root directory of this source tree.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..bc27ced
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Nicklas Hansen (2023).
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100755
index 0000000..432d720
--- /dev/null
+++ b/README.md
@@ -0,0 +1,136 @@
+<h1>TD-MPC2</h1>
+
+Official implementation of
+
+[TD-MPC2: Scalable, Robust World Models for Continuous Control](https://nicklashansen.github.io/td-mpc2) by
+
+[Nicklas Hansen](https://nicklashansen.github.io/), [Hao Su](https://cseweb.ucsd.edu/~haosu/)\*, [Xiaolong Wang](https://xiaolonw.github.io/)\* (UC San Diego)<br/>
+
+<img src="assets/0.gif" width="12.5%"><img src="assets/1.gif" width="12.5%"><img src="assets/2.gif" width="12.5%"><img src="assets/3.gif" width="12.5%"><img src="assets/4.gif" width="12.5%"><img src="assets/5.gif" width="12.5%"><img src="assets/6.gif" width="12.5%"><img src="assets/7.gif" width="12.5%"><br/>
+
+[[Website]](https://nicklashansen.github.io/td-mpc2) [[Paper]](https://arxiv.org/abs/2310.16828) [[Models]](https://nicklashansen.github.io/td-mpc2/models)  [[Dataset]](https://nicklashansen.github.io/td-mpc2/dataset)
+
+----
+
+## Overview
+
+TD-MPC**2** is a scalable, robust model-based reinforcement learning algorithm. It compares favorably to existing model-free and model-based methods across **104** continuous control tasks spanning multiple domains, with a *single* set of hyperparameters (*right*). We further demonstrate the scalability of TD-MPC**2** by training a single 317M parameter agent to perform **80** tasks across multiple domains, embodiments, and action spaces (*left*). 
+
+<img src="assets/8.png" width="100%" style="max-width: 640px"><br/>
+
+This repository contains code for training and evaluating both single-task online RL and multi-task offline RL TD-MPC**2** agents. We additionally open-source **300+** [model checkpoints](https://nicklashansen.github.io/td-mpc2/models) (including 12 multi-task models) across 4 task domains: [DMControl](https://arxiv.org/abs/1801.00690), [Meta-World](https://meta-world.github.io/), [ManiSkill2](https://maniskill2.github.io/), and [MyoSuite](https://sites.google.com/view/myosuite), as well as our [30-task and 80-task datasets](https://nicklashansen.github.io/td-mpc2/dataset) used to train the multi-task models. We hope that this repository will serve as a useful community resource for future research on model-based RL.
+
+----
+
+## Getting started
+
+You will need a machine with a GPU and at least 12 GB of RAM for single-task online RL with TD-MPC**2**, and 128 GB of RAM for multi-task offline RL on our provided 80-task dataset. A GPU with at least 8 GB of memory is recommended for single-task online RL and for evaluation of the provided multi-task models (up to 317M parameters). Training of the 317M parameter model requires a GPU with at least 24 GB of memory.
+
+We provide a `Dockerfile` for easy installation. You can build the docker image by running
+
+```
+cd docker && docker build . -t <user>/tdmpc2:0.1.0
+```
+
+If you prefer to install dependencies manually, start by installing dependencies via `conda` by running
+
+```
+conda env create -f docker/environment.yaml
+```
+
+If you want to run ManiSkill2, you will additionally need to download and link the necessary assets by running
+
+```
+python -m mani_skill2.utils.download_asset all
+```
+
+which downloads assets to `./data`. You may move these assets to any location. Then, add the following line to your `~/.bashrc`:
+
+```
+export MS2_ASSET_DIR=<path>/<to>/<data>
+```
+
+and restart your terminal. Meta-World additionally requires MuJoCo 2.1.0. We host the unrestricted MuJoCo 2.1.0 license (courtesy of Google DeepMind) at [https://www.tdmpc2.com/files/mjkey.txt](https://www.tdmpc2.com/files/mjkey.txt). You can download the license by running
+
+```
+wget https://www.tdmpc2.com/files/mjkey.txt -O ~/.mujoco/mjkey.txt
+```
+
+See `docker/Dockerfile` for installation instructions if you do not already have MuJoCo 2.1.0 installed. MyoSuite requires `gym==0.13.0` which is incompatible with Meta-World and ManiSkill2. Install separately with `pip install myosuite` if desired. Depending on your existing system packages, you may need to install other dependencies. See `docker/Dockerfile` for a list of recommended system packages.
+
+----
+
+## Supported tasks
+
+This codebase currently supports **104** continuous control tasks from **DMControl**, **Meta-World**, **ManiSkill2**, and **MyoSuite**. Specifically, it supports 39 tasks from DMControl (including 11 custom tasks), 50 tasks from Meta-World, 5 tasks from ManiSkill2, and 10 tasks from MyoSuite, and covers all tasks used in the paper. See the table below for the expected task name format in each task domain:
+
+| domain | task
+| --- | --- |
+| dmcontrol | dog-run
+| dmcontrol | cheetah-run-backwards
+| metaworld | mw-assembly
+| metaworld | mw-pick-place-wall
+| maniskill | pick-cube
+| maniskill | pick-ycb
+| myosuite  | myo-hand-key-turn
+| myosuite  | myo-hand-key-turn-hard
+
+which can be run by specifying the `task` argument for `evaluate.py`. Multi-task training and evaluation are specified by setting `task=mt80` or `task=mt30` for the 80-task and 30-task sets, respectively.
+
+
+## Example usage
+
+We provide examples on how to evaluate our provided TD-MPC**2** checkpoints, as well as how to train your own TD-MPC**2** agents, below.
+
+### Evaluation
+
+The examples below show how to evaluate downloaded single-task and multi-task checkpoints.
+
+```
+$ python evaluate.py task=mt80 model_size=48 checkpoint=/path/to/mt80-48M.pt
+$ python evaluate.py task=mt30 model_size=317 checkpoint=/path/to/mt30-317M.pt
+$ python evaluate.py task=dog-run checkpoint=/path/to/dog-1.pt save_video=true
+```
+
+All single-task checkpoints expect `model_size=5`. Multi-task checkpoints are available in multiple model sizes. Available arguments are `model_size={1, 5, 19, 48, 317}`. Note that single-task evaluation of multi-task checkpoints is currently not supported. See `config.yaml` for a full list of arguments.
+
+### Training
+
+The examples below show how to train TD-MPC**2** on a single task (online RL) and on multi-task datasets (offline RL). We recommend configuring [Weights and Biases](https://wandb.ai) (`wandb`) in `config.yaml` to track training progress.
+
+```
+$ python train.py task=mt80 model_size=48 batch_size=1024
+$ python train.py task=mt30 model_size=317 batch_size=1024
+$ python train.py task=dog-run steps=7000000
+```
+
+We recommend using default hyperparameters for single-task online RL, including the default model size of 5M parameters (`model_size=5`). Multi-task offline RL benefits from a larger model size, but larger models are also increasingly costly to train and evaluate. Available arguments are `model_size={1, 5, 19, 48, 317}`. See `config.yaml` for a full list of arguments.
+
+----
+
+## Citation
+
+If you find our work useful, please consider citing the paper as follows:
+
+```
+@misc{hansen2023tdmpc2,
+	title={TD-MPC2: Scalable, Robust World Models for Continuous Control}, 
+	author={Nicklas Hansen and Hao Su and Xiaolong Wang},
+	year={2023},
+	eprint={2310.16828},
+	archivePrefix={arXiv},
+	primaryClass={cs.LG}
+}
+```
+
+----
+
+## Contributing
+
+You are very welcome to contribute to this project. Feel free to open an issue or pull request if you have any suggestions or bug reports, but please review our [guidelines](CONTRIBUTING.md) first. Our goal is to build a codebase that can easily be extended to new environments and tasks, and we would love to hear about your experience!
+
+----
+
+## License
+
+This project is licensed under the MIT License - see the `LICENSE` file for details. Note that the repository relies on third-party code, which is subject to their respective licenses.
diff --git a/assets/0.gif b/assets/0.gif
new file mode 100644
index 0000000..ce13634
Binary files /dev/null and b/assets/0.gif differ
diff --git a/assets/1.gif b/assets/1.gif
new file mode 100644
index 0000000..c4e88b7
Binary files /dev/null and b/assets/1.gif differ
diff --git a/assets/2.gif b/assets/2.gif
new file mode 100644
index 0000000..c66e41a
Binary files /dev/null and b/assets/2.gif differ
diff --git a/assets/3.gif b/assets/3.gif
new file mode 100644
index 0000000..9eca22b
Binary files /dev/null and b/assets/3.gif differ
diff --git a/assets/4.gif b/assets/4.gif
new file mode 100644
index 0000000..c7c89cc
Binary files /dev/null and b/assets/4.gif differ
diff --git a/assets/5.gif b/assets/5.gif
new file mode 100644
index 0000000..8f3f67c
Binary files /dev/null and b/assets/5.gif differ
diff --git a/assets/6.gif b/assets/6.gif
new file mode 100644
index 0000000..0993bfc
Binary files /dev/null and b/assets/6.gif differ
diff --git a/assets/7.gif b/assets/7.gif
new file mode 100644
index 0000000..96b2221
Binary files /dev/null and b/assets/7.gif differ
diff --git a/assets/8.png b/assets/8.png
new file mode 100644
index 0000000..8dcacc3
Binary files /dev/null and b/assets/8.png differ
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..7303219
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,59 @@
+##########################################
+# Dockerfile for TD-MPC2                 #
+# TD-MPC2 Anonymous Authors, 2023 (c)    #
+# -------------------------------------- #
+# Instructions:                          #
+# docker build . -t <user>/tdmpc2:0.1.0  #
+# docker push <user>/tdmpc2:0.1.0        #
+##########################################
+
+# base image
+FROM nvidia/cudagl:11.3.1-devel-ubuntu20.04
+ENV DEBIAN_FRONTEND=noninteractive
+
+# packages
+RUN apt-get -y update && \
+    apt-get install -y --no-install-recommends build-essential git nano rsync vim tree curl \
+    wget unzip htop tmux xvfb patchelf ca-certificates bash-completion libjpeg-dev libpng-dev \
+    ffmpeg cmake swig libssl-dev libcurl4-openssl-dev libopenmpi-dev python3-dev zlib1g-dev \
+    qtbase5-dev qtdeclarative5-dev libglib2.0-0 libglu1-mesa-dev libgl1-mesa-dev libvulkan1 \
+    libgl1-mesa-glx libosmesa6 libosmesa6-dev libglew-dev mesa-utils && \
+    apt-get clean && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/* && \
+    mkdir /root/.ssh
+
+# miniconda
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p /opt/conda && \
+    rm ~/miniconda.sh && \
+    . /opt/conda/etc/profile.d/conda.sh && \
+    conda init && \
+    conda clean -ya
+ENV PATH /opt/conda/bin:$PATH
+SHELL ["/bin/bash", "-c"]
+
+# conda environment
+COPY nvidia_icd.json /usr/share/vulkan/icd.d/nvidia_icd.json
+COPY environment.yaml /root
+RUN conda env update -n base -f /root/environment.yaml && \
+    rm /root/environment.yaml && \
+    cd /root && \
+    python -m mani_skill2.utils.download_asset all -y && \
+    conda clean -ya && \
+    pip cache purge
+
+# environment variables
+ENV MUJOCO_GL egl
+ENV MS2_ASSET_DIR /root/data
+ENV LD_LIBRARY_PATH /root/.mujoco/mujoco210/bin:${LD_LIBRARY_PATH}
+
+# mujoco (required for metaworld)
+RUN mkdir -p /root/.mujoco && \
+    wget https://www.tdmpc2.com/files/mjkey.txt && \
+    wget https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz && \
+    tar -xzf mujoco210-linux-x86_64.tar.gz && \
+    rm mujoco210-linux-x86_64.tar.gz && \
+    mv mujoco210 /root/.mujoco/mujoco210 && \
+    mv mjkey.txt /root/.mujoco/mjkey.txt && \
+    python -c "import mujoco_py"
diff --git a/docker/environment.yaml b/docker/environment.yaml
new file mode 100644
index 0000000..9f0e6f1
--- /dev/null
+++ b/docker/environment.yaml
@@ -0,0 +1,67 @@
+name: tdmpc2
+channels:
+  - pytorch-nightly
+  - nvidia
+  - anaconda
+  - conda-forge
+  - defaults
+dependencies:
+  - python=3.9.0
+  - pytorch
+  - torchvision
+  - cudatoolkit=11.7
+  - fluidsynth
+  - portaudio
+  - glew
+  - glib
+  - pillow
+  - pip
+  - pip:
+    - absl-py
+    - click
+    - cloudpickle
+    - gpustat
+    - glfw
+    - kornia
+    - termcolor
+    - gym==0.21.0
+    - pandas
+    - moviepy
+    - ffmpeg
+    - imageio
+    - imageio-ffmpeg
+    - lxml
+    - pyparsing
+    - omegaconf
+    - hydra-core
+    - hydra-submitit-launcher
+    - submitit
+    - patchelf
+    - protobuf
+    - scipy
+    - tqdm
+    - xmltodict
+    - transforms3d
+    - joblib
+    - scikit-image
+    - einops
+    - opencv-python
+    - opencv-contrib-python
+    - filelock
+    - sapien==2.2.1
+    - mani-skill2==0.4.1
+    - tabulate
+    - h5py
+    - trimesh
+    - open3d
+    - rtree
+    - seaborn
+    - mujoco==2.3.1
+    - mujoco-py==2.1.2.14
+    - dm-control
+    - plotly
+    - pyquaternion
+    - git+https://github.com/Farama-Foundation/Metaworld.git@04be337a12305e393c0caf0cbf5ec7755c7c8feb
+    # - myosuite # MyoSuite requires gym==0.13.0 which conflicts with Meta-World & ManiSkill2, install separately if needed
+    - tensordict-nightly
+    - torchrl-nightly
diff --git a/docker/nvidia_icd.json b/docker/nvidia_icd.json
new file mode 100644
index 0000000..e7d75b2
--- /dev/null
+++ b/docker/nvidia_icd.json
@@ -0,0 +1,7 @@
+{
+    "file_format_version" : "1.0.0",
+    "ICD": {
+        "library_path": "libGLX_nvidia.so.0",
+        "api_version" : "1.2.155"
+    }
+}
\ No newline at end of file
diff --git a/results/acrobot-swingup.csv b/results/acrobot-swingup.csv
new file mode 100644
index 0000000..455aa12
--- /dev/null
+++ b/results/acrobot-swingup.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,2.4,3
+100000,275.1,3
+200000,246.4,3
+300000,330.8,3
+400000,321.6,3
+500000,334.5,3
+600000,336.1,3
+700000,450.4,3
+800000,549.5,3
+900000,368.3,3
+1000000,476.7,3
+1100000,410.7,3
+1200000,508.6,3
+1300000,422.2,3
+1400000,576.5,3
+1500000,621.8,3
+1600000,560.9,3
+1700000,562.0,3
+1800000,522.9,3
+1900000,512.0,3
+2000000,585.1,3
+2100000,672.0,3
+2200000,552.2,3
+2300000,603.5,3
+2400000,618.9,3
+2500000,438.4,3
+2600000,555.1,3
+2700000,578.9,3
+2800000,420.4,3
+2900000,664.0,3
+3000000,564.6,3
+3100000,596.9,3
+3200000,431.7,3
+3300000,483.5,3
+3400000,550.6,3
+3500000,598.4,3
+3600000,644.1,3
+3700000,500.0,3
+3800000,617.3,3
+3900000,682.2,3
+4000000,667.5,3
+0,5.6,2
+100000,150.2,2
+200000,423.1,2
+300000,358.9,2
+400000,361.2,2
+500000,446.8,2
+600000,377.1,2
+700000,492.5,2
+800000,560.5,2
+900000,476.1,2
+1000000,593.6,2
+1100000,420.5,2
+1200000,500.2,2
+1300000,548.0,2
+1400000,480.9,2
+1500000,470.4,2
+1600000,599.1,2
+1700000,707.8,2
+1800000,504.5,2
+1900000,484.7,2
+2000000,553.0,2
+2100000,550.1,2
+2200000,596.8,2
+2300000,502.1,2
+2400000,489.2,2
+2500000,439.3,2
+2600000,549.2,2
+2700000,535.0,2
+2800000,691.8,2
+2900000,519.1,2
+3000000,525.6,2
+3100000,564.8,2
+3200000,596.0,2
+3300000,510.4,2
+3400000,560.7,2
+3500000,473.8,2
+3600000,483.5,2
+3700000,628.0,2
+3800000,679.7,2
+3900000,546.2,2
+4000000,609.6,2
+0,7.6,1
+100000,112.1,1
+200000,219.4,1
+300000,350.9,1
+400000,283.2,1
+500000,301.9,1
+600000,343.1,1
+700000,455.5,1
+800000,410.1,1
+900000,476.2,1
+1000000,483.5,1
+1100000,544.5,1
+1200000,514.5,1
+1300000,585.2,1
+1400000,551.1,1
+1500000,554.4,1
+1600000,428.7,1
+1700000,459.6,1
+1800000,542.0,1
+1900000,602.7,1
+2000000,693.8,1
+2100000,514.7,1
+2200000,593.4,1
+2300000,344.7,1
+2400000,494.2,1
+2500000,641.4,1
+2600000,604.3,1
+2700000,510.1,1
+2800000,558.8,1
+2900000,445.7,1
+3000000,524.5,1
+3100000,510.3,1
+3200000,613.1,1
+3300000,594.1,1
+3400000,542.9,1
+3500000,647.1,1
+3600000,528.6,1
+3700000,490.0,1
+3800000,619.4,1
+3900000,644.7,1
+4000000,711.5,1
diff --git a/results/cartpole-balance-sparse.csv b/results/cartpole-balance-sparse.csv
new file mode 100644
index 0000000..aa85652
--- /dev/null
+++ b/results/cartpole-balance-sparse.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,49.9,2
+100000,1000.0,2
+200000,1000.0,2
+300000,1000.0,2
+400000,1000.0,2
+500000,1000.0,2
+600000,1000.0,2
+700000,1000.0,2
+800000,1000.0,2
+900000,1000.0,2
+1000000,1000.0,2
+1100000,1000.0,2
+1200000,1000.0,2
+1300000,1000.0,2
+1400000,1000.0,2
+1500000,1000.0,2
+1600000,1000.0,2
+1700000,1000.0,2
+1800000,1000.0,2
+1900000,1000.0,2
+2000000,1000.0,2
+2100000,1000.0,2
+2200000,1000.0,2
+2300000,1000.0,2
+2400000,1000.0,2
+2500000,1000.0,2
+2600000,1000.0,2
+2700000,1000.0,2
+2800000,1000.0,2
+2900000,1000.0,2
+3000000,1000.0,2
+3100000,1000.0,2
+3200000,1000.0,2
+3300000,1000.0,2
+3400000,1000.0,2
+3500000,1000.0,2
+3600000,1000.0,2
+3700000,1000.0,2
+3800000,1000.0,2
+3900000,1000.0,2
+4000000,1000.0,2
+0,21.9,1
+100000,1000.0,1
+200000,1000.0,1
+300000,1000.0,1
+400000,1000.0,1
+500000,1000.0,1
+600000,1000.0,1
+700000,1000.0,1
+800000,1000.0,1
+900000,1000.0,1
+1000000,1000.0,1
+1100000,1000.0,1
+1200000,1000.0,1
+1300000,1000.0,1
+1400000,1000.0,1
+1500000,1000.0,1
+1600000,1000.0,1
+1700000,1000.0,1
+1800000,967.7,1
+1900000,1000.0,1
+2000000,1000.0,1
+2100000,1000.0,1
+2200000,1000.0,1
+2300000,1000.0,1
+2400000,1000.0,1
+2500000,1000.0,1
+2600000,1000.0,1
+2700000,1000.0,1
+2800000,1000.0,1
+2900000,1000.0,1
+3000000,1000.0,1
+3100000,1000.0,1
+3200000,1000.0,1
+3300000,1000.0,1
+3400000,1000.0,1
+3500000,1000.0,1
+3600000,1000.0,1
+3700000,1000.0,1
+3800000,1000.0,1
+3900000,1000.0,1
+4000000,1000.0,1
+0,25.6,3
+100000,1000.0,3
+200000,1000.0,3
+300000,1000.0,3
+400000,1000.0,3
+500000,1000.0,3
+600000,1000.0,3
+700000,1000.0,3
+800000,1000.0,3
+900000,1000.0,3
+1000000,1000.0,3
+1100000,1000.0,3
+1200000,1000.0,3
+1300000,1000.0,3
+1400000,1000.0,3
+1500000,1000.0,3
+1600000,1000.0,3
+1700000,1000.0,3
+1800000,1000.0,3
+1900000,1000.0,3
+2000000,1000.0,3
+2100000,1000.0,3
+2200000,1000.0,3
+2300000,1000.0,3
+2400000,1000.0,3
+2500000,1000.0,3
+2600000,1000.0,3
+2700000,1000.0,3
+2800000,1000.0,3
+2900000,1000.0,3
+3000000,1000.0,3
+3100000,1000.0,3
+3200000,1000.0,3
+3300000,1000.0,3
+3400000,1000.0,3
+3500000,1000.0,3
+3600000,1000.0,3
+3700000,1000.0,3
+3800000,1000.0,3
+3900000,1000.0,3
+4000000,931.0,3
diff --git a/results/cartpole-balance.csv b/results/cartpole-balance.csv
new file mode 100644
index 0000000..6434369
--- /dev/null
+++ b/results/cartpole-balance.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,270.6,3
+100000,998.4,3
+200000,998.4,3
+300000,998.4,3
+400000,998.9,3
+500000,986.6,3
+600000,995.7,3
+700000,999.3,3
+800000,996.3,3
+900000,992.5,3
+1000000,996.8,3
+1100000,971.0,3
+1200000,996.8,3
+1300000,997.7,3
+1400000,993.5,3
+1500000,996.6,3
+1600000,998.7,3
+1700000,999.0,3
+1800000,997.0,3
+1900000,995.8,3
+2000000,998.0,3
+2100000,999.4,3
+2200000,999.0,3
+2300000,999.0,3
+2400000,999.3,3
+2500000,998.4,3
+2600000,987.0,3
+2700000,998.5,3
+2800000,990.5,3
+2900000,992.4,3
+3000000,996.6,3
+3100000,998.5,3
+3200000,998.6,3
+3300000,997.8,3
+3400000,997.8,3
+3500000,999.1,3
+3600000,999.1,3
+3700000,979.9,3
+3800000,999.2,3
+3900000,994.5,3
+4000000,996.0,3
+0,313.1,2
+100000,997.0,2
+200000,989.4,2
+300000,998.9,2
+400000,943.0,2
+500000,999.0,2
+600000,997.1,2
+700000,997.8,2
+800000,988.6,2
+900000,993.9,2
+1000000,928.0,2
+1100000,982.2,2
+1200000,998.2,2
+1300000,995.5,2
+1400000,996.2,2
+1500000,999.0,2
+1600000,992.4,2
+1700000,983.6,2
+1800000,998.2,2
+1900000,995.9,2
+2000000,998.6,2
+2100000,999.4,2
+2200000,999.0,2
+2300000,999.2,2
+2400000,999.4,2
+2500000,999.3,2
+2600000,999.1,2
+2700000,999.3,2
+2800000,999.5,2
+2900000,999.4,2
+3000000,999.4,2
+3100000,999.4,2
+3200000,999.2,2
+3300000,997.4,2
+3400000,998.5,2
+3500000,999.3,2
+3600000,999.2,2
+3700000,999.3,2
+3800000,999.5,2
+3900000,999.5,2
+4000000,999.4,2
+0,124.3,1
+100000,997.9,1
+200000,998.9,1
+300000,999.1,1
+400000,994.7,1
+500000,996.2,1
+600000,995.4,1
+700000,997.6,1
+800000,990.8,1
+900000,998.2,1
+1000000,999.3,1
+1100000,997.0,1
+1200000,998.6,1
+1300000,999.5,1
+1400000,999.2,1
+1500000,999.4,1
+1600000,997.1,1
+1700000,991.8,1
+1800000,992.8,1
+1900000,996.8,1
+2000000,999.3,1
+2100000,999.0,1
+2200000,999.4,1
+2300000,999.3,1
+2400000,998.4,1
+2500000,996.7,1
+2600000,998.9,1
+2700000,999.4,1
+2800000,998.6,1
+2900000,999.2,1
+3000000,999.3,1
+3100000,999.4,1
+3200000,999.4,1
+3300000,999.4,1
+3400000,999.4,1
+3500000,999.5,1
+3600000,999.3,1
+3700000,999.6,1
+3800000,999.6,1
+3900000,999.6,1
+4000000,999.6,1
diff --git a/results/cartpole-swingup-sparse.csv b/results/cartpole-swingup-sparse.csv
new file mode 100644
index 0000000..7249d01
--- /dev/null
+++ b/results/cartpole-swingup-sparse.csv
@@ -0,0 +1,123 @@
+step,reward,seed
+0,0.0,3
+100000,1.0,3
+200000,784.8,3
+300000,835.9,3
+400000,824.7,3
+500000,844.6,3
+600000,848.7,3
+700000,840.2,3
+800000,847.8,3
+900000,848.7,3
+1000000,849.8,3
+1100000,849.6,3
+1200000,848.0,3
+1300000,849.4,3
+1400000,849.2,3
+1500000,848.8,3
+1600000,845.7,3
+1700000,848.8,3
+1800000,837.2,3
+1900000,848.9,3
+2000000,849.9,3
+2100000,849.7,3
+2200000,850.0,3
+2300000,849.7,3
+2400000,848.3,3
+2500000,850.1,3
+2600000,850.0,3
+2700000,850.0,3
+2800000,259.5,3
+2900000,850.1,3
+3000000,850.0,3
+3100000,849.3,3
+3200000,849.9,3
+3300000,850.0,3
+3400000,848.9,3
+3500000,849.9,3
+3600000,850.0,3
+3700000,850.4,3
+3800000,849.8,3
+3900000,850.1,3
+0,0.0,2
+100000,0.0,2
+200000,21.6,2
+300000,707.7,2
+400000,843.3,2
+500000,844.9,2
+600000,844.6,2
+700000,846.3,2
+800000,845.7,2
+900000,847.9,2
+1000000,828.3,2
+1100000,847.6,2
+1200000,840.2,2
+1300000,848.2,2
+1400000,849.0,2
+1500000,847.9,2
+1600000,848.6,2
+1700000,848.8,2
+1800000,848.9,2
+1900000,849.6,2
+2000000,849.4,2
+2100000,848.9,2
+2200000,850.1,2
+2300000,849.5,2
+2400000,850.2,2
+2500000,850.2,2
+2600000,850.7,2
+2700000,850.7,2
+2800000,850.7,2
+2900000,848.2,2
+3000000,847.3,2
+3100000,849.0,2
+3200000,848.5,2
+3300000,850.0,2
+3400000,781.4,2
+3500000,849.2,2
+3600000,849.9,2
+3700000,850.1,2
+3800000,850.1,2
+3900000,849.6,2
+4000000,850.3,2
+0,0.0,1
+100000,0.0,1
+200000,118.3,1
+300000,842.3,1
+400000,847.0,1
+500000,844.8,1
+600000,848.1,1
+700000,847.1,1
+800000,849.1,1
+900000,847.2,1
+1000000,846.7,1
+1100000,847.5,1
+1200000,848.0,1
+1300000,849.3,1
+1400000,842.6,1
+1500000,154.2,1
+1600000,849.1,1
+1700000,848.3,1
+1800000,849.3,1
+1900000,849.7,1
+2000000,849.1,1
+2100000,411.1,1
+2200000,849.2,1
+2300000,849.6,1
+2400000,850.1,1
+2500000,849.8,1
+2600000,848.1,1
+2700000,849.5,1
+2800000,848.7,1
+2900000,850.0,1
+3000000,850.1,1
+3100000,849.5,1
+3200000,850.4,1
+3300000,850.2,1
+3400000,850.4,1
+3500000,848.8,1
+3600000,849.7,1
+3700000,849.9,1
+3800000,849.6,1
+3900000,850.2,1
+4000000,850.5,1
diff --git a/results/cartpole-swingup.csv b/results/cartpole-swingup.csv
new file mode 100644
index 0000000..a8d1845
--- /dev/null
+++ b/results/cartpole-swingup.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,2.0,2
+100000,789.8,2
+200000,877.6,2
+300000,879.1,2
+400000,880.4,2
+500000,882.0,2
+600000,882.0,2
+700000,881.7,2
+800000,882.3,2
+900000,882.0,2
+1000000,881.1,2
+1100000,879.9,2
+1200000,881.3,2
+1300000,882.3,2
+1400000,880.9,2
+1500000,879.7,2
+1600000,863.9,2
+1700000,879.0,2
+1800000,880.8,2
+1900000,882.6,2
+2000000,882.3,2
+2100000,883.0,2
+2200000,882.8,2
+2300000,882.7,2
+2400000,882.2,2
+2500000,882.8,2
+2600000,882.8,2
+2700000,882.7,2
+2800000,882.8,2
+2900000,882.8,2
+3000000,882.8,2
+3100000,883.0,2
+3200000,883.0,2
+3300000,882.6,2
+3400000,883.0,2
+3500000,882.8,2
+3600000,883.0,2
+3700000,882.4,2
+3800000,882.6,2
+3900000,882.5,2
+4000000,882.5,2
+0,4.8,3
+100000,867.9,3
+200000,879.0,3
+300000,877.4,3
+400000,867.4,3
+500000,880.2,3
+600000,881.2,3
+700000,881.8,3
+800000,879.9,3
+900000,879.7,3
+1000000,881.9,3
+1100000,881.9,3
+1200000,879.1,3
+1300000,880.5,3
+1400000,881.9,3
+1500000,881.8,3
+1600000,881.3,3
+1700000,880.2,3
+1800000,881.8,3
+1900000,882.2,3
+2000000,882.4,3
+2100000,882.8,3
+2200000,883.0,3
+2300000,883.2,3
+2400000,883.1,3
+2500000,883.0,3
+2600000,883.0,3
+2700000,882.5,3
+2800000,882.9,3
+2900000,882.8,3
+3000000,882.9,3
+3100000,883.0,3
+3200000,882.9,3
+3300000,882.9,3
+3400000,882.9,3
+3500000,882.8,3
+3600000,882.8,3
+3700000,882.7,3
+3800000,882.9,3
+3900000,882.9,3
+4000000,883.0,3
+0,1.5,1
+100000,860.5,1
+200000,859.1,1
+300000,864.4,1
+400000,864.2,1
+500000,866.0,1
+600000,860.5,1
+700000,865.7,1
+800000,865.4,1
+900000,864.4,1
+1000000,864.7,1
+1100000,866.8,1
+1200000,865.6,1
+1300000,864.8,1
+1400000,866.5,1
+1500000,866.6,1
+1600000,861.9,1
+1700000,866.6,1
+1800000,863.0,1
+1900000,865.2,1
+2000000,864.4,1
+2100000,866.8,1
+2200000,866.9,1
+2300000,864.3,1
+2400000,866.0,1
+2500000,867.4,1
+2600000,865.8,1
+2700000,867.3,1
+2800000,866.6,1
+2900000,751.6,1
+3000000,866.3,1
+3100000,867.3,1
+3200000,866.1,1
+3300000,867.3,1
+3400000,866.2,1
+3500000,866.9,1
+3600000,865.8,1
+3700000,866.7,1
+3800000,867.1,1
+3900000,867.3,1
+4000000,866.2,1
diff --git a/results/cheetah-jump.csv b/results/cheetah-jump.csv
new file mode 100644
index 0000000..8000947
--- /dev/null
+++ b/results/cheetah-jump.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,179.6,2
+100000,557.8,2
+200000,607.2,2
+300000,642.0,2
+400000,616.6,2
+500000,768.8,2
+600000,786.0,2
+700000,767.5,2
+800000,802.7,2
+900000,809.4,2
+1000000,806.6,2
+1100000,806.7,2
+1200000,808.0,2
+1300000,810.3,2
+1400000,808.1,2
+1500000,814.9,2
+1600000,808.8,2
+1700000,809.0,2
+1800000,813.5,2
+1900000,808.5,2
+2000000,799.6,2
+2100000,814.4,2
+2200000,820.4,2
+2300000,821.6,2
+2400000,821.6,2
+2500000,812.9,2
+2600000,824.2,2
+2700000,817.1,2
+2800000,823.3,2
+2900000,821.1,2
+3000000,825.4,2
+3100000,823.1,2
+3200000,813.2,2
+3300000,822.1,2
+3400000,827.9,2
+3500000,817.5,2
+3600000,823.5,2
+3700000,822.7,2
+3800000,830.1,2
+3900000,826.0,2
+4000000,816.0,2
+0,179.4,1
+100000,581.7,1
+200000,598.5,1
+300000,600.1,1
+400000,587.9,1
+500000,623.6,1
+600000,239.6,1
+700000,729.4,1
+800000,765.5,1
+900000,776.2,1
+1000000,791.4,1
+1100000,803.5,1
+1200000,810.1,1
+1300000,816.9,1
+1400000,819.4,1
+1500000,818.5,1
+1600000,808.8,1
+1700000,815.4,1
+1800000,817.4,1
+1900000,815.3,1
+2000000,809.9,1
+2100000,819.2,1
+2200000,822.2,1
+2300000,822.5,1
+2400000,822.9,1
+2500000,824.0,1
+2600000,825.2,1
+2700000,822.9,1
+2800000,823.6,1
+2900000,821.5,1
+3000000,822.7,1
+3100000,825.6,1
+3200000,823.4,1
+3300000,819.5,1
+3400000,821.7,1
+3500000,819.9,1
+3600000,818.4,1
+3700000,817.8,1
+3800000,822.6,1
+3900000,820.3,1
+4000000,820.4,1
+0,178.7,3
+100000,587.1,3
+200000,594.8,3
+300000,618.6,3
+400000,700.3,3
+500000,734.4,3
+600000,760.8,3
+700000,796.1,3
+800000,804.3,3
+900000,818.0,3
+1000000,811.9,3
+1100000,811.3,3
+1200000,814.7,3
+1300000,811.6,3
+1400000,814.2,3
+1500000,819.5,3
+1600000,812.2,3
+1700000,802.4,3
+1800000,812.8,3
+1900000,813.6,3
+2000000,818.5,3
+2100000,808.0,3
+2200000,812.1,3
+2300000,816.3,3
+2400000,817.5,3
+2500000,819.8,3
+2600000,827.2,3
+2700000,829.8,3
+2800000,830.0,3
+2900000,817.4,3
+3000000,829.8,3
+3100000,827.1,3
+3200000,831.2,3
+3300000,827.9,3
+3400000,822.5,3
+3500000,828.5,3
+3600000,832.6,3
+3700000,832.7,3
+3800000,828.0,3
+3900000,834.3,3
+4000000,832.4,3
diff --git a/results/cheetah-run-back.csv b/results/cheetah-run-back.csv
new file mode 100644
index 0000000..bc8f376
--- /dev/null
+++ b/results/cheetah-run-back.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,7.4,2
+100000,408.8,2
+200000,549.1,2
+300000,576.2,2
+400000,615.8,2
+500000,634.7,2
+600000,675.4,2
+700000,708.1,2
+800000,779.9,2
+900000,791.7,2
+1000000,813.6,2
+1100000,825.0,2
+1200000,812.9,2
+1300000,817.7,2
+1400000,819.9,2
+1500000,825.8,2
+1600000,822.9,2
+1700000,827.3,2
+1800000,833.1,2
+1900000,832.1,2
+2000000,833.7,2
+2100000,824.8,2
+2200000,832.6,2
+2300000,804.6,2
+2400000,719.1,2
+2500000,644.0,2
+2600000,761.5,2
+2700000,779.7,2
+2800000,829.8,2
+2900000,829.5,2
+3000000,827.8,2
+3100000,828.2,2
+3200000,803.8,2
+3300000,832.8,2
+3400000,761.7,2
+3500000,754.1,2
+3600000,830.9,2
+3700000,731.2,2
+3800000,834.4,2
+3900000,834.4,2
+4000000,834.5,2
+0,6.1,3
+100000,489.7,3
+200000,552.5,3
+300000,613.4,3
+400000,720.9,3
+500000,717.1,3
+600000,775.5,3
+700000,819.1,3
+800000,765.4,3
+900000,741.5,3
+1000000,807.2,3
+1100000,824.1,3
+1200000,747.2,3
+1300000,827.1,3
+1400000,831.0,3
+1500000,827.2,3
+1600000,831.4,3
+1700000,837.0,3
+1800000,833.4,3
+1900000,834.2,3
+2000000,835.7,3
+2100000,834.2,3
+2200000,771.0,3
+2300000,745.5,3
+2400000,779.3,3
+2500000,743.4,3
+2600000,834.0,3
+2700000,835.4,3
+2800000,838.1,3
+2900000,806.6,3
+3000000,836.0,3
+3100000,720.8,3
+3200000,837.1,3
+3300000,837.2,3
+3400000,823.1,3
+3500000,780.0,3
+3600000,842.3,3
+3700000,791.2,3
+3800000,837.5,3
+3900000,838.8,3
+4000000,842.2,3
+0,6.0,1
+100000,487.4,1
+200000,599.5,1
+300000,706.0,1
+400000,739.2,1
+500000,780.7,1
+600000,800.6,1
+700000,820.1,1
+800000,817.1,1
+900000,822.9,1
+1000000,780.2,1
+1100000,831.8,1
+1200000,822.0,1
+1300000,823.3,1
+1400000,748.2,1
+1500000,803.4,1
+1600000,826.9,1
+1700000,721.3,1
+1800000,835.4,1
+1900000,668.7,1
+2000000,829.6,1
+2100000,812.8,1
+2200000,728.9,1
+2300000,733.1,1
+2400000,788.2,1
+2500000,784.5,1
+2600000,813.0,1
+2700000,781.1,1
+2800000,770.9,1
+2900000,812.2,1
+3000000,845.9,1
+3100000,854.2,1
+3200000,854.5,1
+3300000,862.0,1
+3400000,861.6,1
+3500000,861.2,1
+3600000,862.3,1
+3700000,865.2,1
+3800000,863.5,1
+3900000,857.3,1
+4000000,860.5,1
diff --git a/results/cheetah-run-backwards.csv b/results/cheetah-run-backwards.csv
new file mode 100644
index 0000000..dc1df3e
--- /dev/null
+++ b/results/cheetah-run-backwards.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,11.7,1
+100000,517.3,1
+200000,589.1,1
+300000,617.6,1
+400000,648.2,1
+500000,631.8,1
+600000,636.5,1
+700000,665.2,1
+800000,647.2,1
+900000,657.5,1
+1000000,649.2,1
+1100000,670.9,1
+1200000,661.6,1
+1300000,643.9,1
+1400000,677.1,1
+1500000,625.0,1
+1600000,680.6,1
+1700000,684.5,1
+1800000,689.4,1
+1900000,684.2,1
+2000000,656.5,1
+2100000,672.5,1
+2200000,677.6,1
+2300000,666.8,1
+2400000,682.3,1
+2500000,687.4,1
+2600000,684.8,1
+2700000,669.5,1
+2800000,683.5,1
+2900000,699.3,1
+3000000,692.4,1
+3100000,701.1,1
+3200000,690.3,1
+3300000,708.6,1
+3400000,715.5,1
+3500000,677.8,1
+3600000,717.1,1
+3700000,716.4,1
+3800000,710.0,1
+3900000,713.3,1
+4000000,688.8,1
+0,8.9,2
+100000,475.7,2
+200000,608.2,2
+300000,680.6,2
+400000,730.0,2
+500000,732.0,2
+600000,751.4,2
+700000,748.9,2
+800000,772.8,2
+900000,773.9,2
+1000000,787.3,2
+1100000,788.0,2
+1200000,802.6,2
+1300000,795.5,2
+1400000,820.1,2
+1500000,823.7,2
+1600000,786.4,2
+1700000,835.4,2
+1800000,832.1,2
+1900000,846.9,2
+2000000,826.8,2
+2100000,824.7,2
+2200000,852.9,2
+2300000,840.2,2
+2400000,722.3,2
+2500000,856.6,2
+2600000,868.1,2
+2700000,785.4,2
+2800000,859.1,2
+2900000,832.5,2
+3000000,837.1,2
+3100000,870.3,2
+3200000,860.0,2
+3300000,862.9,2
+3400000,871.6,2
+3500000,879.5,2
+3600000,871.4,2
+3700000,860.3,2
+3800000,873.8,2
+3900000,879.2,2
+4000000,875.6,2
+0,9.6,3
+100000,568.0,3
+200000,585.0,3
+300000,651.2,3
+400000,670.8,3
+500000,676.2,3
+600000,666.2,3
+700000,676.5,3
+800000,672.0,3
+900000,654.2,3
+1000000,681.1,3
+1100000,679.4,3
+1200000,684.5,3
+1300000,684.3,3
+1400000,656.6,3
+1500000,687.8,3
+1600000,685.9,3
+1700000,691.2,3
+1800000,688.8,3
+1900000,690.4,3
+2000000,688.4,3
+2100000,688.7,3
+2200000,694.2,3
+2300000,688.0,3
+2400000,692.1,3
+2500000,696.4,3
+2600000,697.2,3
+2700000,693.7,3
+2800000,689.8,3
+2900000,700.4,3
+3000000,694.0,3
+3100000,694.1,3
+3200000,695.0,3
+3300000,687.1,3
+3400000,696.4,3
+3500000,700.6,3
+3600000,698.8,3
+3700000,700.4,3
+3800000,690.7,3
+3900000,699.9,3
+4000000,696.3,3
diff --git a/results/cheetah-run-front.csv b/results/cheetah-run-front.csv
new file mode 100644
index 0000000..5dd10d0
--- /dev/null
+++ b/results/cheetah-run-front.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,7.5,2
+100000,318.6,2
+200000,491.8,2
+300000,595.9,2
+400000,539.8,2
+500000,647.9,2
+600000,681.0,2
+700000,674.8,2
+800000,696.5,2
+900000,696.3,2
+1000000,685.5,2
+1100000,658.2,2
+1200000,672.1,2
+1300000,661.2,2
+1400000,671.9,2
+1500000,663.4,2
+1600000,685.6,2
+1700000,666.0,2
+1800000,665.0,2
+1900000,689.3,2
+2000000,681.9,2
+2100000,649.4,2
+2200000,671.0,2
+2300000,677.9,2
+2400000,461.3,2
+2500000,653.0,2
+2600000,684.1,2
+2700000,689.7,2
+2800000,630.6,2
+2900000,657.5,2
+3000000,673.7,2
+3100000,698.7,2
+3200000,704.1,2
+3300000,702.0,2
+3400000,697.2,2
+3500000,692.5,2
+3600000,706.0,2
+3700000,689.0,2
+3800000,703.3,2
+3900000,725.7,2
+4000000,702.6,2
+0,6.2,1
+100000,306.7,1
+200000,509.0,1
+300000,607.6,1
+400000,637.9,1
+500000,689.8,1
+600000,690.6,1
+700000,701.8,1
+800000,692.9,1
+900000,693.5,1
+1000000,693.5,1
+1100000,673.2,1
+1200000,664.4,1
+1300000,701.0,1
+1400000,676.8,1
+1500000,637.2,1
+1600000,683.5,1
+1700000,699.8,1
+1800000,684.7,1
+1900000,664.6,1
+2000000,688.1,1
+2100000,661.3,1
+2200000,712.0,1
+2300000,676.5,1
+2400000,665.7,1
+2500000,641.3,1
+2600000,720.6,1
+2700000,723.2,1
+2800000,718.9,1
+2900000,721.6,1
+3000000,735.6,1
+3100000,749.7,1
+3200000,746.5,1
+3300000,729.7,1
+3400000,755.6,1
+3500000,747.8,1
+3600000,757.6,1
+3700000,749.0,1
+3800000,703.7,1
+3900000,692.1,1
+4000000,698.4,1
+0,6.2,3
+100000,307.1,3
+200000,473.4,3
+300000,592.0,3
+400000,650.8,3
+500000,661.2,3
+600000,697.0,3
+700000,682.2,3
+800000,672.4,3
+900000,674.0,3
+1000000,651.4,3
+1100000,671.4,3
+1200000,667.0,3
+1300000,665.7,3
+1400000,657.8,3
+1500000,475.8,3
+1600000,664.2,3
+1700000,652.7,3
+1800000,652.0,3
+1900000,654.7,3
+2000000,656.1,3
+2100000,640.9,3
+2200000,660.7,3
+2300000,647.9,3
+2400000,578.2,3
+2500000,611.5,3
+2600000,657.2,3
+2700000,658.0,3
+2800000,668.1,3
+2900000,678.4,3
+3000000,677.8,3
+3100000,648.5,3
+3200000,674.6,3
+3300000,669.6,3
+3400000,599.7,3
+3500000,663.6,3
+3600000,676.2,3
+3700000,661.1,3
+3800000,680.0,3
+3900000,655.7,3
+4000000,680.5,3
diff --git a/results/cheetah-run.csv b/results/cheetah-run.csv
new file mode 100644
index 0000000..806e3ff
--- /dev/null
+++ b/results/cheetah-run.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,11.6,2
+100000,613.8,2
+200000,710.5,2
+300000,764.8,2
+400000,822.3,2
+500000,822.1,2
+600000,878.6,2
+700000,898.1,2
+800000,906.2,2
+900000,909.0,2
+1000000,896.9,2
+1100000,911.2,2
+1200000,904.9,2
+1300000,827.8,2
+1400000,906.5,2
+1500000,916.8,2
+1600000,914.3,2
+1700000,914.6,2
+1800000,913.4,2
+1900000,914.6,2
+2000000,918.3,2
+2100000,917.5,2
+2200000,916.4,2
+2300000,919.7,2
+2400000,913.8,2
+2500000,917.3,2
+2600000,913.0,2
+2700000,912.6,2
+2800000,917.0,2
+2900000,918.5,2
+3000000,917.7,2
+3100000,917.9,2
+3200000,847.8,2
+3300000,920.1,2
+3400000,917.5,2
+3500000,919.7,2
+3600000,920.1,2
+3700000,917.4,2
+3800000,921.1,2
+3900000,920.8,2
+4000000,922.9,2
+0,2.4,1
+100000,454.4,1
+200000,522.1,1
+300000,457.8,1
+400000,655.3,1
+500000,593.8,1
+600000,659.6,1
+700000,667.6,1
+800000,673.4,1
+900000,708.3,1
+1000000,725.4,1
+1100000,720.4,1
+1200000,721.9,1
+1300000,731.6,1
+1400000,692.6,1
+1500000,677.1,1
+1600000,727.0,1
+1700000,675.0,1
+1800000,698.2,1
+1900000,694.6,1
+2000000,747.3,1
+2100000,743.1,1
+2200000,751.7,1
+2300000,757.5,1
+2400000,756.4,1
+2500000,720.6,1
+2600000,763.9,1
+2700000,771.1,1
+2800000,778.5,1
+2900000,781.3,1
+3000000,784.2,1
+3100000,777.9,1
+3200000,793.0,1
+3300000,745.2,1
+3400000,762.1,1
+3500000,774.2,1
+3600000,801.8,1
+3700000,816.7,1
+3800000,824.8,1
+3900000,837.2,1
+4000000,848.3,1
+0,3.9,3
+100000,489.2,3
+200000,610.6,3
+300000,550.0,3
+400000,795.7,3
+500000,856.9,3
+600000,820.2,3
+700000,855.5,3
+800000,896.5,3
+900000,889.2,3
+1000000,910.8,3
+1100000,902.7,3
+1200000,912.1,3
+1300000,815.1,3
+1400000,913.2,3
+1500000,915.9,3
+1600000,911.0,3
+1700000,917.2,3
+1800000,915.6,3
+1900000,916.4,3
+2000000,914.3,3
+2100000,917.7,3
+2200000,916.3,3
+2300000,920.0,3
+2400000,913.8,3
+2500000,916.9,3
+2600000,921.3,3
+2700000,920.3,3
+2800000,917.4,3
+2900000,914.9,3
+3000000,919.0,3
+3100000,913.2,3
+3200000,921.3,3
+3300000,922.0,3
+3400000,920.6,3
+3500000,921.7,3
+3600000,921.6,3
+3700000,922.2,3
+3800000,911.8,3
+3900000,919.6,3
+4000000,916.3,3
diff --git a/results/cup-catch.csv b/results/cup-catch.csv
new file mode 100644
index 0000000..d266bc3
--- /dev/null
+++ b/results/cup-catch.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,0.0,3
+100000,973.3,3
+200000,972.1,3
+300000,978.0,3
+400000,979.0,3
+500000,981.8,3
+600000,983.2,3
+700000,986.4,3
+800000,979.9,3
+900000,980.8,3
+1000000,981.7,3
+1100000,987.8,3
+1200000,986.1,3
+1300000,980.2,3
+1400000,985.2,3
+1500000,981.0,3
+1600000,985.5,3
+1700000,987.0,3
+1800000,988.1,3
+1900000,986.8,3
+2000000,986.2,3
+2100000,981.3,3
+2200000,986.1,3
+2300000,981.3,3
+2400000,986.5,3
+2500000,986.8,3
+2600000,979.7,3
+2700000,986.0,3
+2800000,978.8,3
+2900000,983.3,3
+3000000,989.4,3
+3100000,977.5,3
+3200000,984.7,3
+3300000,985.2,3
+3400000,986.0,3
+3500000,987.4,3
+3600000,983.8,3
+3700000,989.2,3
+3800000,976.8,3
+3900000,991.3,3
+4000000,986.5,3
+0,0.0,2
+100000,983.7,2
+200000,977.0,2
+300000,980.2,2
+400000,976.0,2
+500000,985.8,2
+600000,985.8,2
+700000,983.6,2
+800000,984.5,2
+900000,983.1,2
+1000000,984.7,2
+1100000,977.9,2
+1200000,979.8,2
+1300000,976.6,2
+1400000,979.5,2
+1500000,982.7,2
+1600000,988.8,2
+1700000,983.0,2
+1800000,984.5,2
+1900000,984.0,2
+2000000,983.3,2
+2100000,987.3,2
+2200000,979.1,2
+2300000,984.9,2
+2400000,980.5,2
+2500000,984.2,2
+2600000,978.3,2
+2700000,983.5,2
+2800000,981.9,2
+2900000,982.9,2
+3000000,989.1,2
+3100000,983.2,2
+3200000,976.9,2
+3300000,982.9,2
+3400000,975.5,2
+3500000,979.7,2
+3600000,980.9,2
+3700000,980.7,2
+3800000,990.6,2
+3900000,983.1,2
+4000000,984.9,2
+0,0.0,1
+100000,986.7,1
+200000,980.6,1
+300000,975.1,1
+400000,983.8,1
+500000,986.5,1
+600000,982.9,1
+700000,986.1,1
+800000,984.1,1
+900000,981.3,1
+1000000,991.1,1
+1100000,984.0,1
+1200000,985.5,1
+1300000,986.2,1
+1400000,985.7,1
+1500000,983.7,1
+1600000,982.8,1
+1700000,986.0,1
+1800000,984.9,1
+1900000,981.7,1
+2000000,981.6,1
+2100000,982.9,1
+2200000,980.8,1
+2300000,988.0,1
+2400000,981.1,1
+2500000,977.7,1
+2600000,981.1,1
+2700000,977.7,1
+2800000,980.2,1
+2900000,988.5,1
+3000000,976.9,1
+3100000,983.5,1
+3200000,981.9,1
+3300000,985.7,1
+3400000,981.3,1
+3500000,988.4,1
+3600000,986.6,1
+3700000,988.1,1
+3800000,986.1,1
+3900000,981.4,1
+4000000,979.9,1
diff --git a/results/cup-spin.csv b/results/cup-spin.csv
new file mode 100644
index 0000000..05991cd
--- /dev/null
+++ b/results/cup-spin.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,27.4,3
+100000,844.1,3
+200000,843.9,3
+300000,825.2,3
+400000,838.9,3
+500000,847.4,3
+600000,845.9,3
+700000,846.7,3
+800000,845.8,3
+900000,847.9,3
+1000000,847.2,3
+1100000,846.4,3
+1200000,837.9,3
+1300000,847.6,3
+1400000,847.2,3
+1500000,846.5,3
+1600000,848.3,3
+1700000,848.3,3
+1800000,846.9,3
+1900000,845.4,3
+2000000,848.6,3
+2100000,848.1,3
+2200000,848.4,3
+2300000,847.7,3
+2400000,848.0,3
+2500000,848.9,3
+2600000,848.2,3
+2700000,848.2,3
+2800000,846.2,3
+2900000,848.3,3
+3000000,848.0,3
+3100000,846.6,3
+3200000,848.9,3
+3300000,848.6,3
+3400000,847.5,3
+3500000,848.6,3
+3600000,849.0,3
+3700000,848.7,3
+3800000,848.0,3
+3900000,847.9,3
+4000000,848.9,3
+0,0.0,2
+100000,841.4,2
+200000,846.4,2
+300000,848.3,2
+400000,846.9,2
+500000,848.1,2
+600000,848.1,2
+700000,845.0,2
+800000,846.6,2
+900000,845.7,2
+1000000,846.5,2
+1100000,847.5,2
+1200000,847.8,2
+1300000,845.9,2
+1400000,846.7,2
+1500000,848.2,2
+1600000,845.0,2
+1700000,847.6,2
+1800000,848.3,2
+1900000,848.5,2
+2000000,847.7,2
+2100000,847.6,2
+2200000,846.3,2
+2300000,847.5,2
+2400000,847.3,2
+2500000,847.5,2
+2600000,848.3,2
+2700000,848.0,2
+2800000,846.9,2
+2900000,847.2,2
+3000000,847.8,2
+3100000,848.5,2
+3200000,848.5,2
+3300000,848.9,2
+3400000,846.6,2
+3500000,847.9,2
+3600000,849.1,2
+3700000,848.4,2
+3800000,846.4,2
+3900000,842.1,2
+4000000,845.9,2
+0,28.3,1
+100000,844.4,1
+200000,847.0,1
+300000,846.9,1
+400000,847.0,1
+500000,847.9,1
+600000,846.9,1
+700000,847.1,1
+800000,846.7,1
+900000,848.7,1
+1000000,848.3,1
+1100000,846.8,1
+1200000,847.9,1
+1300000,843.8,1
+1400000,848.4,1
+1500000,846.4,1
+1600000,847.4,1
+1700000,847.2,1
+1800000,848.6,1
+1900000,848.8,1
+2000000,847.6,1
+2100000,846.5,1
+2200000,848.9,1
+2300000,848.8,1
+2400000,849.0,1
+2500000,847.0,1
+2600000,847.6,1
+2700000,848.7,1
+2800000,848.4,1
+2900000,848.7,1
+3000000,849.0,1
+3100000,848.7,1
+3200000,847.9,1
+3300000,849.1,1
+3400000,847.5,1
+3500000,848.5,1
+3600000,848.8,1
+3700000,848.2,1
+3800000,849.0,1
+3900000,848.3,1
+4000000,848.5,1
diff --git a/results/dog-run.csv b/results/dog-run.csv
new file mode 100644
index 0000000..8541209
--- /dev/null
+++ b/results/dog-run.csv
@@ -0,0 +1,423 @@
+step,reward,seed
+0,4.5,2
+100000,7.6,2
+200000,86.8,2
+300000,143.8,2
+400000,161.9,2
+500000,142.0,2
+600000,133.2,2
+700000,153.9,2
+800000,159.3,2
+900000,172.7,2
+1000000,234.7,2
+1100000,226.3,2
+1200000,287.8,2
+1300000,304.8,2
+1400000,384.0,2
+1500000,380.6,2
+1600000,345.5,2
+1700000,411.1,2
+1800000,421.8,2
+1900000,468.7,2
+2000000,503.2,2
+2100000,503.2,2
+2200000,518.9,2
+2300000,510.9,2
+2400000,531.4,2
+2500000,537.3,2
+2600000,551.1,2
+2700000,562.1,2
+2800000,572.9,2
+2900000,586.8,2
+3000000,600.0,2
+3100000,560.3,2
+3200000,602.9,2
+3300000,611.5,2
+3400000,626.3,2
+3500000,629.3,2
+3600000,634.0,2
+3700000,648.8,2
+3800000,623.8,2
+3900000,643.9,2
+4000000,655.6,2
+4100000,669.7,2
+4200000,666.4,2
+4300000,668.2,2
+4400000,670.7,2
+4500000,683.0,2
+4600000,700.3,2
+4700000,691.5,2
+4800000,691.1,2
+4900000,702.4,2
+5000000,701.8,2
+5100000,679.9,2
+5200000,709.2,2
+5300000,715.6,2
+5400000,733.2,2
+5500000,725.2,2
+5600000,725.8,2
+5700000,721.6,2
+5800000,747.1,2
+5900000,750.1,2
+6000000,743.6,2
+6100000,701.2,2
+6200000,743.6,2
+6300000,768.2,2
+6400000,756.2,2
+6500000,783.1,2
+6600000,806.5,2
+6700000,740.4,2
+6800000,781.7,2
+6900000,806.4,2
+7000000,802.4,2
+7100000,797.2,2
+7200000,799.0,2
+7300000,817.3,2
+7400000,821.9,2
+7500000,809.4,2
+7600000,847.8,2
+7700000,861.6,2
+7800000,825.3,2
+7900000,853.4,2
+8000000,857.5,2
+8100000,855.0,2
+8200000,868.9,2
+8300000,881.1,2
+8400000,832.6,2
+8500000,807.9,2
+8600000,832.1,2
+8700000,843.4,2
+8800000,836.3,2
+8900000,837.4,2
+9000000,867.5,2
+9100000,860.2,2
+9200000,876.6,2
+9300000,849.5,2
+9400000,878.5,2
+9500000,874.0,2
+9600000,860.9,2
+9700000,901.8,2
+9800000,896.1,2
+9900000,866.1,2
+10000000,873.3,2
+10100000,882.8,2
+10200000,872.6,2
+10300000,896.2,2
+10400000,861.1,2
+10500000,895.9,2
+10600000,878.6,2
+10700000,891.2,2
+10800000,885.4,2
+10900000,890.4,2
+11000000,878.3,2
+11100000,895.6,2
+11200000,882.7,2
+11300000,891.3,2
+11400000,837.0,2
+11500000,870.3,2
+11600000,889.4,2
+11700000,890.4,2
+11800000,872.1,2
+11900000,883.8,2
+12000000,878.4,2
+12100000,882.4,2
+12200000,888.6,2
+12300000,900.0,2
+12400000,891.4,2
+12500000,905.6,2
+12600000,882.9,2
+12700000,888.8,2
+12800000,871.5,2
+12900000,905.9,2
+13000000,894.2,2
+13100000,897.6,2
+13200000,900.3,2
+13300000,893.5,2
+13400000,889.5,2
+13500000,889.3,2
+13600000,898.5,2
+13700000,872.8,2
+13800000,894.2,2
+13900000,895.2,2
+14000000,881.1,2
+0,6.5,1
+100000,39.1,1
+200000,100.9,1
+300000,138.2,1
+400000,136.8,1
+500000,130.1,1
+600000,181.4,1
+700000,153.1,1
+800000,196.1,1
+900000,207.1,1
+1000000,247.7,1
+1100000,245.8,1
+1200000,291.4,1
+1300000,316.9,1
+1400000,56.4,1
+1500000,296.4,1
+1600000,403.0,1
+1700000,420.4,1
+1800000,408.1,1
+1900000,456.0,1
+2000000,414.9,1
+2100000,493.8,1
+2200000,503.9,1
+2300000,506.3,1
+2400000,496.8,1
+2500000,538.9,1
+2600000,546.3,1
+2700000,508.4,1
+2800000,565.0,1
+2900000,566.0,1
+3000000,585.3,1
+3100000,588.1,1
+3200000,592.4,1
+3300000,619.6,1
+3400000,625.3,1
+3500000,625.3,1
+3600000,620.4,1
+3700000,647.8,1
+3800000,644.8,1
+3900000,652.5,1
+4000000,706.2,1
+4100000,692.4,1
+4200000,685.7,1
+4300000,702.0,1
+4400000,699.4,1
+4500000,692.1,1
+4600000,710.5,1
+4700000,741.7,1
+4800000,708.2,1
+4900000,728.9,1
+5000000,721.1,1
+5100000,726.2,1
+5200000,692.4,1
+5300000,718.0,1
+5400000,729.2,1
+5500000,748.7,1
+5600000,749.9,1
+5700000,760.7,1
+5800000,764.9,1
+5900000,785.3,1
+6000000,740.1,1
+6100000,739.5,1
+6200000,799.8,1
+6300000,781.7,1
+6400000,802.1,1
+6500000,792.5,1
+6600000,822.7,1
+6700000,805.6,1
+6800000,829.5,1
+6900000,818.6,1
+7000000,814.5,1
+7100000,788.1,1
+7200000,832.1,1
+7300000,810.6,1
+7400000,815.4,1
+7500000,837.7,1
+7600000,842.2,1
+7700000,840.3,1
+7800000,791.3,1
+7900000,831.2,1
+8000000,798.5,1
+8100000,824.4,1
+8200000,838.2,1
+8300000,815.7,1
+8400000,830.2,1
+8500000,841.9,1
+8600000,847.3,1
+8700000,843.9,1
+8800000,850.2,1
+8900000,842.8,1
+9000000,848.9,1
+9100000,845.9,1
+9200000,866.2,1
+9300000,860.5,1
+9400000,863.4,1
+9500000,826.2,1
+9600000,850.6,1
+9700000,861.1,1
+9800000,848.1,1
+9900000,851.4,1
+10000000,843.8,1
+10100000,861.4,1
+10200000,852.1,1
+10300000,860.3,1
+10400000,844.6,1
+10500000,848.6,1
+10600000,860.9,1
+10700000,856.6,1
+10800000,850.8,1
+10900000,854.6,1
+11000000,845.8,1
+11100000,788.5,1
+11200000,837.7,1
+11300000,846.3,1
+11400000,847.0,1
+11500000,869.9,1
+11600000,861.3,1
+11700000,847.4,1
+11800000,841.6,1
+11900000,856.3,1
+12000000,827.8,1
+12100000,839.8,1
+12200000,863.9,1
+12300000,859.3,1
+12400000,857.4,1
+12500000,872.0,1
+12600000,852.0,1
+12700000,849.6,1
+12800000,853.9,1
+12900000,854.9,1
+13000000,827.8,1
+13100000,848.9,1
+13200000,863.5,1
+13300000,848.3,1
+13400000,873.4,1
+13500000,878.6,1
+13600000,865.5,1
+13700000,855.7,1
+13800000,864.8,1
+13900000,847.2,1
+14000000,859.1,1
+0,6.4,3
+100000,33.1,3
+200000,13.0,3
+300000,141.7,3
+400000,140.0,3
+500000,126.3,3
+600000,204.0,3
+700000,182.4,3
+800000,21.6,3
+900000,27.7,3
+1000000,27.2,3
+1100000,53.9,3
+1200000,79.8,3
+1300000,43.8,3
+1400000,41.0,3
+1500000,245.1,3
+1600000,250.7,3
+1700000,325.9,3
+1800000,235.0,3
+1900000,326.6,3
+2000000,362.9,3
+2100000,427.7,3
+2200000,482.3,3
+2300000,467.6,3
+2400000,455.0,3
+2500000,485.8,3
+2600000,543.9,3
+2700000,527.2,3
+2800000,523.2,3
+2900000,576.5,3
+3000000,413.3,3
+3100000,530.1,3
+3200000,592.5,3
+3300000,515.1,3
+3400000,598.3,3
+3500000,513.9,3
+3600000,661.6,3
+3700000,653.9,3
+3800000,676.8,3
+3900000,652.4,3
+4000000,685.3,3
+4100000,653.8,3
+4200000,705.3,3
+4300000,656.8,3
+4400000,628.7,3
+4500000,721.3,3
+4600000,731.9,3
+4700000,770.5,3
+4800000,739.4,3
+4900000,716.3,3
+5000000,778.7,3
+5100000,787.4,3
+5200000,791.5,3
+5300000,791.5,3
+5400000,796.4,3
+5500000,754.3,3
+5600000,804.4,3
+5700000,791.9,3
+5800000,791.1,3
+5900000,811.8,3
+6000000,781.7,3
+6100000,787.4,3
+6200000,790.8,3
+6300000,774.2,3
+6400000,820.6,3
+6500000,817.9,3
+6600000,784.3,3
+6700000,829.4,3
+6800000,773.1,3
+6900000,821.5,3
+7000000,800.1,3
+7100000,831.4,3
+7200000,804.0,3
+7300000,819.5,3
+7400000,807.0,3
+7500000,850.3,3
+7600000,832.3,3
+7700000,856.1,3
+7800000,846.5,3
+7900000,830.2,3
+8000000,800.4,3
+8100000,843.9,3
+8200000,859.8,3
+8300000,858.0,3
+8400000,864.8,3
+8500000,842.2,3
+8600000,844.1,3
+8700000,852.1,3
+8800000,858.2,3
+8900000,830.2,3
+9000000,851.9,3
+9100000,842.8,3
+9200000,798.7,3
+9300000,861.9,3
+9400000,847.2,3
+9500000,851.6,3
+9600000,870.4,3
+9700000,850.0,3
+9800000,852.3,3
+9900000,846.5,3
+10000000,865.8,3
+10100000,876.8,3
+10200000,843.0,3
+10300000,776.8,3
+10400000,861.0,3
+10500000,871.8,3
+10600000,841.2,3
+10700000,859.3,3
+10800000,876.6,3
+10900000,836.2,3
+11000000,849.6,3
+11100000,847.0,3
+11200000,863.5,3
+11300000,848.0,3
+11400000,871.4,3
+11500000,856.9,3
+11600000,831.0,3
+11700000,880.2,3
+11800000,872.9,3
+11900000,866.9,3
+12000000,868.8,3
+12100000,855.2,3
+12200000,807.0,3
+12300000,863.2,3
+12400000,836.8,3
+12500000,876.0,3
+12600000,856.1,3
+12700000,873.3,3
+12800000,874.7,3
+12900000,883.7,3
+13000000,872.1,3
+13100000,885.1,3
+13200000,853.0,3
+13300000,850.9,3
+13400000,874.5,3
+13500000,851.0,3
+13600000,885.7,3
+13700000,870.0,3
+13800000,868.6,3
+13900000,862.4,3
diff --git a/results/dog-stand.csv b/results/dog-stand.csv
new file mode 100644
index 0000000..c1ddbda
--- /dev/null
+++ b/results/dog-stand.csv
@@ -0,0 +1,423 @@
+step,reward,seed
+0,19.4,2
+100000,48.3,2
+200000,543.1,2
+300000,701.5,2
+400000,648.1,2
+500000,673.6,2
+600000,640.9,2
+700000,685.1,2
+800000,760.3,2
+900000,811.2,2
+1000000,830.8,2
+1100000,834.7,2
+1200000,812.1,2
+1300000,829.8,2
+1400000,797.2,2
+1500000,862.6,2
+1600000,890.4,2
+1700000,894.6,2
+1800000,925.7,2
+1900000,909.3,2
+2000000,899.0,2
+2100000,849.5,2
+2200000,962.4,2
+2300000,971.7,2
+2400000,973.5,2
+2500000,967.8,2
+2600000,959.4,2
+2700000,963.5,2
+2800000,936.0,2
+2900000,974.4,2
+3000000,954.8,2
+3100000,961.9,2
+3200000,975.8,2
+3300000,869.3,2
+3400000,939.0,2
+3500000,897.8,2
+3600000,972.4,2
+3700000,978.7,2
+3800000,972.6,2
+3900000,969.4,2
+4000000,965.3,2
+4100000,959.5,2
+4200000,936.4,2
+4300000,971.4,2
+4400000,935.1,2
+4500000,975.4,2
+4600000,976.4,2
+4700000,981.5,2
+4800000,970.8,2
+4900000,968.5,2
+5000000,977.8,2
+5100000,972.8,2
+5200000,968.5,2
+5300000,959.6,2
+5400000,972.9,2
+5500000,981.2,2
+5600000,946.1,2
+5700000,980.7,2
+5800000,956.0,2
+5900000,966.5,2
+6000000,967.9,2
+6100000,978.0,2
+6200000,979.3,2
+6300000,973.1,2
+6400000,979.6,2
+6500000,967.4,2
+6600000,981.8,2
+6700000,975.6,2
+6800000,967.9,2
+6900000,964.0,2
+7000000,974.0,2
+7100000,981.5,2
+7200000,976.7,2
+7300000,984.9,2
+7400000,961.4,2
+7500000,969.0,2
+7600000,979.5,2
+7700000,971.1,2
+7800000,958.1,2
+7900000,977.1,2
+8000000,965.5,2
+8100000,984.5,2
+8200000,736.2,2
+8300000,965.3,2
+8400000,981.3,2
+8500000,983.4,2
+8600000,983.6,2
+8700000,976.0,2
+8800000,981.0,2
+8900000,965.3,2
+9000000,972.5,2
+9100000,980.6,2
+9200000,942.9,2
+9300000,932.5,2
+9400000,972.7,2
+9500000,979.1,2
+9600000,947.6,2
+9700000,960.6,2
+9800000,982.0,2
+9900000,933.9,2
+10000000,897.3,2
+10100000,912.8,2
+10200000,940.1,2
+10300000,980.0,2
+10400000,945.2,2
+10500000,976.0,2
+10600000,974.2,2
+10700000,980.6,2
+10800000,985.3,2
+10900000,983.5,2
+11000000,982.8,2
+11100000,974.9,2
+11200000,952.8,2
+11300000,965.2,2
+11400000,980.5,2
+11500000,966.1,2
+11600000,954.6,2
+11700000,965.1,2
+11800000,959.9,2
+11900000,979.4,2
+12000000,920.8,2
+12100000,965.9,2
+12200000,974.6,2
+12300000,972.2,2
+12400000,968.1,2
+12500000,974.3,2
+12600000,968.8,2
+12700000,976.7,2
+12800000,978.7,2
+12900000,982.1,2
+13000000,982.6,2
+13100000,955.4,2
+13200000,954.6,2
+13300000,958.6,2
+13400000,928.9,2
+13500000,972.6,2
+13600000,951.2,2
+13700000,980.5,2
+13800000,948.8,2
+13900000,925.8,2
+14000000,966.9,2
+0,27.7,1
+100000,274.0,1
+200000,562.1,1
+300000,782.0,1
+400000,787.1,1
+500000,765.0,1
+600000,830.9,1
+700000,796.2,1
+800000,757.5,1
+900000,796.4,1
+1000000,775.6,1
+1100000,816.6,1
+1200000,925.3,1
+1300000,902.6,1
+1400000,934.1,1
+1500000,783.5,1
+1600000,932.5,1
+1700000,897.7,1
+1800000,942.3,1
+1900000,944.0,1
+2000000,944.6,1
+2100000,944.3,1
+2200000,957.1,1
+2300000,907.3,1
+2400000,938.2,1
+2500000,906.3,1
+2600000,964.2,1
+2700000,957.2,1
+2800000,888.0,1
+2900000,967.0,1
+3000000,967.6,1
+3100000,912.4,1
+3200000,960.0,1
+3300000,926.8,1
+3400000,953.2,1
+3500000,963.0,1
+3600000,937.9,1
+3700000,934.8,1
+3800000,952.7,1
+3900000,965.1,1
+4000000,952.7,1
+4100000,980.4,1
+4200000,934.2,1
+4300000,962.5,1
+4400000,950.8,1
+4500000,970.8,1
+4600000,952.9,1
+4700000,967.1,1
+4800000,925.7,1
+4900000,983.3,1
+5000000,956.3,1
+5100000,936.2,1
+5200000,960.6,1
+5300000,973.5,1
+5400000,983.5,1
+5500000,940.4,1
+5600000,979.4,1
+5700000,981.5,1
+5800000,985.1,1
+5900000,981.6,1
+6000000,957.9,1
+6100000,957.1,1
+6200000,976.3,1
+6300000,897.7,1
+6400000,976.6,1
+6500000,975.6,1
+6600000,982.9,1
+6700000,984.1,1
+6800000,976.3,1
+6900000,986.8,1
+7000000,944.0,1
+7100000,975.7,1
+7200000,935.4,1
+7300000,885.8,1
+7400000,935.2,1
+7500000,975.5,1
+7600000,985.8,1
+7700000,922.1,1
+7800000,983.4,1
+7900000,989.7,1
+8000000,972.3,1
+8100000,950.2,1
+8200000,928.0,1
+8300000,963.0,1
+8400000,987.7,1
+8500000,961.3,1
+8600000,975.4,1
+8700000,954.5,1
+8800000,962.6,1
+8900000,964.4,1
+9000000,830.2,1
+9100000,986.4,1
+9200000,804.8,1
+9300000,771.5,1
+9400000,892.2,1
+9500000,962.1,1
+9600000,966.7,1
+9700000,367.7,1
+9800000,961.0,1
+9900000,977.0,1
+10000000,891.7,1
+10100000,841.9,1
+10200000,941.9,1
+10300000,955.1,1
+10400000,908.7,1
+10500000,771.3,1
+10600000,848.8,1
+10700000,955.3,1
+10800000,799.4,1
+10900000,965.5,1
+11000000,963.1,1
+11100000,973.7,1
+11200000,952.2,1
+11300000,960.4,1
+11400000,924.0,1
+11500000,932.3,1
+11600000,969.0,1
+11700000,935.9,1
+11800000,946.5,1
+11900000,948.7,1
+12000000,983.0,1
+12100000,978.4,1
+12200000,983.5,1
+12300000,915.5,1
+12400000,755.0,1
+12500000,877.3,1
+12600000,934.9,1
+12700000,938.3,1
+12800000,883.1,1
+12900000,938.6,1
+13000000,922.0,1
+13100000,955.2,1
+13200000,941.8,1
+13300000,964.1,1
+13400000,964.1,1
+13500000,970.4,1
+13600000,958.9,1
+13700000,937.8,1
+13800000,992.2,1
+13900000,983.4,1
+14000000,992.7,1
+0,29.2,3
+100000,117.3,3
+200000,472.4,3
+300000,719.6,3
+400000,586.0,3
+500000,661.4,3
+600000,601.4,3
+700000,632.4,3
+800000,745.8,3
+900000,570.8,3
+1000000,790.4,3
+1100000,773.1,3
+1200000,843.9,3
+1300000,866.8,3
+1400000,938.4,3
+1500000,887.3,3
+1600000,952.6,3
+1700000,948.8,3
+1800000,941.2,3
+1900000,937.0,3
+2000000,956.4,3
+2100000,922.2,3
+2200000,959.1,3
+2300000,967.1,3
+2400000,950.9,3
+2500000,941.1,3
+2600000,963.7,3
+2700000,963.3,3
+2800000,968.9,3
+2900000,973.4,3
+3000000,968.3,3
+3100000,954.7,3
+3200000,960.8,3
+3300000,962.9,3
+3400000,973.3,3
+3500000,969.2,3
+3600000,975.2,3
+3700000,956.6,3
+3800000,880.4,3
+3900000,967.5,3
+4000000,930.1,3
+4100000,980.6,3
+4200000,964.5,3
+4300000,440.5,3
+4400000,966.0,3
+4500000,977.7,3
+4600000,973.5,3
+4700000,968.6,3
+4800000,979.6,3
+4900000,972.8,3
+5000000,982.3,3
+5100000,960.2,3
+5200000,980.1,3
+5300000,980.0,3
+5400000,971.3,3
+5500000,712.5,3
+5600000,977.1,3
+5700000,975.0,3
+5800000,807.6,3
+5900000,958.3,3
+6000000,972.3,3
+6100000,961.6,3
+6200000,977.1,3
+6300000,844.1,3
+6400000,969.0,3
+6500000,952.7,3
+6600000,956.7,3
+6700000,976.4,3
+6800000,978.8,3
+6900000,449.8,3
+7000000,976.9,3
+7100000,980.3,3
+7200000,962.5,3
+7300000,973.1,3
+7400000,964.9,3
+7500000,977.2,3
+7600000,977.5,3
+7700000,977.7,3
+7800000,967.6,3
+7900000,977.3,3
+8000000,962.6,3
+8100000,980.8,3
+8200000,948.2,3
+8300000,963.2,3
+8400000,823.4,3
+8500000,971.8,3
+8600000,972.2,3
+8700000,974.2,3
+8800000,973.9,3
+8900000,975.3,3
+9000000,979.0,3
+9100000,960.7,3
+9200000,972.4,3
+9300000,982.8,3
+9400000,957.0,3
+9500000,484.3,3
+9600000,923.4,3
+9700000,968.2,3
+9800000,980.6,3
+9900000,978.3,3
+10000000,982.6,3
+10100000,978.6,3
+10200000,978.3,3
+10300000,964.2,3
+10400000,980.7,3
+10500000,984.1,3
+10600000,976.0,3
+10700000,968.6,3
+10800000,778.3,3
+10900000,975.8,3
+11000000,940.0,3
+11100000,584.9,3
+11200000,965.2,3
+11300000,964.9,3
+11400000,981.9,3
+11500000,976.5,3
+11600000,952.8,3
+11700000,979.5,3
+11800000,975.6,3
+11900000,962.7,3
+12000000,976.1,3
+12100000,813.0,3
+12200000,965.7,3
+12300000,970.2,3
+12400000,980.4,3
+12500000,983.0,3
+12600000,972.9,3
+12700000,973.5,3
+12800000,984.9,3
+12900000,981.7,3
+13000000,981.0,3
+13100000,971.7,3
+13200000,982.1,3
+13300000,985.2,3
+13400000,976.2,3
+13500000,984.8,3
+13600000,966.7,3
+13700000,979.6,3
+13800000,910.8,3
+13900000,984.9,3
diff --git a/results/dog-trot.csv b/results/dog-trot.csv
new file mode 100644
index 0000000..4539d99
--- /dev/null
+++ b/results/dog-trot.csv
@@ -0,0 +1,423 @@
+step,reward,seed
+0,6.2,2
+100000,5.0,2
+200000,89.2,2
+300000,205.5,2
+400000,237.0,2
+500000,66.2,2
+600000,283.3,2
+700000,319.7,2
+800000,400.3,2
+900000,342.8,2
+1000000,481.2,2
+1100000,552.0,2
+1200000,567.0,2
+1300000,698.5,2
+1400000,730.2,2
+1500000,712.6,2
+1600000,858.0,2
+1700000,886.4,2
+1800000,893.2,2
+1900000,889.4,2
+2000000,906.8,2
+2100000,905.4,2
+2200000,876.7,2
+2300000,380.4,2
+2400000,914.4,2
+2500000,915.5,2
+2600000,910.4,2
+2700000,901.0,2
+2800000,896.8,2
+2900000,917.4,2
+3000000,909.6,2
+3100000,917.9,2
+3200000,741.7,2
+3300000,926.8,2
+3400000,912.5,2
+3500000,924.6,2
+3600000,899.1,2
+3700000,908.7,2
+3800000,923.2,2
+3900000,933.3,2
+4000000,949.3,2
+4100000,902.4,2
+4200000,940.7,2
+4300000,935.4,2
+4400000,938.5,2
+4500000,932.6,2
+4600000,931.3,2
+4700000,904.1,2
+4800000,926.7,2
+4900000,931.0,2
+5000000,942.1,2
+5100000,941.5,2
+5200000,935.4,2
+5300000,940.0,2
+5400000,951.6,2
+5500000,940.0,2
+5600000,939.7,2
+5700000,930.3,2
+5800000,925.7,2
+5900000,946.4,2
+6000000,954.2,2
+6100000,936.6,2
+6200000,919.9,2
+6300000,956.2,2
+6400000,932.2,2
+6500000,945.2,2
+6600000,953.3,2
+6700000,940.0,2
+6800000,930.0,2
+6900000,946.0,2
+7000000,947.6,2
+7100000,954.0,2
+7200000,956.9,2
+7300000,951.3,2
+7400000,942.5,2
+7500000,949.4,2
+7600000,928.2,2
+7700000,939.3,2
+7800000,957.4,2
+7900000,955.5,2
+8000000,935.6,2
+8100000,948.1,2
+8200000,937.6,2
+8300000,944.0,2
+8400000,922.9,2
+8500000,952.7,2
+8600000,953.5,2
+8700000,937.5,2
+8800000,937.0,2
+8900000,898.2,2
+9000000,953.8,2
+9100000,941.4,2
+9200000,918.1,2
+9300000,956.3,2
+9400000,947.5,2
+9500000,958.3,2
+9600000,949.1,2
+9700000,959.5,2
+9800000,961.4,2
+9900000,948.2,2
+10000000,952.1,2
+10100000,957.1,2
+10200000,960.9,2
+10300000,945.0,2
+10400000,965.7,2
+10500000,952.5,2
+10600000,948.6,2
+10700000,967.1,2
+10800000,955.0,2
+10900000,955.2,2
+11000000,961.3,2
+11100000,927.4,2
+11200000,958.4,2
+11300000,952.8,2
+11400000,950.7,2
+11500000,950.9,2
+11600000,957.5,2
+11700000,934.0,2
+11800000,969.7,2
+11900000,953.5,2
+12000000,936.9,2
+12100000,956.4,2
+12200000,952.2,2
+12300000,952.5,2
+12400000,959.4,2
+12500000,969.5,2
+12600000,944.9,2
+12700000,962.5,2
+12800000,962.3,2
+12900000,957.5,2
+13000000,962.9,2
+13100000,954.2,2
+13200000,946.5,2
+13300000,955.7,2
+13400000,962.0,2
+13500000,965.3,2
+13600000,962.2,2
+13700000,951.1,2
+13800000,960.5,2
+13900000,946.2,2
+14000000,923.5,2
+0,7.9,1
+100000,10.4,1
+200000,97.2,1
+300000,100.9,1
+400000,180.2,1
+500000,319.2,1
+600000,330.5,1
+700000,385.9,1
+800000,55.9,1
+900000,364.9,1
+1000000,512.7,1
+1100000,618.9,1
+1200000,597.0,1
+1300000,577.6,1
+1400000,616.0,1
+1500000,775.3,1
+1600000,712.2,1
+1700000,800.4,1
+1800000,861.5,1
+1900000,860.2,1
+2000000,854.1,1
+2100000,844.9,1
+2200000,857.0,1
+2300000,886.9,1
+2400000,871.7,1
+2500000,883.1,1
+2600000,857.4,1
+2700000,901.9,1
+2800000,903.4,1
+2900000,919.1,1
+3000000,879.8,1
+3100000,933.1,1
+3200000,927.0,1
+3300000,941.1,1
+3400000,936.4,1
+3500000,946.8,1
+3600000,931.9,1
+3700000,964.8,1
+3800000,945.9,1
+3900000,957.7,1
+4000000,955.6,1
+4100000,972.4,1
+4200000,955.1,1
+4300000,944.0,1
+4400000,965.0,1
+4500000,949.7,1
+4600000,962.7,1
+4700000,967.8,1
+4800000,969.2,1
+4900000,951.2,1
+5000000,956.1,1
+5100000,950.2,1
+5200000,969.7,1
+5300000,963.2,1
+5400000,954.2,1
+5500000,952.0,1
+5600000,963.0,1
+5700000,949.1,1
+5800000,958.3,1
+5900000,967.1,1
+6000000,948.7,1
+6100000,965.8,1
+6200000,962.0,1
+6300000,950.3,1
+6400000,934.8,1
+6500000,945.3,1
+6600000,959.1,1
+6700000,963.5,1
+6800000,963.2,1
+6900000,955.0,1
+7000000,963.9,1
+7100000,981.1,1
+7200000,959.5,1
+7300000,970.3,1
+7400000,962.3,1
+7500000,952.0,1
+7600000,970.2,1
+7700000,961.3,1
+7800000,950.9,1
+7900000,962.9,1
+8000000,970.2,1
+8100000,957.1,1
+8200000,968.6,1
+8300000,957.3,1
+8400000,970.4,1
+8500000,967.5,1
+8600000,946.5,1
+8700000,985.0,1
+8800000,962.9,1
+8900000,963.0,1
+9000000,979.4,1
+9100000,974.5,1
+9200000,967.1,1
+9300000,962.8,1
+9400000,953.4,1
+9500000,962.3,1
+9600000,964.7,1
+9700000,978.4,1
+9800000,972.7,1
+9900000,978.8,1
+10000000,980.2,1
+10100000,967.4,1
+10200000,956.2,1
+10300000,973.3,1
+10400000,967.1,1
+10500000,946.9,1
+10600000,965.0,1
+10700000,977.9,1
+10800000,965.2,1
+10900000,962.8,1
+11000000,969.8,1
+11100000,967.3,1
+11200000,961.1,1
+11300000,976.8,1
+11400000,971.7,1
+11500000,980.8,1
+11600000,977.5,1
+11700000,970.3,1
+11800000,976.3,1
+11900000,984.2,1
+12000000,956.0,1
+12100000,968.9,1
+12200000,972.5,1
+12300000,970.6,1
+12400000,978.7,1
+12500000,970.5,1
+12600000,969.9,1
+12700000,976.4,1
+12800000,985.7,1
+12900000,974.6,1
+13000000,953.3,1
+13100000,971.4,1
+13200000,979.2,1
+13300000,979.0,1
+13400000,963.3,1
+13500000,942.2,1
+13600000,976.3,1
+13700000,972.2,1
+13800000,967.1,1
+13900000,978.7,1
+14000000,977.7,1
+0,7.8,3
+100000,21.3,3
+200000,26.6,3
+300000,110.6,3
+400000,27.2,3
+500000,11.3,3
+600000,18.4,3
+700000,307.3,3
+800000,348.9,3
+900000,483.2,3
+1000000,506.2,3
+1100000,560.3,3
+1200000,658.6,3
+1300000,620.5,3
+1400000,646.9,3
+1500000,829.0,3
+1600000,840.5,3
+1700000,859.7,3
+1800000,857.1,3
+1900000,855.7,3
+2000000,892.3,3
+2100000,873.9,3
+2200000,886.7,3
+2300000,860.6,3
+2400000,875.2,3
+2500000,908.1,3
+2600000,912.8,3
+2700000,918.9,3
+2800000,912.7,3
+2900000,839.0,3
+3000000,909.8,3
+3100000,901.0,3
+3200000,917.7,3
+3300000,835.5,3
+3400000,930.9,3
+3500000,573.2,3
+3600000,822.6,3
+3700000,914.4,3
+3800000,922.1,3
+3900000,913.6,3
+4000000,935.8,3
+4100000,932.7,3
+4200000,863.2,3
+4300000,659.9,3
+4400000,943.4,3
+4500000,904.9,3
+4600000,915.3,3
+4700000,936.5,3
+4800000,920.8,3
+4900000,946.6,3
+5000000,951.0,3
+5100000,950.7,3
+5200000,935.5,3
+5300000,865.6,3
+5400000,839.9,3
+5500000,880.2,3
+5600000,861.9,3
+5700000,947.2,3
+5800000,933.2,3
+5900000,956.0,3
+6000000,854.7,3
+6100000,587.7,3
+6200000,966.4,3
+6300000,927.8,3
+6400000,966.6,3
+6500000,959.3,3
+6600000,940.5,3
+6700000,960.0,3
+6800000,879.2,3
+6900000,941.0,3
+7000000,856.5,3
+7100000,965.0,3
+7200000,813.5,3
+7300000,851.1,3
+7400000,674.0,3
+7500000,952.5,3
+7600000,859.0,3
+7700000,808.6,3
+7800000,914.9,3
+7900000,944.1,3
+8000000,957.2,3
+8100000,669.0,3
+8200000,954.2,3
+8300000,860.4,3
+8400000,958.5,3
+8500000,768.2,3
+8600000,933.6,3
+8700000,951.7,3
+8800000,920.4,3
+8900000,854.2,3
+9000000,958.2,3
+9100000,961.0,3
+9200000,950.7,3
+9300000,956.1,3
+9400000,954.5,3
+9500000,955.9,3
+9600000,954.4,3
+9700000,913.0,3
+9800000,961.0,3
+9900000,923.8,3
+10000000,946.0,3
+10100000,932.3,3
+10200000,930.7,3
+10300000,941.4,3
+10400000,950.2,3
+10500000,862.1,3
+10600000,964.4,3
+10700000,866.4,3
+10800000,931.8,3
+10900000,913.5,3
+11000000,946.8,3
+11100000,955.4,3
+11200000,964.0,3
+11300000,953.0,3
+11400000,950.3,3
+11500000,960.0,3
+11600000,963.3,3
+11700000,938.7,3
+11800000,953.4,3
+11900000,952.2,3
+12000000,937.9,3
+12100000,932.7,3
+12200000,915.5,3
+12300000,949.4,3
+12400000,941.5,3
+12500000,920.2,3
+12600000,943.7,3
+12700000,954.1,3
+12800000,960.1,3
+12900000,964.4,3
+13000000,915.6,3
+13100000,962.2,3
+13200000,940.1,3
+13300000,953.0,3
+13400000,955.3,3
+13500000,954.0,3
+13600000,945.5,3
+13700000,944.8,3
+13800000,954.9,3
+13900000,957.2,3
diff --git a/results/dog-walk.csv b/results/dog-walk.csv
new file mode 100644
index 0000000..8835a29
--- /dev/null
+++ b/results/dog-walk.csv
@@ -0,0 +1,423 @@
+step,reward,seed
+0,6.7,2
+100000,36.2,2
+200000,110.3,2
+300000,349.0,2
+400000,453.6,2
+500000,554.0,2
+600000,568.2,2
+700000,692.7,2
+800000,714.1,2
+900000,793.3,2
+1000000,787.0,2
+1100000,851.3,2
+1200000,908.4,2
+1300000,926.6,2
+1400000,930.1,2
+1500000,943.3,2
+1600000,942.4,2
+1700000,943.1,2
+1800000,946.8,2
+1900000,944.2,2
+2000000,929.0,2
+2100000,946.9,2
+2200000,952.9,2
+2300000,958.0,2
+2400000,959.7,2
+2500000,953.5,2
+2600000,952.3,2
+2700000,962.8,2
+2800000,967.8,2
+2900000,953.7,2
+3000000,961.7,2
+3100000,955.1,2
+3200000,957.8,2
+3300000,963.7,2
+3400000,959.6,2
+3500000,952.4,2
+3600000,950.5,2
+3700000,959.2,2
+3800000,938.9,2
+3900000,925.2,2
+4000000,964.3,2
+4100000,960.8,2
+4200000,947.3,2
+4300000,942.3,2
+4400000,945.0,2
+4500000,940.8,2
+4600000,953.9,2
+4700000,941.2,2
+4800000,949.9,2
+4900000,971.4,2
+5000000,967.5,2
+5100000,958.8,2
+5200000,965.3,2
+5300000,915.7,2
+5400000,950.1,2
+5500000,959.4,2
+5600000,962.4,2
+5700000,967.3,2
+5800000,938.5,2
+5900000,949.7,2
+6000000,960.9,2
+6100000,955.5,2
+6200000,950.7,2
+6300000,948.2,2
+6400000,943.8,2
+6500000,967.4,2
+6600000,948.3,2
+6700000,953.7,2
+6800000,962.2,2
+6900000,952.9,2
+7000000,964.3,2
+7100000,956.8,2
+7200000,963.3,2
+7300000,967.1,2
+7400000,956.6,2
+7500000,949.5,2
+7600000,965.8,2
+7700000,944.2,2
+7800000,956.7,2
+7900000,946.9,2
+8000000,961.6,2
+8100000,957.2,2
+8200000,965.6,2
+8300000,951.5,2
+8400000,958.6,2
+8500000,957.8,2
+8600000,960.7,2
+8700000,961.7,2
+8800000,957.6,2
+8900000,957.9,2
+9000000,959.1,2
+9100000,962.0,2
+9200000,963.9,2
+9300000,962.9,2
+9400000,963.2,2
+9500000,936.8,2
+9600000,940.8,2
+9700000,972.9,2
+9800000,971.8,2
+9900000,963.8,2
+10000000,967.0,2
+10100000,965.1,2
+10200000,960.4,2
+10300000,957.5,2
+10400000,968.4,2
+10500000,953.7,2
+10600000,952.9,2
+10700000,966.9,2
+10800000,937.2,2
+10900000,958.9,2
+11000000,966.1,2
+11100000,947.8,2
+11200000,971.9,2
+11300000,966.9,2
+11400000,967.4,2
+11500000,948.2,2
+11600000,967.8,2
+11700000,973.5,2
+11800000,959.3,2
+11900000,963.5,2
+12000000,975.1,2
+12100000,959.8,2
+12200000,978.4,2
+12300000,978.5,2
+12400000,962.4,2
+12500000,971.9,2
+12600000,955.3,2
+12700000,853.4,2
+12800000,980.0,2
+12900000,973.4,2
+13000000,972.9,2
+13100000,968.9,2
+13200000,954.2,2
+13300000,967.7,2
+13400000,973.5,2
+13500000,967.2,2
+13600000,962.9,2
+13700000,964.6,2
+13800000,975.4,2
+13900000,967.5,2
+14000000,976.8,2
+0,8.8,1
+100000,48.6,1
+200000,232.3,1
+300000,334.4,1
+400000,405.7,1
+500000,439.7,1
+600000,658.5,1
+700000,628.2,1
+800000,638.0,1
+900000,558.6,1
+1000000,632.0,1
+1100000,831.4,1
+1200000,908.1,1
+1300000,918.1,1
+1400000,916.3,1
+1500000,933.3,1
+1600000,939.8,1
+1700000,949.8,1
+1800000,949.4,1
+1900000,944.2,1
+2000000,946.3,1
+2100000,942.1,1
+2200000,949.7,1
+2300000,941.6,1
+2400000,940.1,1
+2500000,935.3,1
+2600000,948.7,1
+2700000,946.6,1
+2800000,942.8,1
+2900000,945.0,1
+3000000,955.1,1
+3100000,953.6,1
+3200000,944.2,1
+3300000,938.2,1
+3400000,958.8,1
+3500000,947.3,1
+3600000,965.5,1
+3700000,963.9,1
+3800000,946.6,1
+3900000,962.9,1
+4000000,919.9,1
+4100000,941.4,1
+4200000,931.4,1
+4300000,943.9,1
+4400000,913.2,1
+4500000,946.0,1
+4600000,940.2,1
+4700000,954.5,1
+4800000,954.9,1
+4900000,951.3,1
+5000000,899.5,1
+5100000,958.9,1
+5200000,847.1,1
+5300000,960.7,1
+5400000,957.8,1
+5500000,952.6,1
+5600000,888.1,1
+5700000,960.3,1
+5800000,943.0,1
+5900000,950.2,1
+6000000,972.5,1
+6100000,961.5,1
+6200000,952.3,1
+6300000,955.0,1
+6400000,956.6,1
+6500000,892.8,1
+6600000,940.3,1
+6700000,959.9,1
+6800000,967.9,1
+6900000,942.4,1
+7000000,865.7,1
+7100000,962.5,1
+7200000,969.7,1
+7300000,966.4,1
+7400000,958.6,1
+7500000,970.3,1
+7600000,962.0,1
+7700000,962.8,1
+7800000,965.4,1
+7900000,973.4,1
+8000000,956.3,1
+8100000,964.1,1
+8200000,958.2,1
+8300000,975.6,1
+8400000,967.2,1
+8500000,970.5,1
+8600000,914.2,1
+8700000,967.2,1
+8800000,969.6,1
+8900000,968.1,1
+9000000,968.7,1
+9100000,970.6,1
+9200000,950.9,1
+9300000,968.2,1
+9400000,969.6,1
+9500000,967.4,1
+9600000,963.4,1
+9700000,975.4,1
+9800000,969.0,1
+9900000,975.9,1
+10000000,961.9,1
+10100000,969.5,1
+10200000,969.4,1
+10300000,944.1,1
+10400000,969.7,1
+10500000,973.0,1
+10600000,962.8,1
+10700000,977.7,1
+10800000,938.5,1
+10900000,967.4,1
+11000000,971.8,1
+11100000,964.1,1
+11200000,969.4,1
+11300000,962.1,1
+11400000,968.8,1
+11500000,975.6,1
+11600000,971.4,1
+11700000,968.2,1
+11800000,973.3,1
+11900000,913.7,1
+12000000,970.3,1
+12100000,910.0,1
+12200000,972.4,1
+12300000,975.6,1
+12400000,959.0,1
+12500000,955.0,1
+12600000,948.2,1
+12700000,965.9,1
+12800000,953.1,1
+12900000,967.3,1
+13000000,960.5,1
+13100000,921.0,1
+13200000,963.8,1
+13300000,950.3,1
+13400000,965.3,1
+13500000,958.4,1
+13600000,966.4,1
+13700000,966.6,1
+13800000,939.8,1
+13900000,953.3,1
+14000000,974.5,1
+0,8.9,3
+100000,19.1,3
+200000,280.5,3
+300000,427.0,3
+400000,436.3,3
+500000,454.2,3
+600000,707.7,3
+700000,43.9,3
+800000,31.1,3
+900000,139.8,3
+1000000,62.8,3
+1100000,95.6,3
+1200000,98.9,3
+1300000,54.6,3
+1400000,28.9,3
+1500000,284.0,3
+1600000,93.2,3
+1700000,32.3,3
+1800000,165.3,3
+1900000,732.5,3
+2000000,779.7,3
+2100000,373.8,3
+2200000,604.4,3
+2300000,49.0,3
+2400000,633.5,3
+2500000,600.5,3
+2600000,290.0,3
+2700000,673.8,3
+2800000,403.4,3
+2900000,529.8,3
+3000000,642.2,3
+3100000,937.0,3
+3200000,941.1,3
+3300000,957.6,3
+3400000,960.2,3
+3500000,941.5,3
+3600000,960.7,3
+3700000,946.1,3
+3800000,957.0,3
+3900000,884.3,3
+4000000,945.3,3
+4100000,951.3,3
+4200000,894.8,3
+4300000,942.8,3
+4400000,927.1,3
+4500000,939.2,3
+4600000,850.0,3
+4700000,963.8,3
+4800000,869.1,3
+4900000,843.2,3
+5000000,951.1,3
+5100000,943.7,3
+5200000,868.1,3
+5300000,950.3,3
+5400000,939.0,3
+5500000,948.4,3
+5600000,949.9,3
+5700000,966.0,3
+5800000,961.5,3
+5900000,957.4,3
+6000000,962.6,3
+6100000,955.3,3
+6200000,973.1,3
+6300000,953.4,3
+6400000,974.1,3
+6500000,956.1,3
+6600000,936.1,3
+6700000,950.3,3
+6800000,959.8,3
+6900000,966.7,3
+7000000,956.4,3
+7100000,915.2,3
+7200000,967.0,3
+7300000,957.0,3
+7400000,950.9,3
+7500000,847.8,3
+7600000,935.4,3
+7700000,957.6,3
+7800000,957.3,3
+7900000,950.7,3
+8000000,956.6,3
+8100000,967.3,3
+8200000,960.6,3
+8300000,944.6,3
+8400000,969.1,3
+8500000,962.0,3
+8600000,925.1,3
+8700000,937.0,3
+8800000,955.5,3
+8900000,949.2,3
+9000000,947.5,3
+9100000,957.1,3
+9200000,953.5,3
+9300000,960.5,3
+9400000,938.6,3
+9500000,941.7,3
+9600000,972.4,3
+9700000,960.8,3
+9800000,954.9,3
+9900000,958.4,3
+10000000,974.8,3
+10100000,942.1,3
+10200000,957.4,3
+10300000,925.6,3
+10400000,948.5,3
+10500000,971.0,3
+10600000,953.8,3
+10700000,966.3,3
+10800000,934.9,3
+10900000,927.7,3
+11000000,945.7,3
+11100000,943.2,3
+11200000,954.8,3
+11300000,932.0,3
+11400000,948.3,3
+11500000,953.2,3
+11600000,938.5,3
+11700000,972.1,3
+11800000,934.8,3
+11900000,957.9,3
+12000000,943.5,3
+12100000,964.1,3
+12200000,958.2,3
+12300000,946.5,3
+12400000,949.8,3
+12500000,964.8,3
+12600000,954.4,3
+12700000,968.5,3
+12800000,974.0,3
+12900000,967.8,3
+13000000,951.8,3
+13100000,977.3,3
+13200000,952.3,3
+13300000,946.5,3
+13400000,964.6,3
+13500000,968.7,3
+13600000,953.3,3
+13700000,916.5,3
+13800000,954.1,3
+13900000,925.6,3
diff --git a/results/finger-spin.csv b/results/finger-spin.csv
new file mode 100644
index 0000000..88d427f
--- /dev/null
+++ b/results/finger-spin.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,0.0,3
+100000,985.6,3
+200000,978.6,3
+300000,980.3,3
+400000,987.5,3
+500000,982.7,3
+600000,982.5,3
+700000,977.8,3
+800000,985.1,3
+900000,983.7,3
+1000000,984.8,3
+1100000,987.0,3
+1200000,987.4,3
+1300000,986.2,3
+1400000,984.7,3
+1500000,987.5,3
+1600000,981.9,3
+1700000,987.0,3
+1800000,983.2,3
+1900000,986.8,3
+2000000,988.2,3
+2100000,988.3,3
+2200000,985.7,3
+2300000,985.6,3
+2400000,985.3,3
+2500000,988.1,3
+2600000,988.0,3
+2700000,989.8,3
+2800000,983.4,3
+2900000,983.7,3
+3000000,990.1,3
+3100000,981.7,3
+3200000,985.9,3
+3300000,988.5,3
+3400000,988.3,3
+3500000,988.7,3
+3600000,983.8,3
+3700000,986.5,3
+3800000,986.8,3
+3900000,987.9,3
+4000000,991.0,3
+0,0.0,2
+100000,937.3,2
+200000,980.7,2
+300000,982.4,2
+400000,987.0,2
+500000,982.1,2
+600000,983.1,2
+700000,984.1,2
+800000,985.1,2
+900000,989.9,2
+1000000,987.0,2
+1100000,990.3,2
+1200000,986.5,2
+1300000,987.4,2
+1400000,986.9,2
+1500000,990.9,2
+1600000,986.0,2
+1700000,987.3,2
+1800000,988.5,2
+1900000,987.5,2
+2000000,989.7,2
+2100000,989.1,2
+2200000,987.0,2
+2300000,985.8,2
+2400000,988.1,2
+2500000,989.9,2
+2600000,989.4,2
+2700000,989.5,2
+2800000,989.7,2
+2900000,991.4,2
+3000000,991.3,2
+3100000,982.8,2
+3200000,987.5,2
+3300000,990.3,2
+3400000,987.9,2
+3500000,991.0,2
+3600000,991.4,2
+3700000,985.3,2
+3800000,993.2,2
+3900000,989.5,2
+4000000,989.5,2
+0,0.0,1
+100000,919.6,1
+200000,963.9,1
+300000,979.2,1
+400000,987.5,1
+500000,989.1,1
+600000,985.3,1
+700000,985.9,1
+800000,987.1,1
+900000,990.3,1
+1000000,985.4,1
+1100000,987.5,1
+1200000,889.4,1
+1300000,988.7,1
+1400000,989.3,1
+1500000,989.0,1
+1600000,989.1,1
+1700000,989.5,1
+1800000,986.4,1
+1900000,989.5,1
+2000000,989.2,1
+2100000,904.6,1
+2200000,988.5,1
+2300000,987.3,1
+2400000,986.2,1
+2500000,987.8,1
+2600000,990.4,1
+2700000,989.6,1
+2800000,992.7,1
+2900000,989.1,1
+3000000,989.7,1
+3100000,989.6,1
+3200000,989.5,1
+3300000,988.9,1
+3400000,989.3,1
+3500000,989.7,1
+3600000,989.0,1
+3700000,990.8,1
+3800000,987.7,1
+3900000,984.5,1
+4000000,991.1,1
diff --git a/results/finger-turn-easy.csv b/results/finger-turn-easy.csv
new file mode 100644
index 0000000..cec2700
--- /dev/null
+++ b/results/finger-turn-easy.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,204.2,3
+100000,485.7,3
+200000,684.7,3
+300000,883.2,3
+400000,823.5,3
+500000,802.0,3
+600000,936.5,3
+700000,883.6,3
+800000,885.3,3
+900000,888.5,3
+1000000,982.1,3
+1100000,984.1,3
+1200000,976.8,3
+1300000,982.3,3
+1400000,978.5,3
+1500000,984.9,3
+1600000,978.1,3
+1700000,981.6,3
+1800000,990.8,3
+1900000,983.6,3
+2000000,980.7,3
+2100000,986.6,3
+2200000,978.9,3
+2300000,979.0,3
+2400000,984.7,3
+2500000,989.3,3
+2600000,980.6,3
+2700000,989.8,3
+2800000,980.7,3
+2900000,986.1,3
+3000000,979.8,3
+3100000,988.5,3
+3200000,986.1,3
+3300000,975.6,3
+3400000,982.3,3
+3500000,986.8,3
+3600000,987.9,3
+3700000,988.8,3
+3800000,892.9,3
+3900000,982.9,3
+4000000,983.4,3
+0,213.4,2
+100000,496.2,2
+200000,743.6,2
+300000,881.2,2
+400000,985.9,2
+500000,885.1,2
+600000,874.6,2
+700000,967.1,2
+800000,958.3,2
+900000,884.9,2
+1000000,977.2,2
+1100000,967.1,2
+1200000,876.8,2
+1300000,979.6,2
+1400000,982.2,2
+1500000,973.1,2
+1600000,983.1,2
+1700000,988.1,2
+1800000,986.3,2
+1900000,892.7,2
+2000000,984.6,2
+2100000,988.6,2
+2200000,981.3,2
+2300000,983.8,2
+2400000,987.5,2
+2500000,980.2,2
+2600000,978.2,2
+2700000,984.1,2
+2800000,982.7,2
+2900000,981.7,2
+3000000,985.1,2
+3100000,983.7,2
+3200000,983.1,2
+3300000,987.2,2
+3400000,992.5,2
+3500000,983.7,2
+3600000,984.5,2
+3700000,984.9,2
+3800000,979.6,2
+3900000,986.1,2
+4000000,977.3,2
+0,100.0,1
+100000,482.0,1
+200000,875.7,1
+300000,937.6,1
+400000,878.5,1
+500000,876.9,1
+600000,974.3,1
+700000,878.9,1
+800000,975.9,1
+900000,977.1,1
+1000000,796.2,1
+1100000,840.3,1
+1200000,978.2,1
+1300000,869.6,1
+1400000,985.5,1
+1500000,978.6,1
+1600000,977.0,1
+1700000,984.4,1
+1800000,795.2,1
+1900000,984.3,1
+2000000,977.1,1
+2100000,887.9,1
+2200000,977.8,1
+2300000,989.3,1
+2400000,979.1,1
+2500000,984.7,1
+2600000,986.4,1
+2700000,979.1,1
+2800000,979.5,1
+2900000,983.9,1
+3000000,881.8,1
+3100000,937.7,1
+3200000,898.9,1
+3300000,981.1,1
+3400000,978.7,1
+3500000,976.3,1
+3600000,980.7,1
+3700000,885.5,1
+3800000,961.8,1
+3900000,991.0,1
+4000000,986.7,1
diff --git a/results/finger-turn-hard.csv b/results/finger-turn-hard.csv
new file mode 100644
index 0000000..42faa61
--- /dev/null
+++ b/results/finger-turn-hard.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,0.0,3
+100000,386.5,3
+200000,775.1,3
+300000,378.8,3
+400000,560.8,3
+500000,871.4,3
+600000,833.3,3
+700000,882.6,3
+800000,971.9,3
+900000,970.9,3
+1000000,979.0,3
+1100000,966.7,3
+1200000,974.2,3
+1300000,982.5,3
+1400000,977.9,3
+1500000,981.5,3
+1600000,971.0,3
+1700000,978.5,3
+1800000,968.7,3
+1900000,978.4,3
+2000000,979.1,3
+2100000,982.5,3
+2200000,887.0,3
+2300000,976.3,3
+2400000,984.3,3
+2500000,968.1,3
+2600000,976.5,3
+2700000,985.5,3
+2800000,984.8,3
+2900000,960.4,3
+3000000,976.5,3
+3100000,884.7,3
+3200000,983.9,3
+3300000,973.1,3
+3400000,989.6,3
+3500000,983.1,3
+3600000,983.3,3
+3700000,984.5,3
+3800000,887.5,3
+3900000,978.3,3
+4000000,979.8,3
+0,124.3,2
+100000,580.4,2
+200000,742.9,2
+300000,775.5,2
+400000,963.0,2
+500000,882.1,2
+600000,790.5,2
+700000,863.3,2
+800000,874.7,2
+900000,966.0,2
+1000000,959.5,2
+1100000,867.4,2
+1200000,971.7,2
+1300000,890.1,2
+1400000,882.1,2
+1500000,973.5,2
+1600000,978.5,2
+1700000,982.5,2
+1800000,977.5,2
+1900000,888.3,2
+2000000,791.3,2
+2100000,987.0,2
+2200000,976.6,2
+2300000,988.3,2
+2400000,980.4,2
+2500000,977.5,2
+2600000,983.8,2
+2700000,883.3,2
+2800000,954.4,2
+2900000,968.3,2
+3000000,981.5,2
+3100000,979.5,2
+3200000,973.2,2
+3300000,981.1,2
+3400000,989.2,2
+3500000,982.4,2
+3600000,984.7,2
+3700000,982.8,2
+3800000,777.8,2
+3900000,982.1,2
+4000000,974.8,2
+0,100.0,1
+100000,484.0,1
+200000,873.3,1
+300000,960.9,1
+400000,877.6,1
+500000,875.3,1
+600000,904.3,1
+700000,934.2,1
+800000,972.2,1
+900000,956.4,1
+1000000,985.3,1
+1100000,978.4,1
+1200000,979.2,1
+1300000,957.6,1
+1400000,932.7,1
+1500000,978.6,1
+1600000,901.3,1
+1700000,978.9,1
+1800000,980.7,1
+1900000,805.1,1
+2000000,983.2,1
+2100000,969.9,1
+2200000,970.1,1
+2300000,985.9,1
+2400000,983.3,1
+2500000,969.7,1
+2600000,983.6,1
+2700000,979.4,1
+2800000,964.6,1
+2900000,878.7,1
+3000000,979.9,1
+3100000,979.5,1
+3200000,984.6,1
+3300000,964.9,1
+3400000,882.0,1
+3500000,884.1,1
+3600000,962.2,1
+3700000,984.7,1
+3800000,976.8,1
+3900000,983.2,1
+4000000,976.8,1
diff --git a/results/fish-swim.csv b/results/fish-swim.csv
new file mode 100644
index 0000000..bb3a0e3
--- /dev/null
+++ b/results/fish-swim.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,69.9,3
+100000,176.3,3
+200000,218.2,3
+300000,442.7,3
+400000,519.2,3
+500000,579.9,3
+600000,587.9,3
+700000,593.8,3
+800000,667.7,3
+900000,692.8,3
+1000000,606.0,3
+1100000,248.5,3
+1200000,654.8,3
+1300000,583.5,3
+1400000,801.5,3
+1500000,729.2,3
+1600000,819.3,3
+1700000,746.4,3
+1800000,807.1,3
+1900000,649.5,3
+2000000,698.5,3
+2100000,786.0,3
+2200000,808.3,3
+2300000,799.8,3
+2400000,672.1,3
+2500000,825.8,3
+2600000,809.9,3
+2700000,772.8,3
+2800000,859.2,3
+2900000,800.4,3
+3000000,801.7,3
+3100000,751.2,3
+3200000,809.2,3
+3300000,801.0,3
+3400000,818.1,3
+3500000,798.8,3
+3600000,833.9,3
+3700000,768.6,3
+3800000,767.4,3
+3900000,814.8,3
+4000000,847.5,3
+0,64.3,2
+100000,265.6,2
+200000,599.4,2
+300000,737.3,2
+400000,689.9,2
+500000,632.1,2
+600000,675.2,2
+700000,682.6,2
+800000,754.6,2
+900000,685.2,2
+1000000,695.1,2
+1100000,711.1,2
+1200000,674.7,2
+1300000,757.2,2
+1400000,807.4,2
+1500000,768.0,2
+1600000,809.5,2
+1700000,774.2,2
+1800000,796.1,2
+1900000,790.1,2
+2000000,680.9,2
+2100000,800.5,2
+2200000,757.3,2
+2300000,748.3,2
+2400000,761.8,2
+2500000,738.6,2
+2600000,784.0,2
+2700000,799.0,2
+2800000,714.5,2
+2900000,790.1,2
+3000000,666.9,2
+3100000,719.4,2
+3200000,782.5,2
+3300000,633.4,2
+3400000,832.3,2
+3500000,806.9,2
+3600000,807.3,2
+3700000,795.0,2
+3800000,758.3,2
+3900000,791.5,2
+4000000,780.3,2
+0,69.4,1
+100000,173.9,1
+200000,319.0,1
+300000,633.3,1
+400000,594.6,1
+500000,618.7,1
+600000,693.6,1
+700000,622.7,1
+800000,679.1,1
+900000,771.3,1
+1000000,770.1,1
+1100000,747.8,1
+1200000,762.2,1
+1300000,735.6,1
+1400000,791.9,1
+1500000,655.1,1
+1600000,757.4,1
+1700000,717.6,1
+1800000,731.8,1
+1900000,799.8,1
+2000000,739.2,1
+2100000,775.2,1
+2200000,685.6,1
+2300000,804.8,1
+2400000,796.0,1
+2500000,828.5,1
+2600000,761.6,1
+2700000,731.9,1
+2800000,790.6,1
+2900000,776.7,1
+3000000,801.7,1
+3100000,701.2,1
+3200000,829.3,1
+3300000,739.2,1
+3400000,761.5,1
+3500000,784.7,1
+3600000,842.1,1
+3700000,697.2,1
+3800000,740.1,1
+3900000,833.6,1
+4000000,655.1,1
diff --git a/results/hopper-hop-backwards.csv b/results/hopper-hop-backwards.csv
new file mode 100644
index 0000000..0b50cc3
--- /dev/null
+++ b/results/hopper-hop-backwards.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,0.0,2
+100000,151.7,2
+200000,883.7,2
+300000,863.9,2
+400000,937.2,2
+500000,943.5,2
+600000,950.6,2
+700000,952.6,2
+800000,938.9,2
+900000,951.9,2
+1000000,914.9,2
+1100000,852.5,2
+1200000,953.9,2
+1300000,958.1,2
+1400000,957.0,2
+1500000,954.4,2
+1600000,939.3,2
+1700000,958.9,2
+1800000,944.4,2
+1900000,961.2,2
+2000000,959.5,2
+2100000,858.9,2
+2200000,965.4,2
+2300000,861.8,2
+2400000,965.5,2
+2500000,960.5,2
+2600000,948.7,2
+2700000,955.6,2
+2800000,960.6,2
+2900000,958.3,2
+3000000,959.5,2
+3100000,959.3,2
+3200000,946.0,2
+3300000,957.8,2
+3400000,959.9,2
+3500000,960.0,2
+3600000,955.2,2
+3700000,962.5,2
+3800000,858.1,2
+3900000,960.6,2
+4000000,954.3,2
+0,0.0,1
+100000,2.6,1
+200000,657.0,1
+300000,749.7,1
+400000,942.7,1
+500000,936.7,1
+600000,955.2,1
+700000,959.7,1
+800000,856.8,1
+900000,954.4,1
+1000000,963.2,1
+1100000,962.8,1
+1200000,956.4,1
+1300000,956.7,1
+1400000,857.0,1
+1500000,958.5,1
+1600000,960.3,1
+1700000,965.0,1
+1800000,956.5,1
+1900000,959.0,1
+2000000,960.2,1
+2100000,962.1,1
+2200000,917.3,1
+2300000,960.2,1
+2400000,957.0,1
+2500000,955.8,1
+2600000,960.0,1
+2700000,962.2,1
+2800000,955.6,1
+2900000,954.1,1
+3000000,962.5,1
+3100000,958.8,1
+3200000,963.3,1
+3300000,863.4,1
+3400000,954.2,1
+3500000,957.9,1
+3600000,964.4,1
+3700000,963.7,1
+3800000,958.1,1
+3900000,962.8,1
+4000000,956.8,1
+0,0.0,3
+100000,664.5,3
+200000,734.0,3
+300000,759.2,3
+400000,630.0,3
+500000,792.2,3
+600000,810.2,3
+700000,818.8,3
+800000,813.8,3
+900000,826.4,3
+1000000,827.6,3
+1100000,837.7,3
+1200000,838.0,3
+1300000,844.1,3
+1400000,818.1,3
+1500000,836.3,3
+1600000,835.1,3
+1700000,849.2,3
+1800000,850.5,3
+1900000,894.8,3
+2000000,818.8,3
+2100000,887.3,3
+2200000,902.4,3
+2300000,898.9,3
+2400000,896.8,3
+2500000,909.2,3
+2600000,875.9,3
+2700000,898.9,3
+2800000,865.8,3
+2900000,868.3,3
+3000000,888.5,3
+3100000,873.7,3
+3200000,832.7,3
+3300000,885.5,3
+3400000,797.8,3
+3500000,885.0,3
+3600000,895.2,3
+3700000,873.5,3
+3800000,832.2,3
+3900000,790.9,3
+4000000,885.6,3
diff --git a/results/hopper-hop.csv b/results/hopper-hop.csv
new file mode 100644
index 0000000..7ea281f
--- /dev/null
+++ b/results/hopper-hop.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,0.0,3
+100000,0.7,3
+200000,145.7,3
+300000,191.9,3
+400000,210.3,3
+500000,267.9,3
+600000,354.9,3
+700000,361.6,3
+800000,353.3,3
+900000,372.0,3
+1000000,373.1,3
+1100000,357.7,3
+1200000,371.6,3
+1300000,362.0,3
+1400000,330.7,3
+1500000,358.0,3
+1600000,372.9,3
+1700000,376.9,3
+1800000,377.9,3
+1900000,372.6,3
+2000000,382.8,3
+2100000,369.7,3
+2200000,333.8,3
+2300000,366.2,3
+2400000,350.4,3
+2500000,370.0,3
+2600000,376.9,3
+2700000,386.5,3
+2800000,363.4,3
+2900000,432.9,3
+3000000,439.6,3
+3100000,454.1,3
+3200000,452.0,3
+3300000,563.0,3
+3400000,521.1,3
+3500000,532.9,3
+3600000,600.8,3
+3700000,589.0,3
+3800000,553.7,3
+3900000,603.3,3
+4000000,594.2,3
+0,0.0,2
+100000,14.0,2
+200000,215.8,2
+300000,316.3,2
+400000,346.3,2
+500000,306.3,2
+600000,364.2,2
+700000,280.9,2
+800000,365.3,2
+900000,376.3,2
+1000000,271.2,2
+1100000,330.9,2
+1200000,369.7,2
+1300000,376.4,2
+1400000,364.1,2
+1500000,379.7,2
+1600000,373.7,2
+1700000,377.3,2
+1800000,377.5,2
+1900000,383.7,2
+2000000,374.7,2
+2100000,328.6,2
+2200000,377.2,2
+2300000,317.7,2
+2400000,346.7,2
+2500000,383.1,2
+2600000,375.3,2
+2700000,383.4,2
+2800000,377.1,2
+2900000,385.2,2
+3000000,379.7,2
+3100000,377.7,2
+3200000,371.5,2
+3300000,303.0,2
+3400000,380.1,2
+3500000,307.2,2
+3600000,385.1,2
+3700000,380.1,2
+3800000,336.9,2
+3900000,318.2,2
+4000000,373.2,2
+0,0.0,1
+100000,26.2,1
+200000,138.4,1
+300000,271.0,1
+400000,299.7,1
+500000,335.6,1
+600000,354.2,1
+700000,362.2,1
+800000,328.4,1
+900000,247.7,1
+1000000,368.8,1
+1100000,373.6,1
+1200000,276.5,1
+1300000,364.7,1
+1400000,334.9,1
+1500000,302.1,1
+1600000,372.0,1
+1700000,357.4,1
+1800000,366.1,1
+1900000,370.0,1
+2000000,367.5,1
+2100000,375.8,1
+2200000,349.5,1
+2300000,378.7,1
+2400000,369.5,1
+2500000,370.0,1
+2600000,369.9,1
+2700000,372.7,1
+2800000,381.0,1
+2900000,379.5,1
+3000000,370.5,1
+3100000,362.6,1
+3200000,374.0,1
+3300000,343.5,1
+3400000,316.1,1
+3500000,365.2,1
+3600000,373.8,1
+3700000,365.0,1
+3800000,379.5,1
+3900000,378.0,1
+4000000,380.1,1
diff --git a/results/hopper-stand.csv b/results/hopper-stand.csv
new file mode 100644
index 0000000..ea8f49d
--- /dev/null
+++ b/results/hopper-stand.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,1.2,2
+100000,6.4,2
+200000,326.4,2
+300000,904.0,2
+400000,916.9,2
+500000,932.9,2
+600000,948.9,2
+700000,952.2,2
+800000,948.5,2
+900000,961.6,2
+1000000,950.6,2
+1100000,857.8,2
+1200000,956.3,2
+1300000,960.5,2
+1400000,963.9,2
+1500000,958.0,2
+1600000,961.1,2
+1700000,959.9,2
+1800000,957.6,2
+1900000,963.3,2
+2000000,960.3,2
+2100000,858.5,2
+2200000,966.3,2
+2300000,866.4,2
+2400000,961.3,2
+2500000,956.7,2
+2600000,955.7,2
+2700000,948.3,2
+2800000,958.1,2
+2900000,953.6,2
+3000000,959.7,2
+3100000,954.0,2
+3200000,944.0,2
+3300000,962.9,2
+3400000,961.5,2
+3500000,960.9,2
+3600000,960.7,2
+3700000,971.8,2
+3800000,865.2,2
+3900000,957.4,2
+4000000,951.4,2
+0,1.2,1
+100000,10.1,1
+200000,19.6,1
+300000,534.1,1
+400000,914.2,1
+500000,933.1,1
+600000,941.3,1
+700000,943.9,1
+800000,842.9,1
+900000,940.0,1
+1000000,954.1,1
+1100000,955.1,1
+1200000,947.4,1
+1300000,947.0,1
+1400000,754.6,1
+1500000,955.7,1
+1600000,946.6,1
+1700000,939.2,1
+1800000,954.9,1
+1900000,961.6,1
+2000000,950.7,1
+2100000,940.3,1
+2200000,859.4,1
+2300000,954.5,1
+2400000,954.9,1
+2500000,957.4,1
+2600000,945.0,1
+2700000,961.9,1
+2800000,962.4,1
+2900000,955.5,1
+3000000,959.0,1
+3100000,959.1,1
+3200000,964.6,1
+3300000,856.4,1
+3400000,964.4,1
+3500000,953.2,1
+3600000,962.7,1
+3700000,963.9,1
+3800000,958.6,1
+3900000,916.0,1
+4000000,954.1,1
+0,2.5,3
+100000,68.0,3
+200000,438.5,3
+300000,927.2,3
+400000,750.1,3
+500000,943.4,3
+600000,944.9,3
+700000,954.8,3
+800000,931.8,3
+900000,957.9,3
+1000000,958.3,3
+1100000,959.5,3
+1200000,962.2,3
+1300000,910.7,3
+1400000,940.7,3
+1500000,953.0,3
+1600000,963.7,3
+1700000,965.7,3
+1800000,957.0,3
+1900000,960.9,3
+2000000,951.3,3
+2100000,961.6,3
+2200000,957.2,3
+2300000,964.6,3
+2400000,968.2,3
+2500000,963.7,3
+2600000,955.2,3
+2700000,962.8,3
+2800000,961.6,3
+2900000,966.1,3
+3000000,962.7,3
+3100000,950.4,3
+3200000,959.6,3
+3300000,960.0,3
+3400000,866.6,3
+3500000,953.1,3
+3600000,955.5,3
+3700000,957.0,3
+3800000,961.6,3
+3900000,946.0,3
+4000000,958.9,3
diff --git a/results/humanoid-run.csv b/results/humanoid-run.csv
new file mode 100644
index 0000000..b7f149d
--- /dev/null
+++ b/results/humanoid-run.csv
@@ -0,0 +1,424 @@
+step,reward,seed
+0,1.1,3
+100000,1.1,3
+200000,1.2,3
+300000,31.5,3
+400000,111.0,3
+500000,110.7,3
+600000,85.3,3
+700000,126.5,3
+800000,153.6,3
+900000,178.8,3
+1000000,201.0,3
+1100000,219.7,3
+1200000,247.5,3
+1300000,254.9,3
+1400000,254.0,3
+1500000,261.1,3
+1600000,309.5,3
+1700000,306.0,3
+1800000,311.8,3
+1900000,322.5,3
+2000000,312.0,3
+2100000,331.3,3
+2200000,357.4,3
+2300000,359.0,3
+2400000,391.5,3
+2500000,382.9,3
+2600000,400.1,3
+2700000,445.6,3
+2800000,419.5,3
+2900000,450.8,3
+3000000,460.6,3
+3100000,450.0,3
+3200000,438.5,3
+3300000,468.8,3
+3400000,473.2,3
+3500000,501.0,3
+3600000,488.5,3
+3700000,521.3,3
+3800000,549.1,3
+3900000,529.4,3
+4000000,533.0,3
+4100000,521.6,3
+4200000,490.1,3
+4300000,548.1,3
+4400000,525.5,3
+4500000,573.4,3
+4600000,525.4,3
+4700000,527.2,3
+4800000,570.3,3
+4900000,560.0,3
+5000000,571.9,3
+5100000,593.0,3
+5200000,573.7,3
+5300000,577.4,3
+5400000,551.7,3
+5500000,566.1,3
+5600000,568.9,3
+5700000,590.3,3
+5800000,593.0,3
+5900000,599.8,3
+6000000,598.4,3
+6100000,570.4,3
+6200000,612.0,3
+6300000,625.8,3
+6400000,605.6,3
+6500000,579.7,3
+6600000,597.7,3
+6700000,610.7,3
+6800000,603.8,3
+6900000,618.5,3
+7000000,605.8,3
+7100000,554.1,3
+7200000,598.5,3
+7300000,603.2,3
+7400000,594.9,3
+7500000,623.1,3
+7600000,621.4,3
+7700000,640.7,3
+7800000,624.2,3
+7900000,605.8,3
+8000000,617.3,3
+8100000,653.7,3
+8200000,617.3,3
+8300000,637.3,3
+8400000,626.6,3
+8500000,648.1,3
+8600000,645.3,3
+8700000,654.9,3
+8800000,636.2,3
+8900000,620.5,3
+9000000,647.9,3
+9100000,593.9,3
+9200000,657.1,3
+9300000,573.6,3
+9400000,642.3,3
+9500000,610.5,3
+9600000,657.1,3
+9700000,656.1,3
+9800000,638.2,3
+9900000,611.1,3
+10000000,654.0,3
+10100000,632.8,3
+10200000,635.8,3
+10300000,668.0,3
+10400000,607.4,3
+10500000,642.1,3
+10600000,607.1,3
+10700000,670.4,3
+10800000,678.4,3
+10900000,657.4,3
+11000000,650.6,3
+11100000,661.7,3
+11200000,655.4,3
+11300000,685.4,3
+11400000,656.2,3
+11500000,627.6,3
+11600000,638.4,3
+11700000,672.5,3
+11800000,654.9,3
+11900000,654.4,3
+12000000,646.3,3
+12100000,646.7,3
+12200000,632.5,3
+12300000,653.8,3
+12400000,645.7,3
+12500000,625.3,3
+12600000,603.7,3
+12700000,620.8,3
+12800000,622.0,3
+12900000,654.2,3
+13000000,635.0,3
+13100000,648.9,3
+13200000,666.7,3
+13300000,695.9,3
+13400000,674.7,3
+13500000,683.6,3
+13600000,668.1,3
+13700000,653.0,3
+13800000,637.9,3
+13900000,663.6,3
+14000000,664.6,3
+0,0.9,2
+100000,1.6,2
+200000,1.2,2
+300000,80.8,2
+400000,93.4,2
+500000,118.1,2
+600000,131.0,2
+700000,117.6,2
+800000,163.8,2
+900000,174.3,2
+1000000,157.4,2
+1100000,193.0,2
+1200000,215.1,2
+1300000,226.2,2
+1400000,252.2,2
+1500000,264.0,2
+1600000,252.1,2
+1700000,246.5,2
+1800000,290.7,2
+1900000,295.9,2
+2000000,328.9,2
+2100000,348.3,2
+2200000,326.6,2
+2300000,375.4,2
+2400000,401.8,2
+2500000,393.3,2
+2600000,414.3,2
+2700000,443.0,2
+2800000,446.9,2
+2900000,423.2,2
+3000000,444.6,2
+3100000,434.1,2
+3200000,444.3,2
+3300000,439.9,2
+3400000,440.2,2
+3500000,441.5,2
+3600000,445.8,2
+3700000,472.0,2
+3800000,469.2,2
+3900000,457.1,2
+4000000,459.8,2
+4100000,453.8,2
+4200000,463.7,2
+4300000,398.5,2
+4400000,494.6,2
+4500000,512.3,2
+4600000,501.4,2
+4700000,472.5,2
+4800000,499.7,2
+4900000,512.3,2
+5000000,497.6,2
+5100000,477.5,2
+5200000,570.6,2
+5300000,554.1,2
+5400000,586.9,2
+5500000,545.3,2
+5600000,552.7,2
+5700000,585.6,2
+5800000,578.5,2
+5900000,589.6,2
+6000000,536.1,2
+6100000,549.1,2
+6200000,559.6,2
+6300000,565.0,2
+6400000,563.2,2
+6500000,442.3,2
+6600000,543.4,2
+6700000,541.9,2
+6800000,544.0,2
+6900000,531.9,2
+7000000,588.9,2
+7100000,609.4,2
+7200000,549.6,2
+7300000,605.3,2
+7400000,546.1,2
+7500000,558.8,2
+7600000,541.5,2
+7700000,557.5,2
+7800000,555.1,2
+7900000,548.0,2
+8000000,601.0,2
+8100000,552.9,2
+8200000,593.2,2
+8300000,603.1,2
+8400000,587.5,2
+8500000,556.5,2
+8600000,583.7,2
+8700000,580.9,2
+8800000,635.6,2
+8900000,551.0,2
+9000000,597.9,2
+9100000,585.3,2
+9200000,514.0,2
+9300000,588.6,2
+9400000,577.4,2
+9500000,579.0,2
+9600000,542.4,2
+9700000,560.2,2
+9800000,562.6,2
+9900000,584.4,2
+10000000,562.4,2
+10100000,572.2,2
+10200000,571.3,2
+10300000,592.9,2
+10400000,627.5,2
+10500000,571.0,2
+10600000,540.4,2
+10700000,574.4,2
+10800000,541.0,2
+10900000,593.8,2
+11000000,627.7,2
+11100000,609.6,2
+11200000,613.7,2
+11300000,573.7,2
+11400000,591.7,2
+11500000,615.3,2
+11600000,572.2,2
+11700000,626.3,2
+11800000,621.4,2
+11900000,584.9,2
+12000000,595.6,2
+12100000,625.8,2
+12200000,591.1,2
+12300000,594.1,2
+12400000,523.3,2
+12500000,613.2,2
+12600000,646.0,2
+12700000,535.3,2
+12800000,610.7,2
+12900000,594.8,2
+13000000,592.1,2
+13100000,620.3,2
+13200000,632.3,2
+13300000,607.1,2
+13400000,620.6,2
+13500000,585.0,2
+13600000,618.0,2
+13700000,490.2,2
+13800000,599.3,2
+13900000,605.7,2
+14000000,603.5,2
+0,0.8,1
+100000,1.0,1
+200000,0.9,1
+300000,57.1,1
+400000,77.6,1
+500000,96.7,1
+600000,144.2,1
+700000,155.5,1
+800000,155.8,1
+900000,181.4,1
+1000000,195.3,1
+1100000,191.3,1
+1200000,185.3,1
+1300000,237.6,1
+1400000,228.1,1
+1500000,239.1,1
+1600000,260.8,1
+1700000,295.3,1
+1800000,271.1,1
+1900000,290.9,1
+2000000,307.7,1
+2100000,305.5,1
+2200000,332.4,1
+2300000,316.3,1
+2400000,361.8,1
+2500000,342.0,1
+2600000,370.6,1
+2700000,370.1,1
+2800000,369.3,1
+2900000,382.8,1
+3000000,411.2,1
+3100000,392.7,1
+3200000,394.9,1
+3300000,375.4,1
+3400000,395.4,1
+3500000,385.1,1
+3600000,366.5,1
+3700000,400.5,1
+3800000,387.0,1
+3900000,351.3,1
+4000000,390.6,1
+4100000,396.1,1
+4200000,427.9,1
+4300000,386.5,1
+4400000,461.5,1
+4500000,431.7,1
+4600000,434.8,1
+4700000,462.8,1
+4800000,492.8,1
+4900000,416.2,1
+5000000,433.9,1
+5100000,495.6,1
+5200000,445.0,1
+5300000,446.1,1
+5400000,493.0,1
+5500000,445.8,1
+5600000,437.8,1
+5700000,392.6,1
+5800000,469.4,1
+5900000,484.3,1
+6000000,497.8,1
+6100000,445.0,1
+6200000,497.1,1
+6300000,533.3,1
+6400000,480.6,1
+6500000,531.1,1
+6600000,496.3,1
+6700000,475.3,1
+6800000,511.9,1
+6900000,474.0,1
+7000000,513.5,1
+7100000,505.5,1
+7200000,484.0,1
+7300000,521.8,1
+7400000,487.6,1
+7500000,511.6,1
+7600000,522.5,1
+7700000,483.2,1
+7800000,473.1,1
+7900000,462.8,1
+8000000,481.1,1
+8100000,528.6,1
+8200000,504.6,1
+8300000,514.6,1
+8400000,510.4,1
+8500000,527.3,1
+8600000,510.5,1
+8700000,524.8,1
+8800000,520.9,1
+8900000,442.8,1
+9000000,532.7,1
+9100000,466.1,1
+9200000,544.4,1
+9300000,529.0,1
+9400000,549.6,1
+9500000,535.8,1
+9600000,539.7,1
+9700000,504.3,1
+9800000,550.8,1
+9900000,533.0,1
+10000000,561.8,1
+10100000,522.6,1
+10200000,542.6,1
+10300000,526.9,1
+10400000,551.1,1
+10500000,545.3,1
+10600000,482.8,1
+10700000,541.0,1
+10800000,537.9,1
+10900000,541.5,1
+11000000,533.3,1
+11100000,512.0,1
+11200000,518.5,1
+11300000,524.6,1
+11400000,555.7,1
+11500000,540.3,1
+11600000,459.5,1
+11700000,540.9,1
+11800000,540.4,1
+11900000,534.2,1
+12000000,542.8,1
+12100000,511.3,1
+12200000,554.0,1
+12300000,561.8,1
+12400000,544.7,1
+12500000,548.9,1
+12600000,555.9,1
+12700000,511.2,1
+12800000,553.9,1
+12900000,548.4,1
+13000000,542.5,1
+13100000,559.0,1
+13200000,558.0,1
+13300000,555.5,1
+13400000,518.3,1
+13500000,549.5,1
+13600000,545.8,1
+13700000,543.8,1
+13800000,521.8,1
+13900000,542.4,1
+14000000,542.2,1
diff --git a/results/humanoid-stand.csv b/results/humanoid-stand.csv
new file mode 100644
index 0000000..2137842
--- /dev/null
+++ b/results/humanoid-stand.csv
@@ -0,0 +1,424 @@
+step,reward,seed
+0,6.3,3
+100000,6.1,3
+200000,7.3,3
+300000,39.4,3
+400000,344.6,3
+500000,357.2,3
+600000,491.0,3
+700000,483.6,3
+800000,522.8,3
+900000,564.9,3
+1000000,628.3,3
+1100000,699.3,3
+1200000,730.6,3
+1300000,700.1,3
+1400000,777.6,3
+1500000,785.0,3
+1600000,788.7,3
+1700000,862.8,3
+1800000,874.1,3
+1900000,895.2,3
+2000000,892.6,3
+2100000,910.1,3
+2200000,910.9,3
+2300000,898.7,3
+2400000,914.0,3
+2500000,919.6,3
+2600000,918.9,3
+2700000,915.6,3
+2800000,921.8,3
+2900000,927.4,3
+3000000,902.0,3
+3100000,925.7,3
+3200000,932.0,3
+3300000,929.7,3
+3400000,908.9,3
+3500000,903.6,3
+3600000,910.5,3
+3700000,919.3,3
+3800000,903.9,3
+3900000,909.3,3
+4000000,910.5,3
+4100000,903.3,3
+4200000,895.9,3
+4300000,896.4,3
+4400000,917.9,3
+4500000,902.1,3
+4600000,902.4,3
+4700000,921.4,3
+4800000,901.1,3
+4900000,821.7,3
+5000000,896.5,3
+5100000,896.3,3
+5200000,907.2,3
+5300000,885.3,3
+5400000,875.5,3
+5500000,898.7,3
+5600000,871.2,3
+5700000,861.5,3
+5800000,897.4,3
+5900000,910.6,3
+6000000,905.4,3
+6100000,874.9,3
+6200000,898.6,3
+6300000,894.9,3
+6400000,868.3,3
+6500000,853.2,3
+6600000,846.6,3
+6700000,820.3,3
+6800000,821.7,3
+6900000,815.7,3
+7000000,810.9,3
+7100000,831.9,3
+7200000,880.3,3
+7300000,851.9,3
+7400000,892.4,3
+7500000,907.5,3
+7600000,900.7,3
+7700000,891.5,3
+7800000,844.5,3
+7900000,893.0,3
+8000000,877.1,3
+8100000,879.1,3
+8200000,880.3,3
+8300000,902.6,3
+8400000,889.1,3
+8500000,912.4,3
+8600000,904.1,3
+8700000,900.3,3
+8800000,882.0,3
+8900000,893.7,3
+9000000,905.0,3
+9100000,909.7,3
+9200000,890.7,3
+9300000,887.4,3
+9400000,910.1,3
+9500000,923.4,3
+9600000,917.3,3
+9700000,906.2,3
+9800000,920.3,3
+9900000,901.6,3
+10000000,904.4,3
+10100000,914.0,3
+10200000,894.3,3
+10300000,914.2,3
+10400000,909.3,3
+10500000,898.8,3
+10600000,857.9,3
+10700000,835.8,3
+10800000,862.8,3
+10900000,818.2,3
+11000000,785.3,3
+11100000,796.7,3
+11200000,795.4,3
+11300000,820.2,3
+11400000,805.2,3
+11500000,804.4,3
+11600000,892.7,3
+11700000,841.3,3
+11800000,754.7,3
+11900000,842.3,3
+12000000,830.6,3
+12100000,803.2,3
+12200000,833.2,3
+12300000,820.4,3
+12400000,870.6,3
+12500000,850.4,3
+12600000,856.3,3
+12700000,870.8,3
+12800000,836.0,3
+12900000,863.8,3
+13000000,847.0,3
+13100000,885.2,3
+13200000,881.4,3
+13300000,912.4,3
+13400000,905.1,3
+13500000,892.6,3
+13600000,889.1,3
+13700000,888.1,3
+13800000,888.4,3
+13900000,885.4,3
+14000000,891.5,3
+0,4.5,2
+100000,6.2,2
+200000,6.5,2
+300000,160.2,2
+400000,423.9,2
+500000,428.8,2
+600000,417.8,2
+700000,588.7,2
+800000,644.2,2
+900000,634.1,2
+1000000,670.7,2
+1100000,805.5,2
+1200000,800.6,2
+1300000,781.4,2
+1400000,845.5,2
+1500000,854.6,2
+1600000,861.4,2
+1700000,848.3,2
+1800000,887.0,2
+1900000,899.3,2
+2000000,921.5,2
+2100000,909.5,2
+2200000,929.1,2
+2300000,930.8,2
+2400000,908.1,2
+2500000,927.5,2
+2600000,936.1,2
+2700000,938.6,2
+2800000,935.7,2
+2900000,928.2,2
+3000000,932.9,2
+3100000,919.0,2
+3200000,927.6,2
+3300000,927.7,2
+3400000,926.8,2
+3500000,921.9,2
+3600000,909.6,2
+3700000,901.9,2
+3800000,905.6,2
+3900000,918.6,2
+4000000,918.8,2
+4100000,910.1,2
+4200000,896.0,2
+4300000,895.5,2
+4400000,920.3,2
+4500000,852.1,2
+4600000,888.4,2
+4700000,895.2,2
+4800000,884.8,2
+4900000,870.8,2
+5000000,882.2,2
+5100000,872.7,2
+5200000,860.1,2
+5300000,858.4,2
+5400000,859.7,2
+5500000,858.8,2
+5600000,873.3,2
+5700000,830.0,2
+5800000,912.0,2
+5900000,925.8,2
+6000000,930.3,2
+6100000,890.7,2
+6200000,900.8,2
+6300000,917.6,2
+6400000,923.0,2
+6500000,915.2,2
+6600000,903.0,2
+6700000,881.0,2
+6800000,910.1,2
+6900000,909.5,2
+7000000,901.5,2
+7100000,919.7,2
+7200000,908.3,2
+7300000,890.3,2
+7400000,896.9,2
+7500000,913.7,2
+7600000,910.5,2
+7700000,922.2,2
+7800000,916.3,2
+7900000,903.3,2
+8000000,901.3,2
+8100000,912.2,2
+8200000,897.6,2
+8300000,919.4,2
+8400000,879.5,2
+8500000,896.3,2
+8600000,874.7,2
+8700000,874.8,2
+8800000,906.1,2
+8900000,913.6,2
+9000000,899.7,2
+9100000,896.1,2
+9200000,865.3,2
+9300000,908.9,2
+9400000,886.2,2
+9500000,889.1,2
+9600000,844.5,2
+9700000,899.4,2
+9800000,899.8,2
+9900000,900.4,2
+10000000,904.5,2
+10100000,887.1,2
+10200000,885.7,2
+10300000,900.0,2
+10400000,894.1,2
+10500000,909.9,2
+10600000,902.6,2
+10700000,894.3,2
+10800000,899.8,2
+10900000,899.1,2
+11000000,901.3,2
+11100000,875.5,2
+11200000,920.3,2
+11300000,912.6,2
+11400000,918.5,2
+11500000,915.4,2
+11600000,907.4,2
+11700000,906.1,2
+11800000,902.2,2
+11900000,913.3,2
+12000000,912.1,2
+12100000,918.6,2
+12200000,912.6,2
+12300000,909.3,2
+12400000,899.3,2
+12500000,916.2,2
+12600000,905.7,2
+12700000,900.5,2
+12800000,896.2,2
+12900000,906.7,2
+13000000,882.3,2
+13100000,863.1,2
+13200000,850.5,2
+13300000,829.7,2
+13400000,802.4,2
+13500000,820.3,2
+13600000,817.1,2
+13700000,812.9,2
+13800000,871.0,2
+13900000,919.3,2
+14000000,912.7,2
+0,4.5,1
+100000,5.3,1
+200000,96.9,1
+300000,281.8,1
+400000,437.1,1
+500000,480.7,1
+600000,518.7,1
+700000,554.6,1
+800000,561.9,1
+900000,518.7,1
+1000000,692.2,1
+1100000,719.7,1
+1200000,779.4,1
+1300000,765.5,1
+1400000,873.5,1
+1500000,877.2,1
+1600000,871.0,1
+1700000,892.9,1
+1800000,899.3,1
+1900000,918.7,1
+2000000,925.8,1
+2100000,893.1,1
+2200000,910.6,1
+2300000,920.5,1
+2400000,940.3,1
+2500000,937.4,1
+2600000,931.6,1
+2700000,925.7,1
+2800000,936.0,1
+2900000,940.3,1
+3000000,923.0,1
+3100000,919.8,1
+3200000,941.3,1
+3300000,927.3,1
+3400000,919.6,1
+3500000,914.5,1
+3600000,919.9,1
+3700000,923.9,1
+3800000,929.8,1
+3900000,918.1,1
+4000000,924.1,1
+4100000,925.1,1
+4200000,899.7,1
+4300000,923.0,1
+4400000,928.6,1
+4500000,927.9,1
+4600000,923.4,1
+4700000,920.7,1
+4800000,915.4,1
+4900000,857.6,1
+5000000,873.3,1
+5100000,921.3,1
+5200000,897.3,1
+5300000,888.5,1
+5400000,899.2,1
+5500000,913.4,1
+5600000,913.0,1
+5700000,895.3,1
+5800000,917.2,1
+5900000,917.1,1
+6000000,914.2,1
+6100000,912.5,1
+6200000,910.7,1
+6300000,911.4,1
+6400000,933.6,1
+6500000,882.6,1
+6600000,893.0,1
+6700000,883.8,1
+6800000,910.7,1
+6900000,892.1,1
+7000000,909.6,1
+7100000,916.2,1
+7200000,898.6,1
+7300000,882.4,1
+7400000,884.7,1
+7500000,881.1,1
+7600000,884.6,1
+7700000,851.8,1
+7800000,888.4,1
+7900000,891.4,1
+8000000,894.2,1
+8100000,906.2,1
+8200000,889.4,1
+8300000,859.5,1
+8400000,912.0,1
+8500000,873.0,1
+8600000,897.2,1
+8700000,880.7,1
+8800000,877.8,1
+8900000,921.9,1
+9000000,912.4,1
+9100000,894.7,1
+9200000,929.7,1
+9300000,909.6,1
+9400000,917.0,1
+9500000,897.6,1
+9600000,901.7,1
+9700000,898.8,1
+9800000,897.6,1
+9900000,913.2,1
+10000000,894.6,1
+10100000,884.5,1
+10200000,884.8,1
+10300000,884.0,1
+10400000,843.6,1
+10500000,904.8,1
+10600000,908.8,1
+10700000,888.2,1
+10800000,877.3,1
+10900000,892.6,1
+11000000,902.7,1
+11100000,918.5,1
+11200000,899.0,1
+11300000,872.0,1
+11400000,909.0,1
+11500000,889.8,1
+11600000,899.4,1
+11700000,918.9,1
+11800000,891.1,1
+11900000,913.4,1
+12000000,916.2,1
+12100000,898.7,1
+12200000,908.2,1
+12300000,862.1,1
+12400000,876.6,1
+12500000,909.9,1
+12600000,919.1,1
+12700000,922.8,1
+12800000,897.6,1
+12900000,923.7,1
+13000000,928.3,1
+13100000,912.7,1
+13200000,906.0,1
+13300000,922.2,1
+13400000,914.6,1
+13500000,932.0,1
+13600000,918.8,1
+13700000,881.2,1
+13800000,901.1,1
+13900000,916.1,1
+14000000,914.1,1
diff --git a/results/humanoid-walk.csv b/results/humanoid-walk.csv
new file mode 100644
index 0000000..ca6fba1
--- /dev/null
+++ b/results/humanoid-walk.csv
@@ -0,0 +1,424 @@
+step,reward,seed
+0,1.3,3
+100000,1.4,3
+200000,1.7,3
+300000,1.4,3
+400000,188.7,3
+500000,389.8,3
+600000,459.7,3
+700000,493.2,3
+800000,520.0,3
+900000,555.7,3
+1000000,610.4,3
+1100000,666.0,3
+1200000,709.2,3
+1300000,767.1,3
+1400000,781.4,3
+1500000,749.4,3
+1600000,776.6,3
+1700000,797.7,3
+1800000,820.0,3
+1900000,874.0,3
+2000000,809.9,3
+2100000,893.2,3
+2200000,877.4,3
+2300000,886.9,3
+2400000,897.8,3
+2500000,890.5,3
+2600000,899.1,3
+2700000,894.1,3
+2800000,912.6,3
+2900000,915.9,3
+3000000,909.3,3
+3100000,925.0,3
+3200000,888.8,3
+3300000,918.4,3
+3400000,921.6,3
+3500000,919.6,3
+3600000,923.8,3
+3700000,919.4,3
+3800000,924.7,3
+3900000,927.0,3
+4000000,915.7,3
+4100000,918.2,3
+4200000,930.1,3
+4300000,911.5,3
+4400000,919.6,3
+4500000,917.9,3
+4600000,912.1,3
+4700000,921.3,3
+4800000,864.6,3
+4900000,911.2,3
+5000000,904.1,3
+5100000,920.7,3
+5200000,919.2,3
+5300000,921.3,3
+5400000,924.1,3
+5500000,901.1,3
+5600000,909.6,3
+5700000,911.2,3
+5800000,904.3,3
+5900000,919.6,3
+6000000,901.5,3
+6100000,901.6,3
+6200000,924.1,3
+6300000,920.2,3
+6400000,901.9,3
+6500000,907.7,3
+6600000,891.1,3
+6700000,915.6,3
+6800000,918.2,3
+6900000,912.6,3
+7000000,915.2,3
+7100000,904.6,3
+7200000,911.4,3
+7300000,899.5,3
+7400000,918.5,3
+7500000,910.4,3
+7600000,901.9,3
+7700000,918.4,3
+7800000,918.2,3
+7900000,881.9,3
+8000000,905.4,3
+8100000,894.8,3
+8200000,897.3,3
+8300000,847.6,3
+8400000,886.8,3
+8500000,896.0,3
+8600000,902.3,3
+8700000,895.9,3
+8800000,905.8,3
+8900000,900.2,3
+9000000,897.3,3
+9100000,909.1,3
+9200000,906.0,3
+9300000,905.2,3
+9400000,877.9,3
+9500000,886.1,3
+9600000,900.8,3
+9700000,905.1,3
+9800000,901.8,3
+9900000,896.6,3
+10000000,896.2,3
+10100000,894.3,3
+10200000,901.8,3
+10300000,888.4,3
+10400000,895.6,3
+10500000,897.8,3
+10600000,874.4,3
+10700000,892.2,3
+10800000,896.0,3
+10900000,886.8,3
+11000000,888.1,3
+11100000,886.8,3
+11200000,867.7,3
+11300000,885.6,3
+11400000,887.6,3
+11500000,875.5,3
+11600000,884.8,3
+11700000,890.9,3
+11800000,896.9,3
+11900000,882.2,3
+12000000,888.7,3
+12100000,867.7,3
+12200000,891.1,3
+12300000,881.6,3
+12400000,879.1,3
+12500000,881.6,3
+12600000,879.3,3
+12700000,878.3,3
+12800000,874.8,3
+12900000,898.4,3
+13000000,890.7,3
+13100000,873.8,3
+13200000,891.1,3
+13300000,889.1,3
+13400000,897.3,3
+13500000,860.8,3
+13600000,893.6,3
+13700000,898.7,3
+13800000,892.5,3
+13900000,894.4,3
+14000000,892.6,3
+0,0.9,1
+100000,1.4,1
+200000,1.2,1
+300000,3.6,1
+400000,203.0,1
+500000,26.6,1
+600000,391.5,1
+700000,461.6,1
+800000,577.9,1
+900000,590.6,1
+1000000,642.3,1
+1100000,695.9,1
+1200000,702.9,1
+1300000,753.7,1
+1400000,799.2,1
+1500000,822.6,1
+1600000,841.6,1
+1700000,871.8,1
+1800000,897.8,1
+1900000,886.1,1
+2000000,889.6,1
+2100000,893.9,1
+2200000,900.9,1
+2300000,894.1,1
+2400000,914.0,1
+2500000,913.4,1
+2600000,911.1,1
+2700000,900.8,1
+2800000,899.2,1
+2900000,911.6,1
+3000000,922.6,1
+3100000,925.9,1
+3200000,930.8,1
+3300000,928.9,1
+3400000,931.6,1
+3500000,921.7,1
+3600000,918.8,1
+3700000,926.5,1
+3800000,921.3,1
+3900000,912.4,1
+4000000,917.2,1
+4100000,921.8,1
+4200000,916.3,1
+4300000,895.7,1
+4400000,911.7,1
+4500000,906.1,1
+4600000,905.4,1
+4700000,902.9,1
+4800000,906.4,1
+4900000,894.4,1
+5000000,900.7,1
+5100000,885.9,1
+5200000,884.6,1
+5300000,897.0,1
+5400000,897.8,1
+5500000,901.5,1
+5600000,884.2,1
+5700000,875.0,1
+5800000,849.5,1
+5900000,881.5,1
+6000000,889.6,1
+6100000,846.9,1
+6200000,873.2,1
+6300000,882.9,1
+6400000,878.6,1
+6500000,865.4,1
+6600000,873.5,1
+6700000,875.0,1
+6800000,850.3,1
+6900000,883.9,1
+7000000,858.9,1
+7100000,877.0,1
+7200000,864.8,1
+7300000,867.6,1
+7400000,880.9,1
+7500000,898.6,1
+7600000,913.9,1
+7700000,893.2,1
+7800000,878.9,1
+7900000,916.3,1
+8000000,924.0,1
+8100000,916.7,1
+8200000,903.9,1
+8300000,875.3,1
+8400000,915.5,1
+8500000,904.3,1
+8600000,900.5,1
+8700000,897.3,1
+8800000,912.0,1
+8900000,906.5,1
+9000000,893.3,1
+9100000,908.3,1
+9200000,894.7,1
+9300000,912.0,1
+9400000,893.4,1
+9500000,893.6,1
+9600000,887.9,1
+9700000,888.9,1
+9800000,900.0,1
+9900000,887.4,1
+10000000,901.3,1
+10100000,917.1,1
+10200000,910.6,1
+10300000,903.7,1
+10400000,908.8,1
+10500000,912.0,1
+10600000,913.2,1
+10700000,906.6,1
+10800000,898.6,1
+10900000,898.5,1
+11000000,907.5,1
+11100000,905.0,1
+11200000,895.6,1
+11300000,890.7,1
+11400000,890.6,1
+11500000,893.5,1
+11600000,900.0,1
+11700000,895.0,1
+11800000,903.8,1
+11900000,878.0,1
+12000000,907.9,1
+12100000,904.4,1
+12200000,908.8,1
+12300000,907.3,1
+12400000,884.9,1
+12500000,899.6,1
+12600000,909.8,1
+12700000,917.3,1
+12800000,875.0,1
+12900000,906.3,1
+13000000,908.1,1
+13100000,898.3,1
+13200000,912.4,1
+13300000,882.0,1
+13400000,903.0,1
+13500000,905.9,1
+13600000,900.3,1
+13700000,914.2,1
+13800000,908.2,1
+13900000,900.0,1
+14000000,901.8,1
+0,0.9,2
+100000,1.6,2
+200000,1.9,2
+300000,4.0,2
+400000,174.7,2
+500000,417.8,2
+600000,459.4,2
+700000,497.4,2
+800000,522.2,2
+900000,570.0,2
+1000000,632.0,2
+1100000,678.6,2
+1200000,734.7,2
+1300000,835.6,2
+1400000,851.6,2
+1500000,871.1,2
+1600000,872.1,2
+1700000,890.1,2
+1800000,901.5,2
+1900000,894.3,2
+2000000,879.5,2
+2100000,892.8,2
+2200000,899.8,2
+2300000,888.5,2
+2400000,885.9,2
+2500000,915.9,2
+2600000,913.8,2
+2700000,915.7,2
+2800000,916.9,2
+2900000,919.6,2
+3000000,921.4,2
+3100000,911.4,2
+3200000,922.8,2
+3300000,925.1,2
+3400000,927.0,2
+3500000,928.2,2
+3600000,913.7,2
+3700000,911.9,2
+3800000,923.2,2
+3900000,930.5,2
+4000000,904.3,2
+4100000,917.4,2
+4200000,910.5,2
+4300000,915.6,2
+4400000,916.0,2
+4500000,901.0,2
+4600000,920.5,2
+4700000,908.3,2
+4800000,897.4,2
+4900000,899.3,2
+5000000,887.3,2
+5100000,909.6,2
+5200000,916.2,2
+5300000,885.1,2
+5400000,889.7,2
+5500000,883.8,2
+5600000,867.0,2
+5700000,879.3,2
+5800000,875.6,2
+5900000,871.8,2
+6000000,865.6,2
+6100000,876.3,2
+6200000,869.0,2
+6300000,868.2,2
+6400000,882.7,2
+6500000,893.9,2
+6600000,831.6,2
+6700000,861.2,2
+6800000,877.0,2
+6900000,877.6,2
+7000000,919.1,2
+7100000,902.7,2
+7200000,883.8,2
+7300000,922.8,2
+7400000,917.7,2
+7500000,904.8,2
+7600000,892.9,2
+7700000,904.9,2
+7800000,882.7,2
+7900000,899.9,2
+8000000,923.5,2
+8100000,909.7,2
+8200000,909.8,2
+8300000,905.5,2
+8400000,891.7,2
+8500000,894.1,2
+8600000,880.1,2
+8700000,911.9,2
+8800000,897.6,2
+8900000,914.7,2
+9000000,906.9,2
+9100000,901.5,2
+9200000,896.9,2
+9300000,899.5,2
+9400000,886.8,2
+9500000,896.7,2
+9600000,886.0,2
+9700000,903.7,2
+9800000,906.4,2
+9900000,898.0,2
+10000000,889.3,2
+10100000,907.9,2
+10200000,911.9,2
+10300000,889.2,2
+10400000,908.4,2
+10500000,907.9,2
+10600000,904.9,2
+10700000,883.3,2
+10800000,889.2,2
+10900000,886.2,2
+11000000,867.3,2
+11100000,854.2,2
+11200000,862.8,2
+11300000,861.3,2
+11400000,852.1,2
+11500000,857.3,2
+11600000,849.2,2
+11700000,854.6,2
+11800000,854.0,2
+11900000,863.6,2
+12000000,864.4,2
+12100000,815.6,2
+12200000,859.7,2
+12300000,860.0,2
+12400000,845.5,2
+12500000,863.5,2
+12600000,844.4,2
+12700000,861.1,2
+12800000,860.9,2
+12900000,864.5,2
+13000000,915.4,2
+13100000,904.3,2
+13200000,896.1,2
+13300000,906.4,2
+13400000,911.4,2
+13500000,886.9,2
+13600000,890.4,2
+13700000,905.9,2
+13800000,892.8,2
+13900000,911.8,2
+14000000,906.2,2
diff --git a/results/lift-cube.csv b/results/lift-cube.csv
new file mode 100644
index 0000000..973599e
--- /dev/null
+++ b/results/lift-cube.csv
@@ -0,0 +1,124 @@
+step,success,seed
+0,0.0,2
+100000,1.0,2
+200000,0.9,2
+300000,1.0,2
+400000,0.8,2
+500000,1.0,2
+600000,1.0,2
+700000,0.9,2
+800000,1.0,2
+900000,0.9,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,0.9,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+2100000,1.0,2
+2200000,1.0,2
+2300000,0.9,2
+2400000,1.0,2
+2500000,1.0,2
+2600000,1.0,2
+2700000,1.0,2
+2800000,1.0,2
+2900000,1.0,2
+3000000,1.0,2
+3100000,1.0,2
+3200000,0.9,2
+3300000,0.9,2
+3400000,1.0,2
+3500000,1.0,2
+3600000,0.7,2
+3700000,1.0,2
+3800000,1.0,2
+3900000,1.0,2
+4000000,1.0,2
+0,0.0,1
+100000,0.9,1
+200000,0.8,1
+300000,0.9,1
+400000,1.0,1
+500000,0.9,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+2100000,1.0,1
+2200000,1.0,1
+2300000,0.9,1
+2400000,1.0,1
+2500000,1.0,1
+2600000,1.0,1
+2700000,1.0,1
+2800000,1.0,1
+2900000,0.9,1
+3000000,1.0,1
+3100000,1.0,1
+3200000,1.0,1
+3300000,1.0,1
+3400000,1.0,1
+3500000,1.0,1
+3600000,1.0,1
+3700000,1.0,1
+3800000,0.8,1
+3900000,0.9,1
+4000000,1.0,1
+0,0.0,3
+100000,0.8,3
+200000,1.0,3
+300000,0.9,3
+400000,1.0,3
+500000,0.9,3
+600000,1.0,3
+700000,1.0,3
+800000,0.9,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,0.9,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,0.9,3
+2000000,1.0,3
+2100000,1.0,3
+2200000,1.0,3
+2300000,1.0,3
+2400000,1.0,3
+2500000,1.0,3
+2600000,1.0,3
+2700000,1.0,3
+2800000,1.0,3
+2900000,1.0,3
+3000000,1.0,3
+3100000,1.0,3
+3200000,1.0,3
+3300000,1.0,3
+3400000,1.0,3
+3500000,1.0,3
+3600000,1.0,3
+3700000,0.9,3
+3800000,1.0,3
+3900000,1.0,3
+4000000,1.0,3
diff --git a/results/mw-assembly.csv b/results/mw-assembly.csv
new file mode 100644
index 0000000..ffb3af1
--- /dev/null
+++ b/results/mw-assembly.csv
@@ -0,0 +1,64 @@
+step,success,seed
+0,0.0,3
+100000,0.1,3
+200000,0.8,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+2000000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.0,1
+200000,0.0,1
+300000,0.0,1
+400000,0.0,1
+500000,0.0,1
+600000,0.0,1
+700000,0.0,1
+800000,0.0,1
+900000,0.0,1
+1000000,0.0,1
+1100000,0.0,1
+1200000,0.2,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-basketball.csv b/results/mw-basketball.csv
new file mode 100644
index 0000000..2777a24
--- /dev/null
+++ b/results/mw-basketball.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.2,3
+200000,0.5,3
+300000,0.9,3
+400000,1.0,3
+500000,0.6,3
+600000,1.0,3
+700000,1.0,3
+800000,0.8,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,0.9,3
+1300000,0.9,3
+1400000,0.7,3
+1500000,1.0,3
+1600000,0.9,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,0.0,2
+300000,0.5,2
+400000,0.7,2
+500000,1.0,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,0.9,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,0.9,2
+1800000,0.9,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.4,1
+200000,0.8,1
+300000,0.6,1
+400000,1.0,1
+500000,0.9,1
+600000,0.9,1
+700000,0.9,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.9,1
+1300000,0.8,1
+1400000,0.9,1
+1500000,0.9,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-bin-picking.csv b/results/mw-bin-picking.csv
new file mode 100644
index 0000000..1a9fc1f
--- /dev/null
+++ b/results/mw-bin-picking.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.6,3
+200000,1.0,3
+300000,0.8,3
+400000,0.9,3
+500000,1.0,3
+600000,0.9,3
+700000,0.9,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,0.9,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,0.9,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,0.9,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,0.9,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,0.9,2
+0,0.0,1
+100000,0.0,1
+200000,1.0,1
+300000,0.9,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,0.9,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-box-close.csv b/results/mw-box-close.csv
new file mode 100644
index 0000000..8a4d362
--- /dev/null
+++ b/results/mw-box-close.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.7,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,0.9,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.8,2
+200000,1.0,2
+300000,0.9,2
+400000,1.0,2
+500000,1.0,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,0.9,2
+2000000,0.9,2
+0,0.0,1
+100000,1.0,1
+200000,0.8,1
+300000,0.9,1
+400000,0.9,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,0.9,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,0.9,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-button-press-topdown-wall.csv b/results/mw-button-press-topdown-wall.csv
new file mode 100644
index 0000000..5c0c3a7
--- /dev/null
+++ b/results/mw-button-press-topdown-wall.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.8,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,0.7,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-button-press-topdown.csv b/results/mw-button-press-topdown.csv
new file mode 100644
index 0000000..cde7e65
--- /dev/null
+++ b/results/mw-button-press-topdown.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-button-press-wall.csv b/results/mw-button-press-wall.csv
new file mode 100644
index 0000000..11b5931
--- /dev/null
+++ b/results/mw-button-press-wall.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.6,3
+200000,0.7,3
+300000,0.9,3
+400000,0.9,3
+500000,1.0,3
+600000,0.7,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,0.9,3
+1900000,1.0,3
+0,0.0,2
+100000,0.7,2
+200000,1.0,2
+300000,1.0,2
+400000,0.9,2
+500000,0.8,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,0.9,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.7,1
+200000,1.0,1
+300000,0.7,1
+400000,1.0,1
+500000,1.0,1
+600000,0.9,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.5,1
+1300000,1.0,1
+1400000,0.7,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-button-press.csv b/results/mw-button-press.csv
new file mode 100644
index 0000000..f7a4f18
--- /dev/null
+++ b/results/mw-button-press.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.5,3
+300000,0.5,3
+400000,0.6,3
+500000,1.0,3
+600000,0.5,3
+700000,1.0,3
+800000,0.3,3
+900000,0.6,3
+1000000,0.8,3
+1100000,0.7,3
+1200000,0.9,3
+1300000,0.7,3
+1400000,0.7,3
+1500000,0.2,3
+1600000,0.6,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,0.1,2
+300000,0.4,2
+400000,0.6,2
+500000,0.9,2
+600000,0.4,2
+700000,0.3,2
+800000,1.0,2
+900000,0.5,2
+1000000,0.7,2
+1100000,0.8,2
+1200000,1.0,2
+1300000,0.7,2
+1400000,1.0,2
+1500000,0.9,2
+1600000,0.9,2
+1700000,0.9,2
+1800000,1.0,2
+1900000,0.6,2
+2000000,0.8,2
+0,0.0,1
+100000,0.8,1
+200000,0.0,1
+300000,0.5,1
+400000,1.0,1
+500000,0.8,1
+600000,0.9,1
+700000,0.7,1
+800000,1.0,1
+900000,0.5,1
+1000000,0.8,1
+1100000,0.7,1
+1200000,0.7,1
+1300000,1.0,1
+1400000,0.8,1
+1500000,0.9,1
+1600000,1.0,1
+1700000,0.9,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-coffee-button.csv b/results/mw-coffee-button.csv
new file mode 100644
index 0000000..001f8e4
--- /dev/null
+++ b/results/mw-coffee-button.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.4,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-coffee-pull.csv b/results/mw-coffee-pull.csv
new file mode 100644
index 0000000..7d594c9
--- /dev/null
+++ b/results/mw-coffee-pull.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.8,3
+200000,0.9,3
+300000,0.7,3
+400000,1.0,3
+500000,0.9,3
+600000,0.9,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,0.9,3
+1200000,0.9,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.3,2
+200000,0.9,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,0.9,2
+700000,0.8,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,0.9,2
+1300000,0.8,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.0,1
+200000,0.8,1
+300000,1.0,1
+400000,1.0,1
+500000,0.8,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,0.8,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,0.9,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,0.9,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-coffee-push.csv b/results/mw-coffee-push.csv
new file mode 100644
index 0000000..ac53242
--- /dev/null
+++ b/results/mw-coffee-push.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.6,3
+200000,0.8,3
+300000,0.8,3
+400000,0.8,3
+500000,1.0,3
+600000,0.8,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,0.9,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.6,2
+200000,0.8,2
+300000,0.9,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.8,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,0.9,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.2,1
+200000,0.4,1
+300000,0.7,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,0.9,1
+900000,0.9,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.9,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,0.9,1
diff --git a/results/mw-dial-turn.csv b/results/mw-dial-turn.csv
new file mode 100644
index 0000000..fc62674
--- /dev/null
+++ b/results/mw-dial-turn.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.3,3
+200000,0.5,3
+300000,0.4,3
+400000,0.4,3
+500000,0.4,3
+600000,0.5,3
+700000,0.9,3
+800000,0.7,3
+900000,0.8,3
+1000000,0.9,3
+1100000,0.9,3
+1200000,0.8,3
+1300000,0.9,3
+1400000,0.8,3
+1500000,0.8,3
+1600000,0.7,3
+1700000,1.0,3
+1800000,0.6,3
+1900000,0.8,3
+0,0.0,2
+100000,0.3,2
+200000,0.9,2
+300000,0.9,2
+400000,0.7,2
+500000,0.9,2
+600000,0.7,2
+700000,0.9,2
+800000,0.7,2
+900000,0.8,2
+1000000,0.8,2
+1100000,0.6,2
+1200000,0.8,2
+1300000,0.9,2
+1400000,0.5,2
+1500000,0.6,2
+1600000,0.8,2
+1700000,0.7,2
+1800000,0.9,2
+1900000,0.8,2
+2000000,0.7,2
+0,0.0,1
+100000,0.8,1
+200000,0.7,1
+300000,0.8,1
+400000,0.5,1
+500000,0.8,1
+600000,0.8,1
+700000,0.4,1
+800000,0.9,1
+900000,0.8,1
+1000000,1.0,1
+1100000,0.9,1
+1200000,0.8,1
+1300000,0.9,1
+1400000,0.7,1
+1500000,0.8,1
+1600000,0.9,1
+1700000,0.7,1
+1800000,0.9,1
+1900000,1.0,1
+2000000,0.9,1
diff --git a/results/mw-disassemble.csv b/results/mw-disassemble.csv
new file mode 100644
index 0000000..1b8bcad
--- /dev/null
+++ b/results/mw-disassemble.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,0.9,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.7,2
+200000,1.0,2
+300000,0.8,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,0.9,2
+900000,1.0,2
+1000000,0.9,2
+1100000,0.9,2
+1200000,0.9,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.0,1
+200000,0.0,1
+300000,0.0,1
+400000,0.0,1
+500000,0.0,1
+600000,0.0,1
+700000,0.0,1
+800000,0.0,1
+900000,0.0,1
+1000000,0.0,1
+1100000,0.0,1
+1200000,0.0,1
+1300000,0.0,1
+1400000,0.0,1
+1500000,0.0,1
+1600000,0.0,1
+1700000,0.0,1
+1800000,0.0,1
+1900000,0.0,1
+2000000,0.0,1
diff --git a/results/mw-door-close.csv b/results/mw-door-close.csv
new file mode 100644
index 0000000..2a9802f
--- /dev/null
+++ b/results/mw-door-close.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0,3
+100000,1,3
+200000,1,3
+300000,1,3
+400000,1,3
+500000,1,3
+600000,1,3
+700000,1,3
+800000,1,3
+900000,1,3
+1000000,1,3
+1100000,1,3
+1200000,1,3
+1300000,1,3
+1400000,1,3
+1500000,1,3
+1600000,1,3
+1700000,1,3
+1800000,1,3
+1900000,1,3
+0,0,1
+100000,1,1
+200000,1,1
+300000,1,1
+400000,1,1
+500000,1,1
+600000,1,1
+700000,1,1
+800000,1,1
+900000,1,1
+1000000,1,1
+1100000,1,1
+1200000,1,1
+1300000,1,1
+1400000,1,1
+1500000,1,1
+1600000,1,1
+1700000,1,1
+1800000,1,1
+1900000,1,1
+2000000,1,1
+0,0,2
+100000,1,2
+200000,1,2
+300000,1,2
+400000,1,2
+500000,1,2
+600000,1,2
+700000,1,2
+800000,1,2
+900000,1,2
+1000000,1,2
+1100000,1,2
+1200000,1,2
+1300000,1,2
+1400000,1,2
+1500000,1,2
+1600000,1,2
+1700000,1,2
+1800000,1,2
+1900000,1,2
+2000000,1,2
diff --git a/results/mw-door-lock.csv b/results/mw-door-lock.csv
new file mode 100644
index 0000000..37e1dd1
--- /dev/null
+++ b/results/mw-door-lock.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.8,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.5,2
+200000,1.0,2
+300000,0.9,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,0.9,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-door-open.csv b/results/mw-door-open.csv
new file mode 100644
index 0000000..33147bf
--- /dev/null
+++ b/results/mw-door-open.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.7,3
+200000,0.7,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,0.9,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,0.9,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,0.7,3
+1700000,1.0,3
+1800000,0.9,3
+1900000,1.0,3
+0,0.0,2
+100000,0.6,2
+200000,1.0,2
+300000,0.6,2
+400000,1.0,2
+500000,0.6,2
+600000,0.8,2
+700000,0.9,2
+800000,1.0,2
+900000,0.9,2
+1000000,0.9,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,0.7,2
+1500000,0.8,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,0.9,2
+2000000,0.9,2
+0,0.0,1
+100000,0.7,1
+200000,0.5,1
+300000,1.0,1
+400000,1.0,1
+500000,0.8,1
+600000,0.4,1
+700000,1.0,1
+800000,0.7,1
+900000,0.9,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.7,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,0.8,1
+1600000,0.7,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,0.8,1
diff --git a/results/mw-door-unlock.csv b/results/mw-door-unlock.csv
new file mode 100644
index 0000000..9e28368
--- /dev/null
+++ b/results/mw-door-unlock.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.9,2
+200000,0.7,2
+300000,0.9,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-drawer-close.csv b/results/mw-drawer-close.csv
new file mode 100644
index 0000000..d16a1dc
--- /dev/null
+++ b/results/mw-drawer-close.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.1,3
+100000,0.9,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.1,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,0.9,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,0.9,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-drawer-open.csv b/results/mw-drawer-open.csv
new file mode 100644
index 0000000..5f5138a
--- /dev/null
+++ b/results/mw-drawer-open.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,0.8,3
+600000,0.9,3
+700000,1.0,3
+800000,0.9,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,0.8,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,0.6,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,0.9,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,0.9,1
+700000,1.0,1
+800000,0.8,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-faucet-close.csv b/results/mw-faucet-close.csv
new file mode 100644
index 0000000..1404e20
--- /dev/null
+++ b/results/mw-faucet-close.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,0.9,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-faucet-open.csv b/results/mw-faucet-open.csv
new file mode 100644
index 0000000..e89699f
--- /dev/null
+++ b/results/mw-faucet-open.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,0.8,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,0.9,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,0.9,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.9,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,0.9,2
+1000000,1.0,2
+1100000,0.9,2
+1200000,0.9,2
+1300000,0.9,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.9,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,0.9,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-hammer.csv b/results/mw-hammer.csv
new file mode 100644
index 0000000..c66aa4c
--- /dev/null
+++ b/results/mw-hammer.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.8,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.9,2
+200000,1.0,2
+300000,0.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,0.3,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,0.9,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-hand-insert.csv b/results/mw-hand-insert.csv
new file mode 100644
index 0000000..a31ea68
--- /dev/null
+++ b/results/mw-hand-insert.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.8,3
+200000,1.0,3
+300000,1.0,3
+400000,0.9,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,0.9,2
+300000,0.7,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,0.9,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.9,1
+200000,1.0,1
+300000,0.9,1
+400000,0.9,1
+500000,0.8,1
+600000,0.9,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-handle-press-side.csv b/results/mw-handle-press-side.csv
new file mode 100644
index 0000000..cde7e65
--- /dev/null
+++ b/results/mw-handle-press-side.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-handle-press.csv b/results/mw-handle-press.csv
new file mode 100644
index 0000000..85a742e
--- /dev/null
+++ b/results/mw-handle-press.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,0.9,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.9,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,0.9,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-handle-pull-side.csv b/results/mw-handle-pull-side.csv
new file mode 100644
index 0000000..a66cb0f
--- /dev/null
+++ b/results/mw-handle-pull-side.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,0.9,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.9,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,0.9,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-handle-pull.csv b/results/mw-handle-pull.csv
new file mode 100644
index 0000000..9d9ca47
--- /dev/null
+++ b/results/mw-handle-pull.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.0,3
+300000,0.8,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,0.0,2
+300000,0.9,2
+400000,0.8,2
+500000,1.0,2
+600000,1.0,2
+700000,0.8,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,0.9,2
+1200000,0.9,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,0.9,1
+400000,0.9,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-lever-pull.csv b/results/mw-lever-pull.csv
new file mode 100644
index 0000000..61015e8
--- /dev/null
+++ b/results/mw-lever-pull.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.4,3
+200000,0.8,3
+300000,0.8,3
+400000,0.7,3
+500000,0.3,3
+600000,1.0,3
+700000,0.2,3
+800000,0.8,3
+900000,0.8,3
+1000000,0.8,3
+1100000,0.7,3
+1200000,1.0,3
+1300000,0.9,3
+1400000,0.6,3
+1500000,0.8,3
+1600000,0.8,3
+1700000,1.0,3
+1800000,0.1,3
+1900000,0.9,3
+0,0.0,2
+100000,0.9,2
+200000,0.6,2
+300000,0.7,2
+400000,0.8,2
+500000,0.3,2
+600000,0.9,2
+700000,0.6,2
+800000,0.8,2
+900000,0.9,2
+1000000,0.9,2
+1100000,0.7,2
+1200000,0.9,2
+1300000,0.9,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,0.9,2
+1700000,0.7,2
+1800000,1.0,2
+1900000,0.6,2
+2000000,0.8,2
+0,0.0,1
+100000,0.7,1
+200000,0.2,1
+300000,0.8,1
+400000,0.9,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,0.8,1
+1100000,0.5,1
+1200000,0.8,1
+1300000,0.8,1
+1400000,0.7,1
+1500000,0.9,1
+1600000,0.8,1
+1700000,0.7,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,0.7,1
diff --git a/results/mw-peg-insert-side.csv b/results/mw-peg-insert-side.csv
new file mode 100644
index 0000000..47364de
--- /dev/null
+++ b/results/mw-peg-insert-side.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.9,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,0.9,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,0.8,1
+300000,0.9,1
+400000,1.0,1
+500000,1.0,1
+600000,0.9,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,0.9,1
+1600000,1.0,1
+1700000,0.9,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.8,2
+200000,0.7,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-peg-unplug-side.csv b/results/mw-peg-unplug-side.csv
new file mode 100644
index 0000000..269a6a7
--- /dev/null
+++ b/results/mw-peg-unplug-side.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.7,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,0.9,3
+1100000,0.9,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,0.9,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.6,2
+200000,1.0,2
+300000,0.8,2
+400000,0.9,2
+500000,0.9,2
+600000,1.0,2
+700000,0.9,2
+800000,0.9,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,0.9,2
+1500000,0.9,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.8,1
+200000,0.9,1
+300000,0.8,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,0.9,1
+1100000,0.8,1
+1200000,1.0,1
+1300000,0.9,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-pick-out-of-hole.csv b/results/mw-pick-out-of-hole.csv
new file mode 100644
index 0000000..17fa040
--- /dev/null
+++ b/results/mw-pick-out-of-hole.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.5,3
+200000,0.9,3
+300000,0.8,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,0.9,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,0.9,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.5,2
+200000,1.0,2
+300000,0.7,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,0.9,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.8,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,0.8,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,0.9,2
+2000000,1.0,2
+0,0.0,1
+100000,0.6,1
+200000,0.7,1
+300000,0.6,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,0.9,1
+800000,1.0,1
+900000,0.9,1
+1000000,0.9,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,0.8,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-pick-place-wall.csv b/results/mw-pick-place-wall.csv
new file mode 100644
index 0000000..db7e0d9
--- /dev/null
+++ b/results/mw-pick-place-wall.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.0,3
+300000,0.1,3
+400000,0.3,3
+500000,0.7,3
+600000,0.8,3
+700000,0.8,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,0.6,3
+1300000,0.5,3
+1400000,1.0,3
+1500000,0.8,3
+1600000,0.8,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,0.5,3
+0,0.0,2
+100000,0.0,2
+200000,0.0,2
+300000,0.0,2
+400000,0.1,2
+500000,0.6,2
+600000,0.8,2
+700000,0.8,2
+800000,1.0,2
+900000,0.9,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,0.9,2
+1300000,0.3,2
+1400000,0.9,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,0.9,2
+1900000,0.9,2
+2000000,1.0,2
+0,0.0,1
+100000,0.0,1
+200000,0.0,1
+300000,0.5,1
+400000,0.6,1
+500000,0.7,1
+600000,1.0,1
+700000,0.9,1
+800000,0.9,1
+900000,1.0,1
+1000000,1.0,1
+1100000,0.4,1
+1200000,0.9,1
+1300000,0.9,1
+1400000,0.8,1
+1500000,0.8,1
+1600000,1.0,1
+1700000,0.9,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,0.7,1
diff --git a/results/mw-pick-place.csv b/results/mw-pick-place.csv
new file mode 100644
index 0000000..87d4d8b
--- /dev/null
+++ b/results/mw-pick-place.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.6,3
+300000,0.6,3
+400000,0.9,3
+500000,0.9,3
+600000,1.0,3
+700000,0.9,3
+800000,0.7,3
+900000,0.9,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,0.9,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,0.9,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,0.1,1
+200000,0.7,1
+300000,0.8,1
+400000,0.8,1
+500000,0.5,1
+600000,1.0,1
+700000,1.0,1
+800000,0.7,1
+900000,1.0,1
+1000000,0.5,1
+1100000,0.9,1
+1200000,0.9,1
+1300000,1.0,1
+1400000,0.4,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.2,2
+200000,0.6,2
+300000,0.8,2
+400000,0.8,2
+500000,0.9,2
+600000,1.0,2
+700000,0.9,2
+800000,0.8,2
+900000,0.9,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,0.9,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,0.9,2
+1600000,1.0,2
+1700000,0.9,2
+1800000,0.8,2
+1900000,1.0,2
+2000000,0.6,2
diff --git a/results/mw-plate-slide-back-side.csv b/results/mw-plate-slide-back-side.csv
new file mode 100644
index 0000000..88c5a18
--- /dev/null
+++ b/results/mw-plate-slide-back-side.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,0.3,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-plate-slide-back.csv b/results/mw-plate-slide-back.csv
new file mode 100644
index 0000000..2a9802f
--- /dev/null
+++ b/results/mw-plate-slide-back.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0,3
+100000,1,3
+200000,1,3
+300000,1,3
+400000,1,3
+500000,1,3
+600000,1,3
+700000,1,3
+800000,1,3
+900000,1,3
+1000000,1,3
+1100000,1,3
+1200000,1,3
+1300000,1,3
+1400000,1,3
+1500000,1,3
+1600000,1,3
+1700000,1,3
+1800000,1,3
+1900000,1,3
+0,0,1
+100000,1,1
+200000,1,1
+300000,1,1
+400000,1,1
+500000,1,1
+600000,1,1
+700000,1,1
+800000,1,1
+900000,1,1
+1000000,1,1
+1100000,1,1
+1200000,1,1
+1300000,1,1
+1400000,1,1
+1500000,1,1
+1600000,1,1
+1700000,1,1
+1800000,1,1
+1900000,1,1
+2000000,1,1
+0,0,2
+100000,1,2
+200000,1,2
+300000,1,2
+400000,1,2
+500000,1,2
+600000,1,2
+700000,1,2
+800000,1,2
+900000,1,2
+1000000,1,2
+1100000,1,2
+1200000,1,2
+1300000,1,2
+1400000,1,2
+1500000,1,2
+1600000,1,2
+1700000,1,2
+1800000,1,2
+1900000,1,2
+2000000,1,2
diff --git a/results/mw-plate-slide-side.csv b/results/mw-plate-slide-side.csv
new file mode 100644
index 0000000..b537d57
--- /dev/null
+++ b/results/mw-plate-slide-side.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.9,3
+200000,0.7,3
+300000,0.9,3
+400000,0.8,3
+500000,0.3,3
+600000,0.8,3
+700000,0.8,3
+800000,0.6,3
+900000,1.0,3
+1000000,0.2,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,0.9,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,0.8,2
+500000,0.9,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,0.9,2
+1700000,0.9,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.4,1
+200000,0.2,1
+300000,0.6,1
+400000,0.8,1
+500000,0.7,1
+600000,0.9,1
+700000,0.9,1
+800000,0.8,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,0.8,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,0.8,1
diff --git a/results/mw-plate-slide.csv b/results/mw-plate-slide.csv
new file mode 100644
index 0000000..062a9c0
--- /dev/null
+++ b/results/mw-plate-slide.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-push-back.csv b/results/mw-push-back.csv
new file mode 100644
index 0000000..84a31aa
--- /dev/null
+++ b/results/mw-push-back.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.0,3
+300000,0.0,3
+400000,0.0,3
+500000,0.0,3
+600000,0.0,3
+700000,0.0,3
+800000,0.0,3
+900000,0.0,3
+1000000,0.0,3
+1100000,0.0,3
+1200000,0.0,3
+1300000,0.0,3
+1400000,0.0,3
+1500000,0.0,3
+1600000,0.0,3
+1700000,0.0,3
+1800000,0.0,3
+1900000,0.0,3
+0,0.0,1
+100000,0.1,1
+200000,0.7,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,0.9,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-push-wall.csv b/results/mw-push-wall.csv
new file mode 100644
index 0000000..a31840b
--- /dev/null
+++ b/results/mw-push-wall.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.9,3
+200000,1.0,3
+300000,1.0,3
+400000,0.7,3
+500000,1.0,3
+600000,1.0,3
+700000,0.5,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,0.6,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.9,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.5,2
+200000,0.9,2
+300000,0.9,2
+400000,0.8,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-push.csv b/results/mw-push.csv
new file mode 100644
index 0000000..13bbe48
--- /dev/null
+++ b/results/mw-push.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.6,3
+200000,0.9,3
+300000,0.9,3
+400000,0.4,3
+500000,0.7,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,0.7,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,0.9,1
+200000,1.0,1
+300000,0.5,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,0.9,1
+1200000,1.0,1
+1300000,0.9,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.4,2
+200000,1.0,2
+300000,0.9,2
+400000,0.8,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-reach-wall.csv b/results/mw-reach-wall.csv
new file mode 100644
index 0000000..edf23a7
--- /dev/null
+++ b/results/mw-reach-wall.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,0.8,3
+300000,1.0,3
+400000,1.0,3
+500000,0.8,3
+600000,0.9,3
+700000,0.9,3
+800000,1.0,3
+900000,0.9,3
+1000000,1.0,3
+1100000,0.9,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.6,2
+200000,0.9,2
+300000,0.8,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,0.7,2
+1700000,1.0,2
+1800000,0.9,2
+1900000,1.0,2
+2000000,0.9,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,0.9,1
+400000,0.1,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,0.8,1
+1000000,0.9,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,0.9,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,0.7,1
+1900000,0.9,1
+2000000,0.9,1
diff --git a/results/mw-reach.csv b/results/mw-reach.csv
new file mode 100644
index 0000000..9045bfb
--- /dev/null
+++ b/results/mw-reach.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,0.2,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,0.8,3
+1700000,0.9,3
+1800000,1.0,3
+1900000,0.6,3
+0,0.0,1
+100000,0.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,0.8,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.6,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,0.9,1
+1600000,0.9,1
+1700000,1.0,1
+1800000,0.9,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.9,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,0.9,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.6,2
+1400000,1.0,2
+1500000,0.8,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,0.9,2
diff --git a/results/mw-shelf-place.csv b/results/mw-shelf-place.csv
new file mode 100644
index 0000000..7db31a7
--- /dev/null
+++ b/results/mw-shelf-place.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,0.9,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,0.6,2
+300000,1.0,2
+400000,0.9,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.0,1
+200000,0.9,1
+300000,0.6,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,0.9,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-soccer.csv b/results/mw-soccer.csv
new file mode 100644
index 0000000..6562fc2
--- /dev/null
+++ b/results/mw-soccer.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.3,3
+200000,0.9,3
+300000,0.6,3
+400000,0.6,3
+500000,0.8,3
+600000,0.9,3
+700000,0.9,3
+800000,0.9,3
+900000,1.0,3
+1000000,0.8,3
+1100000,1.0,3
+1200000,0.7,3
+1300000,0.9,3
+1400000,1.0,3
+1500000,0.9,3
+1600000,0.7,3
+1700000,1.0,3
+1800000,0.9,3
+1900000,0.8,3
+0,0.0,2
+100000,0.3,2
+200000,0.5,2
+300000,0.8,2
+400000,1.0,2
+500000,1.0,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,0.1,1
+200000,0.7,1
+300000,0.6,1
+400000,0.8,1
+500000,0.8,1
+600000,0.7,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,0.8,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,0.7,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-stick-pull.csv b/results/mw-stick-pull.csv
new file mode 100644
index 0000000..7184c28
--- /dev/null
+++ b/results/mw-stick-pull.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.8,3
+300000,1.0,3
+400000,0.6,3
+500000,1.0,3
+600000,0.8,3
+700000,1.0,3
+800000,0.8,3
+900000,0.8,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,0.7,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,0.2,2
+200000,0.9,2
+300000,0.9,2
+400000,0.8,2
+500000,0.8,2
+600000,0.8,2
+700000,0.9,2
+800000,0.9,2
+900000,1.0,2
+1000000,0.7,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.9,2
+1400000,0.9,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,0.9,2
+2000000,0.9,2
+0,0.0,1
+100000,0.8,1
+200000,0.8,1
+300000,0.9,1
+400000,0.8,1
+500000,0.9,1
+600000,0.7,1
+700000,0.5,1
+800000,0.9,1
+900000,0.9,1
+1000000,0.9,1
+1100000,1.0,1
+1200000,0.9,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,0.8,1
+1600000,1.0,1
+1700000,0.9,1
+1800000,0.9,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-stick-push.csv b/results/mw-stick-push.csv
new file mode 100644
index 0000000..82d975f
--- /dev/null
+++ b/results/mw-stick-push.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.4,3
+200000,0.9,3
+300000,0.9,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,0.9,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,0.7,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.9,2
+1400000,1.0,2
+1500000,0.9,2
+1600000,0.8,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,0.7,2
+2000000,1.0,2
+0,0.0,1
+100000,0.8,1
+200000,1.0,1
+300000,0.9,1
+400000,1.0,1
+500000,0.9,1
+600000,1.0,1
+700000,0.7,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,0.8,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,0.9,1
diff --git a/results/mw-sweep-into.csv b/results/mw-sweep-into.csv
new file mode 100644
index 0000000..3582f5b
--- /dev/null
+++ b/results/mw-sweep-into.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.9,3
+200000,0.7,3
+300000,0.8,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,0.9,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,0.9,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,0.8,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.9,2
+200000,0.2,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,0.9,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-sweep.csv b/results/mw-sweep.csv
new file mode 100644
index 0000000..9550d59
--- /dev/null
+++ b/results/mw-sweep.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,0.8,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,0.7,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.9,2
+200000,1.0,2
+300000,0.9,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,0.9,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/mw-window-close.csv b/results/mw-window-close.csv
new file mode 100644
index 0000000..4b4e898
--- /dev/null
+++ b/results/mw-window-close.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,1.0,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,2
+100000,1.0,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,1.0,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+0,0.0,1
+100000,1.0,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,0.8,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
diff --git a/results/mw-window-open.csv b/results/mw-window-open.csv
new file mode 100644
index 0000000..d3ee1f0
--- /dev/null
+++ b/results/mw-window-open.csv
@@ -0,0 +1,63 @@
+step,success,seed
+0,0.0,3
+100000,0.8,3
+200000,1.0,3
+300000,1.0,3
+400000,1.0,3
+500000,1.0,3
+600000,1.0,3
+700000,1.0,3
+800000,1.0,3
+900000,1.0,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,1.0,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,1.0,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+0,0.0,1
+100000,0.3,1
+200000,1.0,1
+300000,1.0,1
+400000,1.0,1
+500000,1.0,1
+600000,1.0,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,1.0,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+0,0.0,2
+100000,0.9,2
+200000,1.0,2
+300000,1.0,2
+400000,1.0,2
+500000,1.0,2
+600000,1.0,2
+700000,1.0,2
+800000,1.0,2
+900000,1.0,2
+1000000,0.9,2
+1100000,1.0,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
diff --git a/results/myo-hand-key-turn-hard.csv b/results/myo-hand-key-turn-hard.csv
new file mode 100644
index 0000000..ab84721
--- /dev/null
+++ b/results/myo-hand-key-turn-hard.csv
@@ -0,0 +1,122 @@
+step,success,seed
+0,0.0,1
+50000,0.0,1
+100000,0.0,1
+150000,0.0,1
+200000,0.0,1
+250000,0.0,1
+300000,0.0,1
+350000,0.0,1
+400000,0.0,1
+450000,0.0,1
+500000,0.0,1
+550000,0.0,1
+600000,0.0,1
+650000,0.0,1
+700000,0.0,1
+750000,0.0,1
+800000,0.0,1
+850000,0.0,1
+900000,0.0,1
+950000,0.0,1
+1000000,0.0,1
+1050000,0.0,1
+1100000,0.0,1
+1150000,0.0,1
+1200000,0.0,1
+1250000,0.0,1
+1300000,0.0,1
+1350000,0.0,1
+1400000,0.0,1
+1450000,0.0,1
+1500000,0.0,1
+1550000,0.0,1
+1600000,0.0,1
+1650000,0.0,1
+1700000,0.1,1
+1750000,0.0,1
+1800000,0.0,1
+1850000,0.0,1
+1900000,0.0,1
+1950000,0.0,1
+2000000,0.0,1
+0,0.0,3
+50000,0.0,3
+100000,0.0,3
+150000,0.0,3
+200000,0.0,3
+250000,0.0,3
+300000,0.0,3
+350000,0.0,3
+400000,0.0,3
+450000,0.0,3
+500000,0.0,3
+550000,0.0,3
+600000,0.0,3
+650000,0.0,3
+700000,0.0,3
+750000,0.0,3
+800000,0.0,3
+850000,0.0,3
+900000,0.0,3
+950000,0.0,3
+1000000,0.0,3
+1050000,0.0,3
+1100000,0.0,3
+1150000,0.0,3
+1200000,0.0,3
+1250000,0.0,3
+1300000,0.0,3
+1350000,0.0,3
+1400000,0.0,3
+1450000,0.0,3
+1500000,0.0,3
+1550000,0.0,3
+1600000,0.0,3
+1650000,0.0,3
+1700000,0.0,3
+1750000,0.0,3
+1800000,0.0,3
+1850000,0.0,3
+1900000,0.0,3
+1950000,0.0,3
+0,0.0,2
+50000,0.0,2
+100000,0.0,2
+150000,0.0,2
+200000,0.0,2
+250000,0.0,2
+300000,0.0,2
+350000,0.0,2
+400000,0.0,2
+450000,0.0,2
+500000,0.0,2
+550000,0.0,2
+600000,0.0,2
+650000,0.0,2
+700000,0.0,2
+750000,0.0,2
+800000,0.0,2
+850000,0.0,2
+900000,0.0,2
+950000,0.0,2
+1000000,0.0,2
+1050000,0.0,2
+1100000,0.0,2
+1150000,0.0,2
+1200000,0.0,2
+1250000,0.0,2
+1300000,0.0,2
+1350000,0.0,2
+1400000,0.0,2
+1450000,0.0,2
+1500000,0.0,2
+1550000,0.1,2
+1600000,0.1,2
+1650000,0.0,2
+1700000,0.1,2
+1750000,0.1,2
+1800000,0.1,2
+1850000,0.2,2
+1900000,0.0,2
+1950000,0.1,2
diff --git a/results/myo-hand-key-turn.csv b/results/myo-hand-key-turn.csv
new file mode 100644
index 0000000..1748894
--- /dev/null
+++ b/results/myo-hand-key-turn.csv
@@ -0,0 +1,122 @@
+step,success,seed
+0,0.0,1
+50000,0.0,1
+100000,1.0,1
+150000,1.0,1
+200000,1.0,1
+250000,1.0,1
+300000,1.0,1
+350000,1.0,1
+400000,1.0,1
+450000,1.0,1
+500000,1.0,1
+550000,1.0,1
+600000,1.0,1
+650000,1.0,1
+700000,1.0,1
+750000,1.0,1
+800000,1.0,1
+850000,1.0,1
+900000,1.0,1
+950000,1.0,1
+1000000,1.0,1
+1050000,1.0,1
+1100000,1.0,1
+1150000,1.0,1
+1200000,1.0,1
+1250000,1.0,1
+1300000,1.0,1
+1350000,1.0,1
+1400000,1.0,1
+1450000,1.0,1
+1500000,1.0,1
+1550000,1.0,1
+1600000,1.0,1
+1650000,1.0,1
+1700000,1.0,1
+1750000,0.9,1
+1800000,1.0,1
+1850000,1.0,1
+1900000,1.0,1
+1950000,1.0,1
+2000000,1.0,1
+0,0.0,2
+50000,1.0,2
+100000,1.0,2
+150000,1.0,2
+200000,1.0,2
+250000,1.0,2
+300000,1.0,2
+350000,1.0,2
+400000,1.0,2
+450000,1.0,2
+500000,1.0,2
+550000,1.0,2
+600000,1.0,2
+650000,1.0,2
+700000,1.0,2
+750000,1.0,2
+800000,1.0,2
+850000,1.0,2
+900000,1.0,2
+950000,1.0,2
+1000000,1.0,2
+1050000,1.0,2
+1100000,1.0,2
+1150000,1.0,2
+1200000,1.0,2
+1250000,1.0,2
+1300000,1.0,2
+1350000,1.0,2
+1400000,1.0,2
+1450000,1.0,2
+1500000,1.0,2
+1550000,1.0,2
+1600000,1.0,2
+1650000,1.0,2
+1700000,1.0,2
+1750000,1.0,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,1.0,2
+1950000,1.0,2
+0,0.0,3
+50000,1.0,3
+100000,0.9,3
+150000,0.5,3
+200000,1.0,3
+250000,1.0,3
+300000,1.0,3
+350000,1.0,3
+400000,1.0,3
+450000,1.0,3
+500000,1.0,3
+550000,1.0,3
+600000,1.0,3
+650000,1.0,3
+700000,1.0,3
+750000,1.0,3
+800000,1.0,3
+850000,1.0,3
+900000,1.0,3
+950000,1.0,3
+1000000,1.0,3
+1050000,1.0,3
+1100000,1.0,3
+1150000,1.0,3
+1200000,1.0,3
+1250000,1.0,3
+1300000,1.0,3
+1350000,1.0,3
+1400000,1.0,3
+1450000,1.0,3
+1500000,1.0,3
+1550000,1.0,3
+1600000,1.0,3
+1650000,1.0,3
+1700000,1.0,3
+1750000,1.0,3
+1800000,1.0,3
+1850000,1.0,3
+1900000,1.0,3
+1950000,1.0,3
diff --git a/results/myo-hand-obj-hold-hard.csv b/results/myo-hand-obj-hold-hard.csv
new file mode 100644
index 0000000..6263c99
--- /dev/null
+++ b/results/myo-hand-obj-hold-hard.csv
@@ -0,0 +1,122 @@
+step,success,seed
+0,0.0,1
+50000,0.0,1
+100000,0.1,1
+150000,0.2,1
+200000,0.0,1
+250000,0.1,1
+300000,0.3,1
+350000,0.3,1
+400000,0.3,1
+450000,0.5,1
+500000,0.3,1
+550000,0.4,1
+600000,0.4,1
+650000,0.4,1
+700000,0.8,1
+750000,0.5,1
+800000,0.5,1
+850000,0.5,1
+900000,0.8,1
+950000,0.7,1
+1000000,0.7,1
+1050000,0.9,1
+1100000,0.6,1
+1150000,0.5,1
+1200000,0.7,1
+1250000,0.7,1
+1300000,0.7,1
+1350000,0.6,1
+1400000,0.9,1
+1450000,0.7,1
+1500000,0.7,1
+1550000,0.8,1
+1600000,0.8,1
+1650000,0.4,1
+1700000,0.9,1
+1750000,0.6,1
+1800000,0.5,1
+1850000,0.8,1
+1900000,0.4,1
+1950000,0.9,1
+2000000,0.8,1
+0,0.0,3
+50000,0.0,3
+100000,0.1,3
+150000,0.0,3
+200000,0.1,3
+250000,0.2,3
+300000,0.2,3
+350000,0.0,3
+400000,0.3,3
+450000,0.2,3
+500000,0.4,3
+550000,0.2,3
+600000,0.4,3
+650000,0.5,3
+700000,0.5,3
+750000,0.4,3
+800000,0.7,3
+850000,0.8,3
+900000,0.5,3
+950000,0.6,3
+1000000,0.4,3
+1050000,0.8,3
+1100000,0.5,3
+1150000,0.7,3
+1200000,0.7,3
+1250000,0.7,3
+1300000,0.7,3
+1350000,0.7,3
+1400000,0.9,3
+1450000,0.8,3
+1500000,0.6,3
+1550000,0.6,3
+1600000,0.9,3
+1650000,0.7,3
+1700000,0.9,3
+1750000,0.8,3
+1800000,0.9,3
+1850000,0.6,3
+1900000,1.0,3
+1950000,0.8,3
+0,0.0,2
+50000,0.0,2
+100000,0.1,2
+150000,0.2,2
+200000,0.4,2
+250000,0.0,2
+300000,0.1,2
+350000,0.3,2
+400000,0.6,2
+450000,0.4,2
+500000,0.3,2
+550000,0.6,2
+600000,0.5,2
+650000,0.8,2
+700000,0.5,2
+750000,0.6,2
+800000,0.5,2
+850000,0.7,2
+900000,0.6,2
+950000,0.4,2
+1000000,0.6,2
+1050000,1.0,2
+1100000,0.6,2
+1150000,0.9,2
+1200000,0.7,2
+1250000,0.8,2
+1300000,0.8,2
+1350000,1.0,2
+1400000,0.8,2
+1450000,0.9,2
+1500000,1.0,2
+1550000,0.9,2
+1600000,1.0,2
+1650000,0.9,2
+1700000,1.0,2
+1750000,1.0,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,0.9,2
+1950000,1.0,2
diff --git a/results/myo-hand-obj-hold.csv b/results/myo-hand-obj-hold.csv
new file mode 100644
index 0000000..43af016
--- /dev/null
+++ b/results/myo-hand-obj-hold.csv
@@ -0,0 +1,122 @@
+step,success,seed
+0,0.0,1
+50000,0.0,1
+100000,0.0,1
+150000,0.8,1
+200000,0.3,1
+250000,1.0,1
+300000,0.8,1
+350000,0.5,1
+400000,1.0,1
+450000,1.0,1
+500000,1.0,1
+550000,1.0,1
+600000,1.0,1
+650000,1.0,1
+700000,1.0,1
+750000,1.0,1
+800000,1.0,1
+850000,1.0,1
+900000,1.0,1
+950000,1.0,1
+1000000,1.0,1
+1050000,1.0,1
+1100000,1.0,1
+1150000,1.0,1
+1200000,1.0,1
+1250000,1.0,1
+1300000,1.0,1
+1350000,0.8,1
+1400000,0.9,1
+1450000,1.0,1
+1500000,1.0,1
+1550000,1.0,1
+1600000,0.2,1
+1650000,1.0,1
+1700000,1.0,1
+1750000,1.0,1
+1800000,1.0,1
+1850000,1.0,1
+1900000,1.0,1
+1950000,1.0,1
+2000000,1.0,1
+0,0.0,3
+50000,0.0,3
+100000,0.0,3
+150000,0.8,3
+200000,1.0,3
+250000,0.9,3
+300000,1.0,3
+350000,1.0,3
+400000,1.0,3
+450000,1.0,3
+500000,1.0,3
+550000,1.0,3
+600000,1.0,3
+650000,1.0,3
+700000,1.0,3
+750000,1.0,3
+800000,1.0,3
+850000,1.0,3
+900000,1.0,3
+950000,1.0,3
+1000000,1.0,3
+1050000,1.0,3
+1100000,1.0,3
+1150000,1.0,3
+1200000,1.0,3
+1250000,1.0,3
+1300000,1.0,3
+1350000,1.0,3
+1400000,1.0,3
+1450000,1.0,3
+1500000,1.0,3
+1550000,1.0,3
+1600000,1.0,3
+1650000,1.0,3
+1700000,1.0,3
+1750000,1.0,3
+1800000,1.0,3
+1850000,1.0,3
+1900000,1.0,3
+1950000,1.0,3
+0,0.0,2
+50000,0.0,2
+100000,0.0,2
+150000,0.0,2
+200000,0.1,2
+250000,0.9,2
+300000,0.9,2
+350000,0.5,2
+400000,1.0,2
+450000,1.0,2
+500000,1.0,2
+550000,0.9,2
+600000,1.0,2
+650000,1.0,2
+700000,1.0,2
+750000,1.0,2
+800000,1.0,2
+850000,1.0,2
+900000,1.0,2
+950000,1.0,2
+1000000,1.0,2
+1050000,1.0,2
+1100000,1.0,2
+1150000,1.0,2
+1200000,1.0,2
+1250000,1.0,2
+1300000,1.0,2
+1350000,1.0,2
+1400000,1.0,2
+1450000,0.8,2
+1500000,0.5,2
+1550000,1.0,2
+1600000,1.0,2
+1650000,1.0,2
+1700000,1.0,2
+1750000,0.0,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,1.0,2
+1950000,1.0,2
diff --git a/results/myo-hand-pen-twirl-hard.csv b/results/myo-hand-pen-twirl-hard.csv
new file mode 100644
index 0000000..1898f21
--- /dev/null
+++ b/results/myo-hand-pen-twirl-hard.csv
@@ -0,0 +1,121 @@
+step,success,seed
+0,0.0,3
+50000,0.0,3
+100000,0.0,3
+150000,0.2,3
+200000,0.0,3
+250000,0.1,3
+300000,0.0,3
+350000,0.2,3
+400000,0.4,3
+450000,0.4,3
+500000,0.4,3
+550000,0.4,3
+600000,0.3,3
+650000,0.3,3
+700000,0.3,3
+750000,0.4,3
+800000,0.4,3
+850000,0.4,3
+900000,0.4,3
+950000,0.4,3
+1000000,0.4,3
+1050000,0.2,3
+1100000,0.4,3
+1150000,0.5,3
+1200000,0.4,3
+1250000,0.4,3
+1300000,0.4,3
+1350000,0.5,3
+1400000,0.5,3
+1450000,0.4,3
+1500000,0.4,3
+1550000,0.4,3
+1600000,0.5,3
+1650000,0.5,3
+1700000,0.4,3
+1750000,0.2,3
+1800000,0.6,3
+1850000,0.4,3
+1900000,0.5,3
+1950000,0.3,3
+0,0.0,2
+50000,0.0,2
+100000,0.0,2
+150000,0.0,2
+200000,0.0,2
+250000,0.1,2
+300000,0.1,2
+350000,0.3,2
+400000,0.5,2
+450000,0.1,2
+500000,0.3,2
+550000,0.3,2
+600000,0.2,2
+650000,0.3,2
+700000,0.3,2
+750000,0.4,2
+800000,0.2,2
+850000,0.5,2
+900000,0.4,2
+950000,0.6,2
+1000000,0.4,2
+1050000,0.3,2
+1100000,0.1,2
+1150000,0.1,2
+1200000,0.4,2
+1250000,0.4,2
+1300000,0.5,2
+1350000,0.2,2
+1400000,0.4,2
+1450000,0.3,2
+1500000,0.3,2
+1550000,0.4,2
+1600000,0.6,2
+1650000,0.4,2
+1700000,0.4,2
+1750000,0.5,2
+1800000,0.7,2
+1850000,0.6,2
+1900000,0.5,2
+1950000,0.6,2
+0,0.0,1
+50000,0.0,1
+100000,0.0,1
+150000,0.0,1
+200000,0.0,1
+250000,0.0,1
+300000,0.0,1
+350000,0.2,1
+400000,0.1,1
+450000,0.2,1
+500000,0.4,1
+550000,0.2,1
+600000,0.6,1
+650000,0.1,1
+700000,0.2,1
+750000,0.4,1
+800000,0.6,1
+850000,0.6,1
+900000,0.3,1
+950000,0.7,1
+1000000,0.4,1
+1050000,0.5,1
+1100000,0.2,1
+1150000,0.5,1
+1200000,0.5,1
+1250000,0.5,1
+1300000,0.4,1
+1350000,0.5,1
+1400000,0.3,1
+1450000,0.6,1
+1500000,0.6,1
+1550000,0.6,1
+1600000,0.4,1
+1650000,0.5,1
+1700000,0.8,1
+1750000,0.5,1
+1800000,0.6,1
+1850000,0.5,1
+1900000,0.7,1
+1950000,0.3,1
diff --git a/results/myo-hand-pen-twirl.csv b/results/myo-hand-pen-twirl.csv
new file mode 100644
index 0000000..b4faee1
--- /dev/null
+++ b/results/myo-hand-pen-twirl.csv
@@ -0,0 +1,121 @@
+step,success,seed
+0,0.0,3
+50000,0.0,3
+100000,0.0,3
+150000,0.0,3
+200000,0.0,3
+250000,0.0,3
+300000,0.1,3
+350000,0.0,3
+400000,0.2,3
+450000,0.2,3
+500000,0.6,3
+550000,0.7,3
+600000,0.9,3
+650000,1.0,3
+700000,1.0,3
+750000,1.0,3
+800000,1.0,3
+850000,1.0,3
+900000,1.0,3
+950000,1.0,3
+1000000,1.0,3
+1050000,1.0,3
+1100000,1.0,3
+1150000,1.0,3
+1200000,1.0,3
+1250000,1.0,3
+1300000,1.0,3
+1350000,1.0,3
+1400000,0.8,3
+1450000,1.0,3
+1500000,1.0,3
+1550000,1.0,3
+1600000,1.0,3
+1650000,0.8,3
+1700000,1.0,3
+1750000,1.0,3
+1800000,1.0,3
+1850000,1.0,3
+1900000,1.0,3
+1950000,1.0,3
+0,0.0,2
+50000,0.0,2
+100000,0.0,2
+150000,0.0,2
+200000,0.0,2
+250000,0.0,2
+300000,0.0,2
+350000,0.0,2
+400000,0.0,2
+450000,0.0,2
+500000,0.0,2
+550000,0.0,2
+600000,0.0,2
+650000,0.1,2
+700000,0.0,2
+750000,0.0,2
+800000,0.0,2
+850000,0.0,2
+900000,0.0,2
+950000,0.0,2
+1000000,0.1,2
+1050000,0.4,2
+1100000,0.5,2
+1150000,0.9,2
+1200000,0.8,2
+1250000,1.0,2
+1300000,1.0,2
+1350000,1.0,2
+1400000,1.0,2
+1450000,1.0,2
+1500000,1.0,2
+1550000,1.0,2
+1600000,1.0,2
+1650000,1.0,2
+1700000,1.0,2
+1750000,1.0,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,1.0,2
+1950000,1.0,2
+0,0.0,1
+50000,0.0,1
+100000,0.0,1
+150000,0.1,1
+200000,0.3,1
+250000,0.8,1
+300000,1.0,1
+350000,1.0,1
+400000,1.0,1
+450000,1.0,1
+500000,1.0,1
+550000,1.0,1
+600000,1.0,1
+650000,0.9,1
+700000,1.0,1
+750000,1.0,1
+800000,1.0,1
+850000,1.0,1
+900000,1.0,1
+950000,1.0,1
+1000000,1.0,1
+1050000,1.0,1
+1100000,1.0,1
+1150000,1.0,1
+1200000,0.4,1
+1250000,1.0,1
+1300000,1.0,1
+1350000,0.9,1
+1400000,1.0,1
+1450000,1.0,1
+1500000,1.0,1
+1550000,1.0,1
+1600000,0.2,1
+1650000,1.0,1
+1700000,1.0,1
+1750000,1.0,1
+1800000,0.9,1
+1850000,1.0,1
+1900000,1.0,1
+1950000,1.0,1
diff --git a/results/myo-hand-pose-hard.csv b/results/myo-hand-pose-hard.csv
new file mode 100644
index 0000000..f66b5d2
--- /dev/null
+++ b/results/myo-hand-pose-hard.csv
@@ -0,0 +1,121 @@
+step,success,seed
+0,0.0,3
+50000,0.0,3
+100000,0.0,3
+150000,0.0,3
+200000,0.0,3
+250000,0.0,3
+300000,0.0,3
+350000,0.0,3
+400000,0.0,3
+450000,0.0,3
+500000,0.0,3
+550000,0.0,3
+600000,0.0,3
+650000,0.0,3
+700000,0.0,3
+750000,0.0,3
+800000,0.0,3
+850000,0.0,3
+900000,0.0,3
+950000,0.0,3
+1000000,0.0,3
+1050000,0.0,3
+1100000,0.0,3
+1150000,0.1,3
+1200000,0.0,3
+1250000,0.0,3
+1300000,0.0,3
+1350000,0.0,3
+1400000,0.0,3
+1450000,0.0,3
+1500000,0.0,3
+1550000,0.0,3
+1600000,0.0,3
+1650000,0.0,3
+1700000,0.0,3
+1750000,0.2,3
+1800000,0.0,3
+1850000,0.2,3
+1900000,0.1,3
+1950000,0.2,3
+0,0.0,2
+50000,0.0,2
+100000,0.0,2
+150000,0.0,2
+200000,0.0,2
+250000,0.0,2
+300000,0.0,2
+350000,0.0,2
+400000,0.0,2
+450000,0.0,2
+500000,0.0,2
+550000,0.0,2
+600000,0.0,2
+650000,0.0,2
+700000,0.0,2
+750000,0.0,2
+800000,0.0,2
+850000,0.0,2
+900000,0.0,2
+950000,0.0,2
+1000000,0.0,2
+1050000,0.1,2
+1100000,0.0,2
+1150000,0.0,2
+1200000,0.0,2
+1250000,0.0,2
+1300000,0.0,2
+1350000,0.0,2
+1400000,0.0,2
+1450000,0.0,2
+1500000,0.0,2
+1550000,0.1,2
+1600000,0.0,2
+1650000,0.0,2
+1700000,0.0,2
+1750000,0.0,2
+1800000,0.1,2
+1850000,0.0,2
+1900000,0.2,2
+1950000,0.1,2
+0,0.0,1
+50000,0.0,1
+100000,0.0,1
+150000,0.0,1
+200000,0.0,1
+250000,0.0,1
+300000,0.0,1
+350000,0.0,1
+400000,0.0,1
+450000,0.0,1
+500000,0.0,1
+550000,0.0,1
+600000,0.0,1
+650000,0.0,1
+700000,0.0,1
+750000,0.0,1
+800000,0.0,1
+850000,0.0,1
+900000,0.1,1
+950000,0.0,1
+1000000,0.0,1
+1050000,0.0,1
+1100000,0.1,1
+1150000,0.0,1
+1200000,0.0,1
+1250000,0.0,1
+1300000,0.0,1
+1350000,0.0,1
+1400000,0.1,1
+1450000,0.0,1
+1500000,0.1,1
+1550000,0.1,1
+1600000,0.0,1
+1650000,0.0,1
+1700000,0.2,1
+1750000,0.3,1
+1800000,0.0,1
+1850000,0.2,1
+1900000,0.2,1
+1950000,0.1,1
diff --git a/results/myo-hand-pose.csv b/results/myo-hand-pose.csv
new file mode 100644
index 0000000..9cb2d26
--- /dev/null
+++ b/results/myo-hand-pose.csv
@@ -0,0 +1,123 @@
+step,success,seed
+0,0.0,3
+50000,0.0,3
+100000,0.3,3
+150000,0.9,3
+200000,0.9,3
+250000,1.0,3
+300000,1.0,3
+350000,1.0,3
+400000,1.0,3
+450000,1.0,3
+500000,1.0,3
+550000,1.0,3
+600000,1.0,3
+650000,1.0,3
+700000,0.0,3
+750000,1.0,3
+800000,1.0,3
+850000,1.0,3
+900000,1.0,3
+950000,1.0,3
+1000000,1.0,3
+1050000,1.0,3
+1100000,1.0,3
+1150000,1.0,3
+1200000,1.0,3
+1250000,1.0,3
+1300000,1.0,3
+1350000,1.0,3
+1400000,1.0,3
+1450000,1.0,3
+1500000,1.0,3
+1550000,1.0,3
+1600000,1.0,3
+1650000,1.0,3
+1700000,1.0,3
+1750000,1.0,3
+1800000,1.0,3
+1850000,1.0,3
+1900000,1.0,3
+1950000,1.0,3
+2000000,1.0,3
+0,0.0,2
+50000,0.0,2
+100000,0.0,2
+150000,0.9,2
+200000,1.0,2
+250000,1.0,2
+300000,1.0,2
+350000,1.0,2
+400000,1.0,2
+450000,1.0,2
+500000,1.0,2
+550000,1.0,2
+600000,1.0,2
+650000,1.0,2
+700000,1.0,2
+750000,1.0,2
+800000,1.0,2
+850000,1.0,2
+900000,1.0,2
+950000,1.0,2
+1000000,1.0,2
+1050000,0.9,2
+1100000,1.0,2
+1150000,1.0,2
+1200000,1.0,2
+1250000,1.0,2
+1300000,1.0,2
+1350000,1.0,2
+1400000,1.0,2
+1450000,1.0,2
+1500000,1.0,2
+1550000,1.0,2
+1600000,1.0,2
+1650000,1.0,2
+1700000,1.0,2
+1750000,1.0,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,1.0,2
+1950000,1.0,2
+2000000,1.0,2
+0,0.0,1
+50000,0.0,1
+100000,0.4,1
+150000,1.0,1
+200000,1.0,1
+250000,1.0,1
+300000,1.0,1
+350000,1.0,1
+400000,1.0,1
+450000,1.0,1
+500000,1.0,1
+550000,1.0,1
+600000,1.0,1
+650000,1.0,1
+700000,1.0,1
+750000,1.0,1
+800000,1.0,1
+850000,1.0,1
+900000,1.0,1
+950000,1.0,1
+1000000,1.0,1
+1050000,1.0,1
+1100000,1.0,1
+1150000,1.0,1
+1200000,1.0,1
+1250000,1.0,1
+1300000,1.0,1
+1350000,1.0,1
+1400000,1.0,1
+1450000,1.0,1
+1500000,1.0,1
+1550000,1.0,1
+1600000,1.0,1
+1650000,1.0,1
+1700000,1.0,1
+1750000,1.0,1
+1800000,1.0,1
+1850000,1.0,1
+1900000,1.0,1
+1950000,1.0,1
diff --git a/results/myo-hand-reach-hard.csv b/results/myo-hand-reach-hard.csv
new file mode 100644
index 0000000..c5752e0
--- /dev/null
+++ b/results/myo-hand-reach-hard.csv
@@ -0,0 +1,121 @@
+step,success,seed
+0,0.0,2
+50000,0.0,2
+100000,0.2,2
+150000,0.8,2
+200000,0.8,2
+250000,0.5,2
+300000,1.0,2
+350000,0.9,2
+400000,0.8,2
+450000,0.6,2
+500000,0.8,2
+550000,0.9,2
+600000,1.0,2
+650000,1.0,2
+700000,1.0,2
+750000,0.7,2
+800000,0.8,2
+850000,1.0,2
+900000,0.7,2
+950000,0.7,2
+1000000,0.8,2
+1050000,1.0,2
+1100000,0.8,2
+1150000,0.7,2
+1200000,1.0,2
+1250000,0.7,2
+1300000,0.9,2
+1350000,0.8,2
+1400000,0.8,2
+1450000,1.0,2
+1500000,1.0,2
+1550000,1.0,2
+1600000,1.0,2
+1650000,1.0,2
+1700000,0.8,2
+1750000,0.6,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,1.0,2
+1950000,1.0,2
+0,0.0,3
+50000,0.0,3
+100000,0.6,3
+150000,0.9,3
+200000,0.6,3
+250000,1.0,3
+300000,1.0,3
+350000,0.6,3
+400000,1.0,3
+450000,0.8,3
+500000,0.7,3
+550000,0.8,3
+600000,1.0,3
+650000,0.9,3
+700000,0.9,3
+750000,0.9,3
+800000,1.0,3
+850000,0.8,3
+900000,1.0,3
+950000,0.9,3
+1000000,0.7,3
+1050000,1.0,3
+1100000,0.9,3
+1150000,1.0,3
+1200000,0.8,3
+1250000,0.6,3
+1300000,0.9,3
+1350000,0.6,3
+1400000,1.0,3
+1450000,0.9,3
+1500000,0.8,3
+1550000,0.9,3
+1600000,0.7,3
+1650000,1.0,3
+1700000,1.0,3
+1750000,1.0,3
+1800000,0.9,3
+1850000,0.8,3
+1900000,0.9,3
+1950000,0.9,3
+0,0.0,1
+50000,0.0,1
+100000,0.2,1
+150000,0.1,1
+200000,0.8,1
+250000,0.7,1
+300000,0.8,1
+350000,0.8,1
+400000,0.9,1
+450000,0.9,1
+500000,0.9,1
+550000,1.0,1
+600000,0.9,1
+650000,1.0,1
+700000,1.0,1
+750000,0.9,1
+800000,1.0,1
+850000,1.0,1
+900000,1.0,1
+950000,1.0,1
+1000000,1.0,1
+1050000,0.9,1
+1100000,1.0,1
+1150000,1.0,1
+1200000,1.0,1
+1250000,1.0,1
+1300000,0.9,1
+1350000,0.9,1
+1400000,1.0,1
+1450000,0.9,1
+1500000,1.0,1
+1550000,1.0,1
+1600000,0.8,1
+1650000,1.0,1
+1700000,1.0,1
+1750000,0.9,1
+1800000,1.0,1
+1850000,1.0,1
+1900000,1.0,1
+1950000,1.0,1
diff --git a/results/myo-hand-reach.csv b/results/myo-hand-reach.csv
new file mode 100644
index 0000000..f98a248
--- /dev/null
+++ b/results/myo-hand-reach.csv
@@ -0,0 +1,123 @@
+step,success,seed
+0,0.0,3
+50000,1.0,3
+100000,1.0,3
+150000,1.0,3
+200000,1.0,3
+250000,1.0,3
+300000,1.0,3
+350000,1.0,3
+400000,1.0,3
+450000,1.0,3
+500000,1.0,3
+550000,1.0,3
+600000,1.0,3
+650000,1.0,3
+700000,1.0,3
+750000,1.0,3
+800000,1.0,3
+850000,1.0,3
+900000,1.0,3
+950000,1.0,3
+1000000,1.0,3
+1050000,1.0,3
+1100000,1.0,3
+1150000,1.0,3
+1200000,1.0,3
+1250000,1.0,3
+1300000,1.0,3
+1350000,1.0,3
+1400000,1.0,3
+1450000,1.0,3
+1500000,1.0,3
+1550000,1.0,3
+1600000,1.0,3
+1650000,1.0,3
+1700000,1.0,3
+1750000,1.0,3
+1800000,1.0,3
+1850000,1.0,3
+1900000,1.0,3
+1950000,1.0,3
+2000000,1.0,3
+0,0.0,2
+50000,1.0,2
+100000,1.0,2
+150000,1.0,2
+200000,1.0,2
+250000,1.0,2
+300000,1.0,2
+350000,1.0,2
+400000,0.6,2
+450000,1.0,2
+500000,0.9,2
+550000,1.0,2
+600000,0.9,2
+650000,1.0,2
+700000,1.0,2
+750000,1.0,2
+800000,1.0,2
+850000,1.0,2
+900000,1.0,2
+950000,1.0,2
+1000000,1.0,2
+1050000,1.0,2
+1100000,1.0,2
+1150000,1.0,2
+1200000,1.0,2
+1250000,1.0,2
+1300000,1.0,2
+1350000,1.0,2
+1400000,1.0,2
+1450000,1.0,2
+1500000,1.0,2
+1550000,1.0,2
+1600000,1.0,2
+1650000,1.0,2
+1700000,1.0,2
+1750000,1.0,2
+1800000,1.0,2
+1850000,1.0,2
+1900000,1.0,2
+1950000,1.0,2
+2000000,1.0,2
+0,0.0,1
+50000,0.9,1
+100000,1.0,1
+150000,0.8,1
+200000,1.0,1
+250000,1.0,1
+300000,1.0,1
+350000,0.8,1
+400000,1.0,1
+450000,1.0,1
+500000,1.0,1
+550000,1.0,1
+600000,1.0,1
+650000,1.0,1
+700000,1.0,1
+750000,1.0,1
+800000,1.0,1
+850000,1.0,1
+900000,1.0,1
+950000,1.0,1
+1000000,1.0,1
+1050000,1.0,1
+1100000,1.0,1
+1150000,1.0,1
+1200000,1.0,1
+1250000,1.0,1
+1300000,1.0,1
+1350000,1.0,1
+1400000,1.0,1
+1450000,1.0,1
+1500000,1.0,1
+1550000,1.0,1
+1600000,1.0,1
+1650000,1.0,1
+1700000,1.0,1
+1750000,1.0,1
+1800000,1.0,1
+1850000,1.0,1
+1900000,1.0,1
+1950000,1.0,1
diff --git a/results/pendulum-spin.csv b/results/pendulum-spin.csv
new file mode 100644
index 0000000..eb38c7d
--- /dev/null
+++ b/results/pendulum-spin.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,183.8,3
+100000,834.5,3
+200000,873.1,3
+300000,834.7,3
+400000,823.1,3
+500000,828.5,3
+600000,848.2,3
+700000,854.4,3
+800000,825.5,3
+900000,823.2,3
+1000000,855.2,3
+1100000,859.6,3
+1200000,862.2,3
+1300000,860.5,3
+1400000,838.9,3
+1500000,853.4,3
+1600000,849.4,3
+1700000,813.8,3
+1800000,825.7,3
+1900000,832.9,3
+2000000,837.2,3
+2100000,822.0,3
+2200000,803.0,3
+2300000,846.4,3
+2400000,842.0,3
+2500000,843.7,3
+2600000,832.0,3
+2700000,830.2,3
+2800000,873.8,3
+2900000,877.5,3
+3000000,859.1,3
+3100000,842.4,3
+3200000,841.5,3
+3300000,829.2,3
+3400000,848.8,3
+3500000,829.8,3
+3600000,825.2,3
+3700000,864.3,3
+3800000,849.9,3
+3900000,861.8,3
+4000000,811.1,3
+0,103.4,2
+100000,869.3,2
+200000,829.8,2
+300000,835.5,2
+400000,893.6,2
+500000,837.5,2
+600000,884.5,2
+700000,852.2,2
+800000,832.2,2
+900000,847.7,2
+1000000,850.4,2
+1100000,850.1,2
+1200000,845.0,2
+1300000,829.3,2
+1400000,839.1,2
+1500000,861.4,2
+1600000,857.8,2
+1700000,865.7,2
+1800000,830.1,2
+1900000,823.0,2
+2000000,851.6,2
+2100000,853.9,2
+2200000,863.0,2
+2300000,821.6,2
+2400000,828.4,2
+2500000,844.1,2
+2600000,880.4,2
+2700000,844.9,2
+2800000,838.0,2
+2900000,851.7,2
+3000000,838.5,2
+3100000,843.5,2
+3200000,886.3,2
+3300000,866.4,2
+3400000,860.9,2
+3500000,876.8,2
+3600000,865.2,2
+3700000,860.6,2
+3800000,841.3,2
+3900000,860.4,2
+4000000,867.9,2
+0,73.6,1
+100000,810.8,1
+200000,826.8,1
+300000,840.6,1
+400000,806.2,1
+500000,869.9,1
+600000,842.6,1
+700000,865.2,1
+800000,867.9,1
+900000,866.0,1
+1000000,833.4,1
+1100000,833.4,1
+1200000,814.8,1
+1300000,826.6,1
+1400000,816.8,1
+1500000,846.9,1
+1600000,847.3,1
+1700000,886.5,1
+1800000,840.8,1
+1900000,849.1,1
+2000000,846.6,1
+2100000,854.1,1
+2200000,834.2,1
+2300000,800.8,1
+2400000,814.1,1
+2500000,828.6,1
+2600000,856.3,1
+2700000,857.2,1
+2800000,852.3,1
+2900000,829.1,1
+3000000,835.5,1
+3100000,827.5,1
+3200000,823.9,1
+3300000,816.1,1
+3400000,844.7,1
+3500000,874.2,1
+3600000,807.4,1
+3700000,876.9,1
+3800000,879.8,1
+3900000,858.6,1
+4000000,850.9,1
diff --git a/results/pendulum-swingup.csv b/results/pendulum-swingup.csv
new file mode 100644
index 0000000..959971d
--- /dev/null
+++ b/results/pendulum-swingup.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,1.4,3
+100000,756.0,3
+200000,876.9,3
+300000,814.8,3
+400000,801.8,3
+500000,816.3,3
+600000,853.2,3
+700000,862.0,3
+800000,803.2,3
+900000,806.4,3
+1000000,867.7,3
+1100000,848.9,3
+1200000,866.4,3
+1300000,864.5,3
+1400000,829.1,3
+1500000,855.6,3
+1600000,839.4,3
+1700000,791.5,3
+1800000,814.2,3
+1900000,821.0,3
+2000000,829.3,3
+2100000,804.0,3
+2200000,767.5,3
+2300000,828.0,3
+2400000,830.1,3
+2500000,836.2,3
+2600000,815.6,3
+2700000,812.1,3
+2800000,893.3,3
+2900000,887.6,3
+3000000,867.8,3
+3100000,837.1,3
+3200000,828.8,3
+3300000,816.9,3
+3400000,845.3,3
+3500000,819.7,3
+3600000,808.5,3
+3700000,860.1,3
+3800000,857.5,3
+3900000,862.2,3
+4000000,782.8,3
+0,0.0,1
+100000,563.7,1
+200000,801.6,1
+300000,830.2,1
+400000,772.6,1
+500000,884.6,1
+600000,835.7,1
+700000,866.1,1
+800000,891.9,1
+900000,877.9,1
+1000000,815.6,1
+1100000,827.0,1
+1200000,786.5,1
+1300000,803.8,1
+1400000,792.3,1
+1500000,837.8,1
+1600000,846.2,1
+1700000,904.9,1
+1800000,836.5,1
+1900000,843.7,1
+2000000,839.5,1
+2100000,853.5,1
+2200000,827.7,1
+2300000,771.1,1
+2400000,784.2,1
+2500000,805.8,1
+2600000,862.6,1
+2700000,855.7,1
+2800000,851.5,1
+2900000,818.5,1
+3000000,819.8,1
+3100000,804.5,1
+3200000,800.5,1
+3300000,789.5,1
+3400000,826.4,1
+3500000,882.3,1
+3600000,780.6,1
+3700000,898.8,1
+3800000,891.4,1
+3900000,866.4,1
+4000000,848.3,1
+0,0.0,2
+100000,852.2,2
+200000,795.0,2
+300000,819.5,2
+400000,827.4,2
+500000,824.2,2
+600000,905.7,2
+700000,854.4,2
+800000,817.7,2
+900000,844.1,2
+1000000,774.8,2
+1100000,849.2,2
+1200000,837.7,2
+1300000,810.9,2
+1400000,826.2,2
+1500000,862.1,2
+1600000,855.6,2
+1700000,875.8,2
+1800000,886.0,2
+1900000,811.5,2
+2000000,841.1,2
+2100000,848.1,2
+2200000,864.3,2
+2300000,801.2,2
+2400000,828.0,2
+2500000,831.4,2
+2600000,894.1,2
+2700000,847.4,2
+2800000,831.1,2
+2900000,842.8,2
+3000000,824.5,2
+3100000,830.5,2
+3200000,906.2,2
+3300000,871.5,2
+3400000,866.2,2
+3500000,885.5,2
+3600000,870.3,2
+3700000,861.9,2
+3800000,838.4,2
+3900000,849.5,2
+4000000,883.0,2
diff --git a/results/pick-cube.csv b/results/pick-cube.csv
new file mode 100644
index 0000000..f96df7a
--- /dev/null
+++ b/results/pick-cube.csv
@@ -0,0 +1,124 @@
+step,success,seed
+0,0.0,1
+100000,0.0,1
+200000,0.0,1
+300000,0.7,1
+400000,0.9,1
+500000,0.8,1
+600000,0.8,1
+700000,1.0,1
+800000,1.0,1
+900000,1.0,1
+1000000,1.0,1
+1100000,1.0,1
+1200000,1.0,1
+1300000,1.0,1
+1400000,1.0,1
+1500000,0.9,1
+1600000,0.9,1
+1700000,1.0,1
+1800000,1.0,1
+1900000,1.0,1
+2000000,1.0,1
+2100000,1.0,1
+2200000,1.0,1
+2300000,1.0,1
+2400000,0.9,1
+2500000,1.0,1
+2600000,1.0,1
+2700000,0.9,1
+2800000,1.0,1
+2900000,0.9,1
+3000000,1.0,1
+3100000,1.0,1
+3200000,1.0,1
+3300000,1.0,1
+3400000,0.9,1
+3500000,1.0,1
+3600000,1.0,1
+3700000,1.0,1
+3800000,0.9,1
+3900000,1.0,1
+4000000,1.0,1
+0,0.0,3
+100000,0.0,3
+200000,0.0,3
+300000,0.4,3
+400000,0.7,3
+500000,0.7,3
+600000,0.9,3
+700000,1.0,3
+800000,1.0,3
+900000,0.9,3
+1000000,1.0,3
+1100000,1.0,3
+1200000,0.9,3
+1300000,1.0,3
+1400000,1.0,3
+1500000,1.0,3
+1600000,0.9,3
+1700000,1.0,3
+1800000,1.0,3
+1900000,1.0,3
+2000000,1.0,3
+2100000,1.0,3
+2200000,1.0,3
+2300000,1.0,3
+2400000,1.0,3
+2500000,1.0,3
+2600000,1.0,3
+2700000,1.0,3
+2800000,1.0,3
+2900000,1.0,3
+3000000,1.0,3
+3100000,1.0,3
+3200000,1.0,3
+3300000,1.0,3
+3400000,1.0,3
+3500000,1.0,3
+3600000,1.0,3
+3700000,1.0,3
+3800000,1.0,3
+3900000,1.0,3
+4000000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,0.0,2
+300000,0.2,2
+400000,0.7,2
+500000,0.4,2
+600000,0.9,2
+700000,0.7,2
+800000,0.9,2
+900000,0.9,2
+1000000,0.8,2
+1100000,0.9,2
+1200000,1.0,2
+1300000,1.0,2
+1400000,1.0,2
+1500000,1.0,2
+1600000,1.0,2
+1700000,0.8,2
+1800000,1.0,2
+1900000,1.0,2
+2000000,1.0,2
+2100000,0.9,2
+2200000,1.0,2
+2300000,1.0,2
+2400000,1.0,2
+2500000,1.0,2
+2600000,1.0,2
+2700000,1.0,2
+2800000,1.0,2
+2900000,0.9,2
+3000000,0.9,2
+3100000,1.0,2
+3200000,1.0,2
+3300000,0.9,2
+3400000,1.0,2
+3500000,1.0,2
+3600000,0.9,2
+3700000,1.0,2
+3800000,1.0,2
+3900000,1.0,2
+4000000,1.0,2
diff --git a/results/pick-ycb.csv b/results/pick-ycb.csv
new file mode 100644
index 0000000..2e2a917
--- /dev/null
+++ b/results/pick-ycb.csv
@@ -0,0 +1,421 @@
+step,success,seed
+0,0.0,2
+100000,0.0,2
+200000,0.0,2
+300000,0.0,2
+400000,0.0,2
+500000,0.0,2
+600000,0.0,2
+700000,0.0,2
+800000,0.0,2
+900000,0.0,2
+1000000,0.0,2
+1100000,0.0,2
+1200000,0.0,2
+1300000,0.0,2
+1400000,0.0,2
+1500000,0.1,2
+1600000,0.0,2
+1700000,0.1,2
+1800000,0.1,2
+1900000,0.1,2
+2000000,0.2,2
+2100000,0.0,2
+2200000,0.2,2
+2300000,0.3,2
+2400000,0.2,2
+2500000,0.6,2
+2600000,0.3,2
+2700000,0.1,2
+2800000,0.4,2
+2900000,0.3,2
+3000000,0.2,2
+3100000,0.5,2
+3200000,0.5,2
+3300000,0.5,2
+3400000,0.5,2
+3500000,0.3,2
+3600000,0.4,2
+3700000,0.2,2
+3800000,0.4,2
+3900000,0.5,2
+4000000,0.4,2
+4100000,0.2,2
+4200000,0.5,2
+4300000,0.8,2
+4400000,0.7,2
+4500000,0.3,2
+4600000,0.5,2
+4700000,0.7,2
+4800000,0.5,2
+4900000,0.7,2
+5000000,0.3,2
+5100000,0.6,2
+5200000,0.7,2
+5300000,0.4,2
+5400000,0.3,2
+5500000,0.2,2
+5600000,0.6,2
+5700000,0.7,2
+5800000,0.6,2
+5900000,0.5,2
+6000000,0.3,2
+6100000,0.4,2
+6200000,0.5,2
+6300000,0.8,2
+6400000,0.7,2
+6500000,0.6,2
+6600000,0.5,2
+6700000,0.6,2
+6800000,0.6,2
+6900000,0.9,2
+7000000,0.5,2
+7100000,0.6,2
+7200000,0.7,2
+7300000,0.8,2
+7400000,0.7,2
+7500000,0.5,2
+7600000,0.5,2
+7700000,0.6,2
+7800000,0.5,2
+7900000,0.6,2
+8000000,0.5,2
+8100000,0.4,2
+8200000,0.7,2
+8300000,0.7,2
+8400000,0.6,2
+8500000,0.6,2
+8600000,0.5,2
+8700000,0.4,2
+8800000,0.6,2
+8900000,0.6,2
+9000000,0.6,2
+9100000,0.7,2
+9200000,0.9,2
+9300000,0.5,2
+9400000,0.7,2
+9500000,0.5,2
+9600000,0.6,2
+9700000,0.7,2
+9800000,0.4,2
+9900000,0.5,2
+10000000,0.6,2
+10100000,0.7,2
+10200000,0.7,2
+10300000,0.5,2
+10400000,0.3,2
+10500000,0.8,2
+10600000,0.9,2
+10700000,0.4,2
+10800000,0.4,2
+10900000,0.7,2
+11000000,0.6,2
+11100000,0.7,2
+11200000,0.7,2
+11300000,0.8,2
+11400000,0.8,2
+11500000,0.6,2
+11600000,0.8,2
+11700000,0.6,2
+11800000,0.6,2
+11900000,0.5,2
+12000000,0.6,2
+12100000,0.7,2
+12200000,0.6,2
+12300000,0.5,2
+12400000,0.7,2
+12500000,0.5,2
+12600000,0.4,2
+12700000,0.8,2
+12800000,0.5,2
+12900000,0.4,2
+13000000,0.8,2
+13100000,0.8,2
+13200000,0.6,2
+13300000,0.6,2
+13400000,0.3,2
+13500000,0.6,2
+13600000,0.6,2
+13700000,0.6,2
+13800000,0.6,2
+13900000,0.8,2
+0,0.0,3
+100000,0.0,3
+200000,0.0,3
+300000,0.0,3
+400000,0.0,3
+500000,0.0,3
+600000,0.0,3
+700000,0.0,3
+800000,0.0,3
+900000,0.0,3
+1000000,0.0,3
+1100000,0.0,3
+1200000,0.0,3
+1300000,0.0,3
+1400000,0.1,3
+1500000,0.0,3
+1600000,0.0,3
+1700000,0.0,3
+1800000,0.0,3
+1900000,0.0,3
+2000000,0.1,3
+2100000,0.2,3
+2200000,0.1,3
+2300000,0.2,3
+2400000,0.2,3
+2500000,0.0,3
+2600000,0.0,3
+2700000,0.1,3
+2800000,0.3,3
+2900000,0.3,3
+3000000,0.2,3
+3100000,0.2,3
+3200000,0.1,3
+3300000,0.3,3
+3400000,0.3,3
+3500000,0.4,3
+3600000,0.3,3
+3700000,0.3,3
+3800000,0.2,3
+3900000,0.2,3
+4000000,0.6,3
+4100000,0.3,3
+4200000,0.1,3
+4300000,0.3,3
+4400000,0.5,3
+4500000,0.2,3
+4600000,0.2,3
+4700000,0.5,3
+4800000,0.5,3
+4900000,0.3,3
+5000000,0.4,3
+5100000,0.4,3
+5200000,0.6,3
+5300000,0.5,3
+5400000,0.4,3
+5500000,0.3,3
+5600000,0.6,3
+5700000,0.5,3
+5800000,0.3,3
+5900000,0.7,3
+6000000,0.7,3
+6100000,0.7,3
+6200000,0.6,3
+6300000,0.4,3
+6400000,0.4,3
+6500000,0.7,3
+6600000,0.4,3
+6700000,0.5,3
+6800000,0.6,3
+6900000,0.7,3
+7000000,0.2,3
+7100000,0.8,3
+7200000,0.6,3
+7300000,0.6,3
+7400000,0.5,3
+7500000,0.3,3
+7600000,0.6,3
+7700000,0.5,3
+7800000,0.5,3
+7900000,0.4,3
+8000000,0.5,3
+8100000,0.5,3
+8200000,0.5,3
+8300000,0.6,3
+8400000,0.4,3
+8500000,0.3,3
+8600000,0.7,3
+8700000,0.3,3
+8800000,0.7,3
+8900000,0.8,3
+9000000,0.6,3
+9100000,0.7,3
+9200000,0.8,3
+9300000,0.5,3
+9400000,0.4,3
+9500000,0.6,3
+9600000,0.2,3
+9700000,0.5,3
+9800000,0.2,3
+9900000,0.6,3
+10000000,0.3,3
+10100000,0.5,3
+10200000,0.5,3
+10300000,0.4,3
+10400000,0.4,3
+10500000,0.5,3
+10600000,0.6,3
+10700000,0.5,3
+10800000,0.4,3
+10900000,0.6,3
+11000000,0.5,3
+11100000,0.3,3
+11200000,0.7,3
+11300000,0.6,3
+11400000,0.8,3
+11500000,0.8,3
+11600000,0.9,3
+11700000,0.7,3
+11800000,0.5,3
+11900000,0.5,3
+12000000,0.2,3
+12100000,0.6,3
+12200000,0.5,3
+12300000,0.5,3
+12400000,0.6,3
+12500000,0.4,3
+12600000,0.2,3
+12700000,0.8,3
+12800000,0.4,3
+12900000,0.8,3
+13000000,0.6,3
+13100000,0.6,3
+13200000,0.3,3
+13300000,0.8,3
+13400000,0.4,3
+13500000,0.4,3
+13600000,0.4,3
+13700000,0.5,3
+13800000,0.6,3
+13900000,0.5,3
+0,0.0,1
+100000,0.0,1
+200000,0.0,1
+300000,0.0,1
+400000,0.0,1
+500000,0.0,1
+600000,0.0,1
+700000,0.0,1
+800000,0.0,1
+900000,0.0,1
+1000000,0.0,1
+1100000,0.0,1
+1200000,0.0,1
+1300000,0.0,1
+1400000,0.1,1
+1500000,0.1,1
+1600000,0.1,1
+1700000,0.0,1
+1800000,0.3,1
+1900000,0.2,1
+2000000,0.1,1
+2100000,0.1,1
+2200000,0.1,1
+2300000,0.0,1
+2400000,0.0,1
+2500000,0.1,1
+2600000,0.0,1
+2700000,0.2,1
+2800000,0.3,1
+2900000,0.4,1
+3000000,0.2,1
+3100000,0.0,1
+3200000,0.6,1
+3300000,0.5,1
+3400000,0.4,1
+3500000,0.3,1
+3600000,0.5,1
+3700000,0.6,1
+3800000,0.2,1
+3900000,0.2,1
+4000000,0.4,1
+4100000,0.3,1
+4200000,0.5,1
+4300000,0.5,1
+4400000,0.6,1
+4500000,0.2,1
+4600000,0.4,1
+4700000,0.7,1
+4800000,0.8,1
+4900000,0.2,1
+5000000,0.3,1
+5100000,0.4,1
+5200000,0.8,1
+5300000,0.5,1
+5400000,0.3,1
+5500000,0.4,1
+5600000,0.6,1
+5700000,0.6,1
+5800000,0.5,1
+5900000,0.8,1
+6000000,0.3,1
+6100000,0.6,1
+6200000,0.5,1
+6300000,0.3,1
+6400000,0.7,1
+6500000,0.9,1
+6600000,0.5,1
+6700000,0.5,1
+6800000,0.4,1
+6900000,0.7,1
+7000000,0.4,1
+7100000,0.4,1
+7200000,0.5,1
+7300000,0.5,1
+7400000,0.6,1
+7500000,0.8,1
+7600000,0.7,1
+7700000,0.5,1
+7800000,0.4,1
+7900000,0.7,1
+8000000,0.5,1
+8100000,0.3,1
+8200000,0.6,1
+8300000,0.5,1
+8400000,0.4,1
+8500000,0.3,1
+8600000,0.7,1
+8700000,0.3,1
+8800000,0.7,1
+8900000,0.7,1
+9000000,0.6,1
+9100000,0.4,1
+9200000,0.6,1
+9300000,0.5,1
+9400000,0.8,1
+9500000,0.6,1
+9600000,0.4,1
+9700000,0.4,1
+9800000,0.4,1
+9900000,0.5,1
+10000000,0.7,1
+10100000,0.4,1
+10200000,0.4,1
+10300000,0.4,1
+10400000,0.8,1
+10500000,0.6,1
+10600000,0.4,1
+10700000,0.5,1
+10800000,0.4,1
+10900000,0.7,1
+11000000,0.5,1
+11100000,0.7,1
+11200000,0.5,1
+11300000,0.8,1
+11400000,0.4,1
+11500000,0.7,1
+11600000,0.6,1
+11700000,0.6,1
+11800000,0.5,1
+11900000,0.6,1
+12000000,0.7,1
+12100000,0.4,1
+12200000,0.5,1
+12300000,0.7,1
+12400000,0.5,1
+12500000,0.4,1
+12600000,0.6,1
+12700000,0.7,1
+12800000,0.4,1
+12900000,0.6,1
+13000000,0.9,1
+13100000,0.5,1
+13200000,0.6,1
+13300000,0.6,1
+13400000,0.3,1
+13500000,0.6,1
+13600000,0.7,1
+13700000,0.8,1
+13800000,0.6,1
+13900000,0.8,1
diff --git a/results/quadruped-run.csv b/results/quadruped-run.csv
new file mode 100644
index 0000000..1240fe7
--- /dev/null
+++ b/results/quadruped-run.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,278.5,2
+100000,385.7,2
+200000,801.8,2
+300000,888.2,2
+400000,925.2,2
+500000,933.5,2
+600000,928.9,2
+700000,935.4,2
+800000,943.7,2
+900000,952.0,2
+1000000,946.7,2
+1100000,943.0,2
+1200000,949.5,2
+1300000,942.5,2
+1400000,946.7,2
+1500000,949.6,2
+1600000,953.2,2
+1700000,946.8,2
+1800000,951.0,2
+1900000,954.2,2
+2000000,957.2,2
+2100000,948.5,2
+2200000,951.6,2
+2300000,954.2,2
+2400000,953.5,2
+2500000,953.8,2
+2600000,954.5,2
+2700000,952.3,2
+2800000,939.9,2
+2900000,944.4,2
+3000000,953.3,2
+3100000,952.7,2
+3200000,946.5,2
+3300000,954.4,2
+3400000,951.4,2
+3500000,954.3,2
+3600000,954.1,2
+3700000,952.2,2
+3800000,951.6,2
+3900000,962.3,2
+4000000,955.8,2
+0,129.4,1
+100000,121.4,1
+200000,691.1,1
+300000,874.7,1
+400000,915.7,1
+500000,936.4,1
+600000,947.5,1
+700000,938.9,1
+800000,941.3,1
+900000,927.8,1
+1000000,940.6,1
+1100000,945.0,1
+1200000,941.2,1
+1300000,954.5,1
+1400000,946.1,1
+1500000,956.9,1
+1600000,923.9,1
+1700000,952.9,1
+1800000,957.1,1
+1900000,957.4,1
+2000000,940.9,1
+2100000,954.6,1
+2200000,947.0,1
+2300000,956.1,1
+2400000,962.5,1
+2500000,953.8,1
+2600000,949.2,1
+2700000,950.2,1
+2800000,955.4,1
+2900000,957.6,1
+3000000,957.3,1
+3100000,960.9,1
+3200000,963.9,1
+3300000,951.2,1
+3400000,960.2,1
+3500000,954.8,1
+3600000,955.1,1
+3700000,956.1,1
+3800000,955.3,1
+3900000,960.8,1
+4000000,951.5,1
+0,202.7,3
+100000,210.5,3
+200000,784.2,3
+300000,921.9,3
+400000,948.7,3
+500000,949.0,3
+600000,942.5,3
+700000,941.6,3
+800000,941.5,3
+900000,906.1,3
+1000000,925.7,3
+1100000,935.9,3
+1200000,956.0,3
+1300000,948.1,3
+1400000,939.6,3
+1500000,932.7,3
+1600000,955.4,3
+1700000,952.4,3
+1800000,955.4,3
+1900000,943.2,3
+2000000,944.7,3
+2100000,957.5,3
+2200000,936.0,3
+2300000,954.9,3
+2400000,953.9,3
+2500000,944.4,3
+2600000,961.5,3
+2700000,951.5,3
+2800000,958.8,3
+2900000,962.6,3
+3000000,955.6,3
+3100000,960.5,3
+3200000,957.6,3
+3300000,958.5,3
+3400000,963.6,3
+3500000,948.5,3
+3600000,957.8,3
+3700000,960.0,3
+3800000,947.1,3
+3900000,957.7,3
+4000000,954.2,3
diff --git a/results/quadruped-walk.csv b/results/quadruped-walk.csv
new file mode 100644
index 0000000..234bfe6
--- /dev/null
+++ b/results/quadruped-walk.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,283.2,2
+100000,420.2,2
+200000,950.8,2
+300000,959.1,2
+400000,961.7,2
+500000,957.5,2
+600000,955.0,2
+700000,968.8,2
+800000,965.0,2
+900000,966.0,2
+1000000,968.2,2
+1100000,968.1,2
+1200000,964.2,2
+1300000,964.1,2
+1400000,962.9,2
+1500000,965.4,2
+1600000,974.5,2
+1700000,975.5,2
+1800000,966.6,2
+1900000,975.7,2
+2000000,974.5,2
+2100000,964.0,2
+2200000,970.6,2
+2300000,975.4,2
+2400000,969.2,2
+2500000,972.1,2
+2600000,973.5,2
+2700000,965.8,2
+2800000,976.8,2
+2900000,974.1,2
+3000000,968.4,2
+3100000,977.9,2
+3200000,973.0,2
+3300000,976.9,2
+3400000,979.0,2
+3500000,975.6,2
+3600000,973.6,2
+3700000,969.5,2
+3800000,967.3,2
+3900000,980.0,2
+4000000,969.8,2
+0,126.9,1
+100000,213.6,1
+200000,888.8,1
+300000,953.5,1
+400000,945.8,1
+500000,949.1,1
+600000,958.8,1
+700000,955.9,1
+800000,956.9,1
+900000,961.3,1
+1000000,965.3,1
+1100000,962.7,1
+1200000,971.4,1
+1300000,969.5,1
+1400000,967.2,1
+1500000,977.8,1
+1600000,968.9,1
+1700000,967.4,1
+1800000,974.9,1
+1900000,965.9,1
+2000000,969.5,1
+2100000,973.8,1
+2200000,973.3,1
+2300000,972.5,1
+2400000,982.0,1
+2500000,969.7,1
+2600000,964.6,1
+2700000,956.7,1
+2800000,972.3,1
+2900000,971.9,1
+3000000,967.9,1
+3100000,975.3,1
+3200000,977.8,1
+3300000,970.2,1
+3400000,976.3,1
+3500000,977.4,1
+3600000,970.1,1
+3700000,957.4,1
+3800000,973.9,1
+3900000,964.1,1
+4000000,972.1,1
+0,201.4,3
+100000,826.7,3
+200000,904.4,3
+300000,937.5,3
+400000,970.0,3
+500000,964.9,3
+600000,929.7,3
+700000,971.0,3
+800000,965.7,3
+900000,964.6,3
+1000000,935.5,3
+1100000,969.2,3
+1200000,971.6,3
+1300000,974.1,3
+1400000,968.9,3
+1500000,966.5,3
+1600000,968.5,3
+1700000,975.3,3
+1800000,969.3,3
+1900000,966.3,3
+2000000,964.6,3
+2100000,963.0,3
+2200000,966.3,3
+2300000,969.0,3
+2400000,976.8,3
+2500000,972.4,3
+2600000,968.8,3
+2700000,973.2,3
+2800000,977.8,3
+2900000,982.8,3
+3000000,969.3,3
+3100000,969.9,3
+3200000,971.8,3
+3300000,966.3,3
+3400000,980.8,3
+3500000,957.6,3
+3600000,975.5,3
+3700000,982.3,3
+3800000,968.4,3
+3900000,975.8,3
+4000000,971.2,3
diff --git a/results/reacher-easy.csv b/results/reacher-easy.csv
new file mode 100644
index 0000000..32c5018
--- /dev/null
+++ b/results/reacher-easy.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,91.4,2
+100000,972.8,2
+200000,985.5,2
+300000,880.9,2
+400000,888.6,2
+500000,892.2,2
+600000,986.6,2
+700000,983.8,2
+800000,985.2,2
+900000,985.6,2
+1000000,981.8,2
+1100000,986.8,2
+1200000,982.8,2
+1300000,942.0,2
+1400000,980.0,2
+1500000,984.5,2
+1600000,978.5,2
+1700000,991.3,2
+1800000,979.5,2
+1900000,982.9,2
+2000000,979.3,2
+2100000,981.1,2
+2200000,983.6,2
+2300000,987.6,2
+2400000,988.0,2
+2500000,987.1,2
+2600000,981.6,2
+2700000,986.8,2
+2800000,983.4,2
+2900000,982.1,2
+3000000,982.5,2
+3100000,981.0,2
+3200000,986.7,2
+3300000,986.1,2
+3400000,981.4,2
+3500000,983.7,2
+3600000,988.4,2
+3700000,986.1,2
+3800000,983.7,2
+3900000,978.7,2
+4000000,989.4,2
+0,51.5,1
+100000,977.9,1
+200000,969.3,1
+300000,973.0,1
+400000,989.6,1
+500000,985.4,1
+600000,985.0,1
+700000,985.0,1
+800000,975.7,1
+900000,987.3,1
+1000000,978.4,1
+1100000,967.2,1
+1200000,988.1,1
+1300000,983.7,1
+1400000,987.8,1
+1500000,983.0,1
+1600000,984.0,1
+1700000,986.1,1
+1800000,990.3,1
+1900000,948.6,1
+2000000,984.6,1
+2100000,986.5,1
+2200000,981.7,1
+2300000,984.0,1
+2400000,987.0,1
+2500000,979.5,1
+2600000,991.1,1
+2700000,983.6,1
+2800000,990.8,1
+2900000,982.5,1
+3000000,985.1,1
+3100000,987.5,1
+3200000,986.0,1
+3300000,986.0,1
+3400000,981.8,1
+3500000,985.5,1
+3600000,985.2,1
+3700000,983.1,1
+3800000,984.2,1
+3900000,985.5,1
+4000000,986.8,1
+0,140.8,3
+100000,859.9,3
+200000,973.9,3
+300000,978.4,3
+400000,987.9,3
+500000,880.7,3
+600000,980.9,3
+700000,980.0,3
+800000,984.0,3
+900000,976.8,3
+1000000,987.8,3
+1100000,982.0,3
+1200000,987.9,3
+1300000,979.9,3
+1400000,984.5,3
+1500000,981.0,3
+1600000,986.7,3
+1700000,986.5,3
+1800000,985.9,3
+1900000,986.9,3
+2000000,981.9,3
+2100000,983.1,3
+2200000,984.5,3
+2300000,962.7,3
+2400000,988.5,3
+2500000,984.7,3
+2600000,986.1,3
+2700000,907.3,3
+2800000,982.6,3
+2900000,984.3,3
+3000000,941.1,3
+3100000,985.1,3
+3200000,983.5,3
+3300000,984.7,3
+3400000,983.2,3
+3500000,982.3,3
+3600000,985.7,3
+3700000,985.9,3
+3800000,984.6,3
+3900000,986.7,3
+4000000,985.3,3
diff --git a/results/reacher-hard.csv b/results/reacher-hard.csv
new file mode 100644
index 0000000..5376622
--- /dev/null
+++ b/results/reacher-hard.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,4.0,3
+100000,256.3,3
+200000,668.7,3
+300000,880.2,3
+400000,981.0,3
+500000,974.8,3
+600000,901.4,3
+700000,974.2,3
+800000,983.1,3
+900000,973.4,3
+1000000,984.3,3
+1100000,978.4,3
+1200000,982.5,3
+1300000,977.4,3
+1400000,981.6,3
+1500000,977.9,3
+1600000,983.6,3
+1700000,982.5,3
+1800000,983.2,3
+1900000,982.0,3
+2000000,978.4,3
+2100000,980.4,3
+2200000,982.4,3
+2300000,981.5,3
+2400000,984.3,3
+2500000,981.5,3
+2600000,981.9,3
+2700000,981.8,3
+2800000,979.7,3
+2900000,983.1,3
+3000000,983.5,3
+3100000,981.6,3
+3200000,979.7,3
+3300000,981.7,3
+3400000,981.2,3
+3500000,978.7,3
+3600000,982.2,3
+3700000,979.7,3
+3800000,981.3,3
+3900000,982.6,3
+4000000,975.0,3
+0,23.4,2
+100000,479.1,2
+200000,972.9,2
+300000,896.9,2
+400000,973.4,2
+500000,879.0,2
+600000,969.4,2
+700000,977.1,2
+800000,980.2,2
+900000,882.6,2
+1000000,981.1,2
+1100000,980.9,2
+1200000,977.8,2
+1300000,985.9,2
+1400000,981.9,2
+1500000,981.2,2
+1600000,977.5,2
+1700000,988.3,2
+1800000,978.9,2
+1900000,977.9,2
+2000000,978.4,2
+2100000,977.2,2
+2200000,980.4,2
+2300000,984.3,2
+2400000,981.4,2
+2500000,982.9,2
+2600000,976.3,2
+2700000,983.7,2
+2800000,977.4,2
+2900000,979.1,2
+3000000,978.7,2
+3100000,978.8,2
+3200000,982.7,2
+3300000,982.7,2
+3400000,978.5,2
+3500000,969.5,2
+3600000,985.2,2
+3700000,980.9,2
+3800000,968.3,2
+3900000,983.3,2
+4000000,985.9,2
+0,19.2,1
+100000,507.3,1
+200000,971.2,1
+300000,972.0,1
+400000,885.1,1
+500000,887.4,1
+600000,977.2,1
+700000,980.1,1
+800000,979.3,1
+900000,984.9,1
+1000000,977.7,1
+1100000,931.6,1
+1200000,986.5,1
+1300000,980.1,1
+1400000,985.0,1
+1500000,980.9,1
+1600000,980.0,1
+1700000,982.3,1
+1800000,986.2,1
+1900000,983.0,1
+2000000,981.7,1
+2100000,983.9,1
+2200000,977.4,1
+2300000,981.4,1
+2400000,982.7,1
+2500000,976.9,1
+2600000,987.1,1
+2700000,979.2,1
+2800000,987.9,1
+2900000,980.3,1
+3000000,982.1,1
+3100000,984.6,1
+3200000,981.8,1
+3300000,982.9,1
+3400000,985.4,1
+3500000,982.0,1
+3600000,968.9,1
+3700000,979.7,1
+3800000,982.1,1
+3900000,982.5,1
+4000000,984.5,1
diff --git a/results/reacher-three-easy.csv b/results/reacher-three-easy.csv
new file mode 100644
index 0000000..481ea6b
--- /dev/null
+++ b/results/reacher-three-easy.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,33.9,3
+100000,883.3,3
+200000,945.4,3
+300000,974.1,3
+400000,979.5,3
+500000,969.9,3
+600000,942.2,3
+700000,976.8,3
+800000,976.0,3
+900000,976.2,3
+1000000,972.3,3
+1100000,975.6,3
+1200000,978.6,3
+1300000,977.9,3
+1400000,975.5,3
+1500000,975.2,3
+1600000,929.5,3
+1700000,978.7,3
+1800000,975.1,3
+1900000,973.5,3
+2000000,958.1,3
+2100000,875.6,3
+2200000,980.1,3
+2300000,982.2,3
+2400000,982.1,3
+2500000,885.9,3
+2600000,982.9,3
+2700000,983.9,3
+2800000,979.0,3
+2900000,981.3,3
+3000000,982.4,3
+3100000,978.6,3
+3200000,985.4,3
+3300000,984.2,3
+3400000,973.3,3
+3500000,980.0,3
+3600000,979.7,3
+3700000,977.7,3
+3800000,978.6,3
+3900000,984.2,3
+4000000,978.9,3
+0,41.6,1
+100000,839.5,1
+200000,973.6,1
+300000,976.4,1
+400000,978.1,1
+500000,873.8,1
+600000,973.1,1
+700000,975.9,1
+800000,871.8,1
+900000,980.3,1
+1000000,879.5,1
+1100000,979.9,1
+1200000,975.5,1
+1300000,973.4,1
+1400000,925.6,1
+1500000,978.0,1
+1600000,972.8,1
+1700000,979.2,1
+1800000,983.5,1
+1900000,982.5,1
+2000000,974.7,1
+2100000,977.9,1
+2200000,872.9,1
+2300000,979.7,1
+2400000,982.5,1
+2500000,978.3,1
+2600000,974.5,1
+2700000,979.7,1
+2800000,980.2,1
+2900000,981.8,1
+3000000,982.4,1
+3100000,981.5,1
+3200000,983.1,1
+3300000,979.5,1
+3400000,983.2,1
+3500000,984.1,1
+3600000,985.7,1
+3700000,979.4,1
+3800000,986.9,1
+3900000,978.2,1
+4000000,978.9,1
+0,102.2,2
+100000,918.7,2
+200000,968.5,2
+300000,973.1,2
+400000,976.7,2
+500000,972.5,2
+600000,975.0,2
+700000,875.7,2
+800000,977.4,2
+900000,975.6,2
+1000000,979.1,2
+1100000,976.6,2
+1200000,980.5,2
+1300000,978.5,2
+1400000,983.1,2
+1500000,981.1,2
+1600000,982.0,2
+1700000,982.4,2
+1800000,975.7,2
+1900000,979.3,2
+2000000,977.2,2
+2100000,984.1,2
+2200000,977.2,2
+2300000,985.3,2
+2400000,981.5,2
+2500000,982.4,2
+2600000,977.8,2
+2700000,980.9,2
+2800000,974.5,2
+2900000,980.2,2
+3000000,884.3,2
+3100000,981.0,2
+3200000,983.4,2
+3300000,979.7,2
+3400000,982.3,2
+3500000,980.0,2
+3600000,985.9,2
+3700000,980.5,2
+3800000,986.2,2
+3900000,981.2,2
+4000000,986.9,2
diff --git a/results/reacher-three-hard.csv b/results/reacher-three-hard.csv
new file mode 100644
index 0000000..8c48dff
--- /dev/null
+++ b/results/reacher-three-hard.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,7.7,3
+100000,275.6,3
+200000,673.0,3
+300000,867.8,3
+400000,588.9,3
+500000,878.5,3
+600000,782.4,3
+700000,676.8,3
+800000,771.3,3
+900000,680.6,3
+1000000,500.7,3
+1100000,677.6,3
+1200000,776.5,3
+1300000,781.3,3
+1400000,967.8,3
+1500000,682.1,3
+1600000,776.4,3
+1700000,772.8,3
+1800000,578.5,3
+1900000,783.2,3
+2000000,773.9,3
+2100000,384.3,3
+2200000,585.0,3
+2300000,677.3,3
+2400000,784.3,3
+2500000,880.9,3
+2600000,775.2,3
+2700000,682.4,3
+2800000,778.8,3
+2900000,586.0,3
+3000000,869.5,3
+3100000,871.0,3
+3200000,773.7,3
+3300000,779.2,3
+3400000,781.4,3
+3500000,681.9,3
+3600000,871.6,3
+3700000,872.5,3
+3800000,875.1,3
+3900000,489.0,3
+4000000,875.5,3
+0,18.8,2
+100000,221.4,2
+200000,649.3,2
+300000,759.4,2
+400000,684.2,2
+500000,487.2,2
+600000,654.0,2
+700000,682.8,2
+800000,876.1,2
+900000,873.8,2
+1000000,782.8,2
+1100000,581.9,2
+1200000,781.2,2
+1300000,880.6,2
+1400000,874.8,2
+1500000,784.2,2
+1600000,971.3,2
+1700000,783.3,2
+1800000,969.5,2
+1900000,681.5,2
+2000000,976.6,2
+2100000,687.2,2
+2200000,573.3,2
+2300000,884.3,2
+2400000,883.4,2
+2500000,881.7,2
+2600000,586.6,2
+2700000,778.9,2
+2800000,783.0,2
+2900000,879.1,2
+3000000,781.5,2
+3100000,878.2,2
+3200000,881.1,2
+3300000,976.9,2
+3400000,979.6,2
+3500000,686.9,2
+3600000,782.5,2
+3700000,782.6,2
+3800000,786.6,2
+3900000,881.3,2
+4000000,786.3,2
+0,0.0,1
+100000,303.9,1
+200000,674.1,1
+300000,775.2,1
+400000,973.2,1
+500000,677.8,1
+600000,876.2,1
+700000,876.5,1
+800000,864.3,1
+900000,854.5,1
+1000000,875.1,1
+1100000,778.5,1
+1200000,577.3,1
+1300000,874.3,1
+1400000,778.4,1
+1500000,878.9,1
+1600000,774.6,1
+1700000,968.1,1
+1800000,977.9,1
+1900000,879.6,1
+2000000,872.8,1
+2100000,874.4,1
+2200000,678.2,1
+2300000,682.7,1
+2400000,873.5,1
+2500000,776.0,1
+2600000,775.7,1
+2700000,680.5,1
+2800000,585.2,1
+2900000,971.5,1
+3000000,774.2,1
+3100000,957.1,1
+3200000,876.4,1
+3300000,871.5,1
+3400000,583.2,1
+3500000,873.7,1
+3600000,783.0,1
+3700000,878.0,1
+3800000,976.9,1
+3900000,675.6,1
+4000000,877.5,1
diff --git a/results/stack-cube.csv b/results/stack-cube.csv
new file mode 100644
index 0000000..c8f3df0
--- /dev/null
+++ b/results/stack-cube.csv
@@ -0,0 +1,124 @@
+step,success,seed
+0,0.0,3
+100000,0.0,3
+200000,0.0,3
+300000,0.0,3
+400000,0.0,3
+500000,0.0,3
+600000,0.0,3
+700000,0.0,3
+800000,0.0,3
+900000,0.0,3
+1000000,0.0,3
+1100000,0.0,3
+1200000,0.0,3
+1300000,0.0,3
+1400000,0.1,3
+1500000,0.0,3
+1600000,0.0,3
+1700000,0.3,3
+1800000,0.3,3
+1900000,0.5,3
+2000000,0.8,3
+2100000,0.4,3
+2200000,0.5,3
+2300000,0.6,3
+2400000,0.4,3
+2500000,0.6,3
+2600000,0.9,3
+2700000,0.7,3
+2800000,0.8,3
+2900000,0.6,3
+3000000,0.9,3
+3100000,0.9,3
+3200000,0.9,3
+3300000,0.9,3
+3400000,1.0,3
+3500000,0.9,3
+3600000,0.9,3
+3700000,0.8,3
+3800000,1.0,3
+3900000,0.9,3
+4000000,0.9,3
+0,0.0,1
+100000,0.0,1
+200000,0.0,1
+300000,0.0,1
+400000,0.0,1
+500000,0.0,1
+600000,0.0,1
+700000,0.0,1
+800000,0.0,1
+900000,0.0,1
+1000000,0.0,1
+1100000,0.1,1
+1200000,0.0,1
+1300000,0.3,1
+1400000,0.3,1
+1500000,0.1,1
+1600000,0.4,1
+1700000,0.5,1
+1800000,0.3,1
+1900000,0.7,1
+2000000,0.3,1
+2100000,0.6,1
+2200000,0.4,1
+2300000,0.7,1
+2400000,0.8,1
+2500000,0.8,1
+2600000,0.9,1
+2700000,0.8,1
+2800000,0.8,1
+2900000,0.7,1
+3000000,1.0,1
+3100000,0.9,1
+3200000,0.8,1
+3300000,0.9,1
+3400000,1.0,1
+3500000,0.9,1
+3600000,0.8,1
+3700000,0.9,1
+3800000,0.9,1
+3900000,1.0,1
+4000000,1.0,1
+0,0.0,2
+100000,0.0,2
+200000,0.0,2
+300000,0.0,2
+400000,0.0,2
+500000,0.0,2
+600000,0.0,2
+700000,0.0,2
+800000,0.0,2
+900000,0.0,2
+1000000,0.0,2
+1100000,0.0,2
+1200000,0.0,2
+1300000,0.0,2
+1400000,0.0,2
+1500000,0.0,2
+1600000,0.3,2
+1700000,0.5,2
+1800000,0.5,2
+1900000,0.2,2
+2000000,0.4,2
+2100000,0.5,2
+2200000,0.4,2
+2300000,0.2,2
+2400000,0.7,2
+2500000,0.8,2
+2600000,0.8,2
+2700000,0.8,2
+2800000,1.0,2
+2900000,0.6,2
+3000000,1.0,2
+3100000,0.8,2
+3200000,0.9,2
+3300000,1.0,2
+3400000,1.0,2
+3500000,0.9,2
+3600000,0.6,2
+3700000,0.9,2
+3800000,1.0,2
+3900000,1.0,2
+4000000,0.8,2
diff --git a/results/turn-faucet.csv b/results/turn-faucet.csv
new file mode 100644
index 0000000..8e8b2c8
--- /dev/null
+++ b/results/turn-faucet.csv
@@ -0,0 +1,121 @@
+step,success,seed
+0,0.0,3
+100000,0.1,3
+200000,0.3,3
+300000,0.3,3
+400000,0.8,3
+500000,0.4,3
+600000,0.4,3
+700000,0.5,3
+800000,0.9,3
+900000,1.0,3
+1000000,0.7,3
+1100000,1.0,3
+1200000,0.8,3
+1300000,1.0,3
+1400000,0.7,3
+1500000,0.9,3
+1600000,0.9,3
+1700000,0.9,3
+1800000,0.7,3
+1900000,0.8,3
+2000000,1.0,3
+2100000,0.9,3
+2200000,0.9,3
+2300000,0.9,3
+2400000,0.9,3
+2500000,1.0,3
+2600000,0.9,3
+2700000,0.9,3
+2800000,1.0,3
+2900000,0.9,3
+3000000,0.9,3
+3100000,1.0,3
+3200000,0.8,3
+3300000,0.9,3
+3400000,0.9,3
+3500000,1.0,3
+3600000,1.0,3
+3700000,0.9,3
+3800000,1.0,3
+3900000,1.0,3
+0,0.0,2
+100000,0.0,2
+200000,0.6,2
+300000,0.3,2
+400000,0.8,2
+500000,0.7,2
+600000,0.8,2
+700000,0.7,2
+800000,1.0,2
+900000,0.9,2
+1000000,0.9,2
+1100000,0.9,2
+1200000,0.8,2
+1300000,0.9,2
+1400000,0.9,2
+1500000,1.0,2
+1600000,0.9,2
+1700000,1.0,2
+1800000,1.0,2
+1900000,0.7,2
+2000000,0.9,2
+2100000,0.8,2
+2200000,1.0,2
+2300000,1.0,2
+2400000,0.9,2
+2500000,1.0,2
+2600000,1.0,2
+2700000,0.9,2
+2800000,1.0,2
+2900000,0.9,2
+3000000,1.0,2
+3100000,1.0,2
+3200000,0.9,2
+3300000,1.0,2
+3400000,0.9,2
+3500000,1.0,2
+3600000,0.9,2
+3700000,1.0,2
+3800000,0.9,2
+3900000,0.9,2
+0,0.0,1
+100000,0.2,1
+200000,0.6,1
+300000,0.3,1
+400000,0.8,1
+500000,0.6,1
+600000,0.7,1
+700000,0.7,1
+800000,0.8,1
+900000,1.0,1
+1000000,0.6,1
+1100000,1.0,1
+1200000,0.8,1
+1300000,0.8,1
+1400000,0.9,1
+1500000,0.8,1
+1600000,1.0,1
+1700000,1.0,1
+1800000,0.9,1
+1900000,0.8,1
+2000000,1.0,1
+2100000,0.9,1
+2200000,1.0,1
+2300000,0.9,1
+2400000,1.0,1
+2500000,1.0,1
+2600000,1.0,1
+2700000,1.0,1
+2800000,0.9,1
+2900000,0.9,1
+3000000,0.9,1
+3100000,1.0,1
+3200000,1.0,1
+3300000,1.0,1
+3400000,1.0,1
+3500000,1.0,1
+3600000,1.0,1
+3700000,0.9,1
+3800000,0.9,1
+3900000,1.0,1
diff --git a/results/walker-run-backwards.csv b/results/walker-run-backwards.csv
new file mode 100644
index 0000000..729527d
--- /dev/null
+++ b/results/walker-run-backwards.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,20.6,2
+100000,530.5,2
+200000,652.0,2
+300000,716.2,2
+400000,794.7,2
+500000,854.1,2
+600000,867.1,2
+700000,874.6,2
+800000,880.8,2
+900000,912.3,2
+1000000,910.1,2
+1100000,923.4,2
+1200000,922.5,2
+1300000,933.5,2
+1400000,933.6,2
+1500000,937.6,2
+1600000,925.9,2
+1700000,932.8,2
+1800000,934.7,2
+1900000,938.0,2
+2000000,936.0,2
+2100000,937.3,2
+2200000,936.7,2
+2300000,936.2,2
+2400000,932.7,2
+2500000,938.5,2
+2600000,936.5,2
+2700000,947.1,2
+2800000,939.4,2
+2900000,943.2,2
+3000000,935.8,2
+3100000,938.5,2
+3200000,941.6,2
+3300000,941.2,2
+3400000,941.0,2
+3500000,952.2,2
+3600000,946.2,2
+3700000,943.5,2
+3800000,937.6,2
+3900000,944.3,2
+4000000,939.3,2
+0,21.9,1
+100000,535.0,1
+200000,659.3,1
+300000,696.1,1
+400000,729.3,1
+500000,760.7,1
+600000,788.1,1
+700000,799.2,1
+800000,804.3,1
+900000,806.9,1
+1000000,839.2,1
+1100000,859.7,1
+1200000,876.6,1
+1300000,877.4,1
+1400000,868.0,1
+1500000,886.1,1
+1600000,882.2,1
+1700000,872.8,1
+1800000,884.2,1
+1900000,891.4,1
+2000000,886.2,1
+2100000,889.7,1
+2200000,895.8,1
+2300000,904.4,1
+2400000,909.3,1
+2500000,914.4,1
+2600000,912.1,1
+2700000,903.4,1
+2800000,920.7,1
+2900000,909.1,1
+3000000,920.1,1
+3100000,926.8,1
+3200000,930.9,1
+3300000,927.0,1
+3400000,932.8,1
+3500000,932.7,1
+3600000,933.0,1
+3700000,931.3,1
+3800000,930.9,1
+3900000,931.1,1
+4000000,936.5,1
+0,24.0,3
+100000,482.1,3
+200000,684.7,3
+300000,724.9,3
+400000,746.9,3
+500000,769.7,3
+600000,801.0,3
+700000,805.3,3
+800000,825.5,3
+900000,816.8,3
+1000000,850.1,3
+1100000,842.0,3
+1200000,858.7,3
+1300000,876.8,3
+1400000,865.1,3
+1500000,870.7,3
+1600000,884.9,3
+1700000,888.0,3
+1800000,884.0,3
+1900000,901.4,3
+2000000,889.6,3
+2100000,897.6,3
+2200000,904.4,3
+2300000,902.0,3
+2400000,909.9,3
+2500000,911.8,3
+2600000,909.9,3
+2700000,912.9,3
+2800000,912.6,3
+2900000,870.9,3
+3000000,920.7,3
+3100000,928.5,3
+3200000,923.7,3
+3300000,925.2,3
+3400000,932.7,3
+3500000,935.6,3
+3600000,936.1,3
+3700000,931.0,3
+3800000,935.9,3
+3900000,942.4,3
+4000000,941.0,3
diff --git a/results/walker-run.csv b/results/walker-run.csv
new file mode 100644
index 0000000..84d1a31
--- /dev/null
+++ b/results/walker-run.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,20.7,2
+100000,718.3,2
+200000,773.5,2
+300000,801.7,2
+400000,809.8,2
+500000,815.2,2
+600000,821.7,2
+700000,815.4,2
+800000,830.9,2
+900000,834.1,2
+1000000,833.7,2
+1100000,843.0,2
+1200000,839.5,2
+1300000,850.3,2
+1400000,848.3,2
+1500000,844.9,2
+1600000,854.0,2
+1700000,859.7,2
+1800000,858.1,2
+1900000,860.7,2
+2000000,858.3,2
+2100000,863.2,2
+2200000,865.1,2
+2300000,866.1,2
+2400000,871.7,2
+2500000,869.5,2
+2600000,831.8,2
+2700000,869.3,2
+2800000,873.9,2
+2900000,874.4,2
+3000000,871.5,2
+3100000,874.1,2
+3200000,876.1,2
+3300000,869.9,2
+3400000,870.8,2
+3500000,875.2,2
+3600000,875.9,2
+3700000,876.8,2
+3800000,878.4,2
+3900000,881.6,2
+4000000,879.3,2
+0,25.8,1
+100000,691.5,1
+200000,767.8,1
+300000,796.7,1
+400000,817.1,1
+500000,811.6,1
+600000,823.0,1
+700000,829.8,1
+800000,837.8,1
+900000,838.2,1
+1000000,837.0,1
+1100000,849.1,1
+1200000,844.1,1
+1300000,845.8,1
+1400000,849.5,1
+1500000,853.7,1
+1600000,852.4,1
+1700000,851.9,1
+1800000,853.7,1
+1900000,851.9,1
+2000000,856.3,1
+2100000,855.8,1
+2200000,857.2,1
+2300000,862.5,1
+2400000,860.8,1
+2500000,860.3,1
+2600000,866.9,1
+2700000,867.4,1
+2800000,868.8,1
+2900000,867.4,1
+3000000,867.2,1
+3100000,871.1,1
+3200000,870.3,1
+3300000,867.5,1
+3400000,867.4,1
+3500000,867.1,1
+3600000,870.1,1
+3700000,869.8,1
+3800000,872.4,1
+3900000,868.1,1
+4000000,868.1,1
+0,24.3,3
+100000,759.1,3
+200000,800.2,3
+300000,827.3,3
+400000,826.7,3
+500000,834.4,3
+600000,839.6,3
+700000,845.1,3
+800000,850.4,3
+900000,847.9,3
+1000000,856.3,3
+1100000,860.0,3
+1200000,862.0,3
+1300000,865.6,3
+1400000,870.4,3
+1500000,868.4,3
+1600000,867.5,3
+1700000,869.0,3
+1800000,877.0,3
+1900000,878.3,3
+2000000,872.1,3
+2100000,878.7,3
+2200000,872.5,3
+2300000,881.2,3
+2400000,882.1,3
+2500000,880.8,3
+2600000,882.8,3
+2700000,885.3,3
+2800000,887.2,3
+2900000,887.0,3
+3000000,883.0,3
+3100000,883.0,3
+3200000,884.2,3
+3300000,878.4,3
+3400000,891.5,3
+3500000,885.3,3
+3600000,884.3,3
+3700000,882.9,3
+3800000,889.4,3
+3900000,888.0,3
+4000000,884.2,3
diff --git a/results/walker-stand.csv b/results/walker-stand.csv
new file mode 100644
index 0000000..f8199d1
--- /dev/null
+++ b/results/walker-stand.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,126.2,2
+100000,979.5,2
+200000,965.5,2
+300000,990.1,2
+400000,971.6,2
+500000,988.1,2
+600000,988.7,2
+700000,988.7,2
+800000,986.1,2
+900000,990.4,2
+1000000,991.0,2
+1100000,989.7,2
+1200000,989.3,2
+1300000,994.2,2
+1400000,990.7,2
+1500000,988.5,2
+1600000,993.4,2
+1700000,996.3,2
+1800000,992.0,2
+1900000,991.5,2
+2000000,991.9,2
+2100000,993.2,2
+2200000,994.3,2
+2300000,993.5,2
+2400000,992.2,2
+2500000,987.2,2
+2600000,994.4,2
+2700000,990.3,2
+2800000,991.1,2
+2900000,994.6,2
+3000000,993.5,2
+3100000,994.5,2
+3200000,996.6,2
+3300000,991.2,2
+3400000,991.6,2
+3500000,991.4,2
+3600000,993.1,2
+3700000,994.9,2
+3800000,992.3,2
+3900000,995.3,2
+4000000,992.2,2
+0,152.5,1
+100000,985.2,1
+200000,980.1,1
+300000,987.1,1
+400000,985.9,1
+500000,898.1,1
+600000,985.7,1
+700000,988.9,1
+800000,992.3,1
+900000,986.9,1
+1000000,987.0,1
+1100000,994.9,1
+1200000,991.3,1
+1300000,991.2,1
+1400000,994.1,1
+1500000,992.6,1
+1600000,994.3,1
+1700000,993.3,1
+1800000,994.8,1
+1900000,991.8,1
+2000000,992.3,1
+2100000,991.6,1
+2200000,993.4,1
+2300000,990.9,1
+2400000,995.0,1
+2500000,993.3,1
+2600000,992.9,1
+2700000,995.2,1
+2800000,994.4,1
+2900000,992.1,1
+3000000,995.0,1
+3100000,992.9,1
+3200000,995.5,1
+3300000,992.5,1
+3400000,988.9,1
+3500000,991.5,1
+3600000,994.6,1
+3700000,994.0,1
+3800000,993.4,1
+3900000,992.0,1
+4000000,992.0,1
+0,129.2,3
+100000,978.9,3
+200000,987.4,3
+300000,991.6,3
+400000,982.6,3
+500000,985.3,3
+600000,988.2,3
+700000,991.2,3
+800000,992.1,3
+900000,985.2,3
+1000000,993.8,3
+1100000,991.8,3
+1200000,989.8,3
+1300000,991.1,3
+1400000,995.4,3
+1500000,992.2,3
+1600000,993.5,3
+1700000,993.0,3
+1800000,994.4,3
+1900000,994.2,3
+2000000,992.1,3
+2100000,996.3,3
+2200000,992.4,3
+2300000,998.0,3
+2400000,995.0,3
+2500000,994.9,3
+2600000,997.5,3
+2700000,997.3,3
+2800000,996.5,3
+2900000,996.1,3
+3000000,994.6,3
+3100000,991.2,3
+3200000,992.5,3
+3300000,990.2,3
+3400000,998.9,3
+3500000,993.8,3
+3600000,991.5,3
+3700000,989.7,3
+3800000,994.0,3
+3900000,996.4,3
+4000000,993.9,3
diff --git a/results/walker-walk-backwards.csv b/results/walker-walk-backwards.csv
new file mode 100644
index 0000000..d7c8f35
--- /dev/null
+++ b/results/walker-walk-backwards.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,26.7,3
+100000,952.3,3
+200000,966.2,3
+300000,971.4,3
+400000,975.4,3
+500000,981.2,3
+600000,978.1,3
+700000,978.8,3
+800000,975.0,3
+900000,973.2,3
+1000000,979.6,3
+1100000,978.3,3
+1200000,980.1,3
+1300000,977.8,3
+1400000,979.0,3
+1500000,980.7,3
+1600000,982.5,3
+1700000,975.9,3
+1800000,983.6,3
+1900000,985.4,3
+2000000,981.6,3
+2100000,982.6,3
+2200000,977.2,3
+2300000,986.8,3
+2400000,983.0,3
+2500000,984.2,3
+2600000,983.8,3
+2700000,984.1,3
+2800000,986.3,3
+2900000,981.4,3
+3000000,985.7,3
+3100000,984.9,3
+3200000,979.3,3
+3300000,975.9,3
+3400000,988.1,3
+3500000,984.8,3
+3600000,980.3,3
+3700000,978.1,3
+3800000,984.8,3
+3900000,986.1,3
+4000000,980.4,3
+0,21.4,2
+100000,958.7,2
+200000,967.6,2
+300000,970.1,2
+400000,965.4,2
+500000,984.3,2
+600000,961.4,2
+700000,971.0,2
+800000,981.6,2
+900000,969.8,2
+1000000,971.9,2
+1100000,981.3,2
+1200000,979.0,2
+1300000,988.1,2
+1400000,985.3,2
+1500000,980.1,2
+1600000,978.1,2
+1700000,976.5,2
+1800000,980.3,2
+1900000,985.8,2
+2000000,983.6,2
+2100000,986.1,2
+2200000,986.0,2
+2300000,977.6,2
+2400000,983.1,2
+2500000,982.9,2
+2600000,981.5,2
+2700000,984.5,2
+2800000,984.2,2
+2900000,982.2,2
+3000000,980.8,2
+3100000,981.5,2
+3200000,976.5,2
+3300000,983.6,2
+3400000,982.9,2
+3500000,991.5,2
+3600000,980.7,2
+3700000,983.9,2
+3800000,982.6,2
+3900000,982.0,2
+4000000,984.9,2
+0,27.0,1
+100000,949.0,1
+200000,956.5,1
+300000,981.1,1
+400000,948.6,1
+500000,969.2,1
+600000,980.7,1
+700000,981.7,1
+800000,979.3,1
+900000,978.2,1
+1000000,980.3,1
+1100000,981.4,1
+1200000,987.6,1
+1300000,976.1,1
+1400000,988.6,1
+1500000,979.2,1
+1600000,981.9,1
+1700000,978.7,1
+1800000,980.7,1
+1900000,984.6,1
+2000000,985.7,1
+2100000,978.5,1
+2200000,986.1,1
+2300000,984.1,1
+2400000,983.2,1
+2500000,988.2,1
+2600000,983.3,1
+2700000,980.9,1
+2800000,984.0,1
+2900000,986.0,1
+3000000,985.4,1
+3100000,981.3,1
+3200000,984.3,1
+3300000,979.3,1
+3400000,984.8,1
+3500000,986.8,1
+3600000,981.4,1
+3700000,984.9,1
+3800000,977.6,1
+3900000,980.4,1
+4000000,985.2,1
diff --git a/results/walker-walk.csv b/results/walker-walk.csv
new file mode 100644
index 0000000..dd89cb2
--- /dev/null
+++ b/results/walker-walk.csv
@@ -0,0 +1,124 @@
+step,reward,seed
+0,24.8,2
+100000,947.7,2
+200000,971.2,2
+300000,977.2,2
+400000,978.4,2
+500000,973.5,2
+600000,980.6,2
+700000,976.7,2
+800000,981.0,2
+900000,976.4,2
+1000000,980.5,2
+1100000,983.9,2
+1200000,976.0,2
+1300000,983.0,2
+1400000,979.0,2
+1500000,979.5,2
+1600000,978.2,2
+1700000,987.1,2
+1800000,983.5,2
+1900000,983.3,2
+2000000,983.3,2
+2100000,982.8,2
+2200000,986.4,2
+2300000,983.6,2
+2400000,985.8,2
+2500000,984.7,2
+2600000,984.7,2
+2700000,982.8,2
+2800000,984.1,2
+2900000,984.6,2
+3000000,983.4,2
+3100000,984.6,2
+3200000,988.1,2
+3300000,975.8,2
+3400000,985.2,2
+3500000,981.4,2
+3600000,986.4,2
+3700000,985.1,2
+3800000,983.6,2
+3900000,987.3,2
+4000000,977.4,2
+0,27.9,1
+100000,966.7,1
+200000,970.5,1
+300000,975.3,1
+400000,973.3,1
+500000,979.4,1
+600000,980.4,1
+700000,978.5,1
+800000,978.6,1
+900000,979.3,1
+1000000,975.2,1
+1100000,985.8,1
+1200000,978.8,1
+1300000,980.5,1
+1400000,984.7,1
+1500000,985.5,1
+1600000,985.6,1
+1700000,982.4,1
+1800000,984.7,1
+1900000,984.0,1
+2000000,985.6,1
+2100000,981.4,1
+2200000,984.0,1
+2300000,983.1,1
+2400000,984.2,1
+2500000,984.5,1
+2600000,984.5,1
+2700000,986.2,1
+2800000,984.4,1
+2900000,984.5,1
+3000000,986.6,1
+3100000,984.2,1
+3200000,984.1,1
+3300000,983.2,1
+3400000,981.9,1
+3500000,983.2,1
+3600000,984.5,1
+3700000,985.2,1
+3800000,982.9,1
+3900000,983.8,1
+4000000,982.9,1
+0,31.4,3
+100000,971.3,3
+200000,978.6,3
+300000,977.8,3
+400000,977.4,3
+500000,983.2,3
+600000,984.5,3
+700000,982.9,3
+800000,981.1,3
+900000,972.5,3
+1000000,983.5,3
+1100000,982.6,3
+1200000,981.5,3
+1300000,982.9,3
+1400000,986.8,3
+1500000,980.8,3
+1600000,982.7,3
+1700000,981.1,3
+1800000,985.2,3
+1900000,988.5,3
+2000000,983.1,3
+2100000,988.1,3
+2200000,982.7,3
+2300000,991.2,3
+2400000,987.4,3
+2500000,987.6,3
+2600000,987.4,3
+2700000,987.2,3
+2800000,988.6,3
+2900000,987.1,3
+3000000,983.0,3
+3100000,984.2,3
+3200000,985.9,3
+3300000,976.1,3
+3400000,991.7,3
+3500000,983.8,3
+3600000,977.9,3
+3700000,980.2,3
+3800000,986.0,3
+3900000,988.7,3
+4000000,983.1,3
diff --git a/tdmpc2/__init__.py b/tdmpc2/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/tdmpc2/common/__init__.py b/tdmpc2/common/__init__.py
new file mode 100644
index 0000000..7fa5309
--- /dev/null
+++ b/tdmpc2/common/__init__.py
@@ -0,0 +1,60 @@
+MODEL_SIZE = { # keys: model size in millions of parameters (M); values: architecture settings for that size
+	1:   {'enc_dim': 256,
+		  'mlp_dim': 384,
+		  'latent_dim': 128,
+		  'num_enc_layers': 2,
+		  'num_q': 2}, # only the 1M and 317M presets set num_q
+	5:   {'enc_dim': 256,
+		  'mlp_dim': 512,
+		  'latent_dim': 512,
+		  'num_enc_layers': 2},
+	19:  {'enc_dim': 1024,
+		  'mlp_dim': 1024,
+		  'latent_dim': 768,
+		  'num_enc_layers': 3},
+	48:  {'enc_dim': 1792,
+		  'mlp_dim': 1792,
+		  'latent_dim': 768,
+		  'num_enc_layers': 4},
+	317: {'enc_dim': 4096,
+		  'mlp_dim': 4096,
+		  'latent_dim': 1376,
+		  'num_enc_layers': 5,
+		  'num_q': 8},
+}
+
+TASK_SET = { # multi-task benchmark name -> list of task names
+	'mt30': [ # 30 DMControl tasks in total
+		# 19 original DMControl tasks
+		'walker-stand', 'walker-walk', 'walker-run', 'cheetah-run', 'reacher-easy',
+	    'reacher-hard', 'acrobot-swingup', 'pendulum-swingup', 'cartpole-balance', 'cartpole-balance-sparse',
+		'cartpole-swingup', 'cartpole-swingup-sparse', 'cup-catch', 'finger-spin', 'finger-turn-easy',
+		'finger-turn-hard', 'fish-swim', 'hopper-stand', 'hopper-hop',
+		# 11 custom DMControl tasks
+		'walker-walk-backwards', 'walker-run-backwards', 'cheetah-run-backwards', 'cheetah-run-front', 'cheetah-run-back',
+		'cheetah-jump', 'hopper-hop-backwards', 'reacher-three-easy', 'reacher-three-hard', 'cup-spin',
+		'pendulum-spin',
+	],
+	'mt80': [ # the 30 tasks of mt30 followed by 50 Meta-World tasks
+		# 19 original DMControl tasks
+		'walker-stand', 'walker-walk', 'walker-run', 'cheetah-run', 'reacher-easy',
+	    'reacher-hard', 'acrobot-swingup', 'pendulum-swingup', 'cartpole-balance', 'cartpole-balance-sparse',
+		'cartpole-swingup', 'cartpole-swingup-sparse', 'cup-catch', 'finger-spin', 'finger-turn-easy',
+		'finger-turn-hard', 'fish-swim', 'hopper-stand', 'hopper-hop',
+		# 11 custom DMControl tasks
+		'walker-walk-backwards', 'walker-run-backwards', 'cheetah-run-backwards', 'cheetah-run-front', 'cheetah-run-back',
+		'cheetah-jump', 'hopper-hop-backwards', 'reacher-three-easy', 'reacher-three-hard', 'cup-spin',
+		'pendulum-spin',
+		# 50 Meta-World tasks (mt50), all prefixed with `mw-`
+		'mw-assembly', 'mw-basketball', 'mw-button-press-topdown', 'mw-button-press-topdown-wall', 'mw-button-press',
+		'mw-button-press-wall', 'mw-coffee-button', 'mw-coffee-pull', 'mw-coffee-push', 'mw-dial-turn',
+		'mw-disassemble', 'mw-door-open', 'mw-door-close', 'mw-drawer-close', 'mw-drawer-open',
+		'mw-faucet-open', 'mw-faucet-close', 'mw-hammer', 'mw-handle-press-side', 'mw-handle-press',
+		'mw-handle-pull-side', 'mw-handle-pull', 'mw-lever-pull', 'mw-peg-insert-side', 'mw-peg-unplug-side',
+		'mw-pick-out-of-hole', 'mw-pick-place', 'mw-pick-place-wall', 'mw-plate-slide', 'mw-plate-slide-side',
+		'mw-plate-slide-back', 'mw-plate-slide-back-side', 'mw-push-back', 'mw-push', 'mw-push-wall',
+		'mw-reach', 'mw-reach-wall', 'mw-shelf-place', 'mw-soccer', 'mw-stick-push',
+		'mw-stick-pull', 'mw-sweep-into', 'mw-sweep', 'mw-window-open', 'mw-window-close',
+		'mw-bin-picking', 'mw-box-close', 'mw-door-lock', 'mw-door-unlock', 'mw-hand-insert',
+	],
+}
diff --git a/tdmpc2/common/buffer.py b/tdmpc2/common/buffer.py
new file mode 100644
index 0000000..dbbfea6
--- /dev/null
+++ b/tdmpc2/common/buffer.py
@@ -0,0 +1,115 @@
+from pathlib import Path
+import torch
+from tensordict.tensordict import TensorDict
+from torchrl.data.replay_buffers import ReplayBuffer, LazyTensorStorage
+from torchrl.data.replay_buffers.samplers import RandomSampler
+from torchrl.envs import RandomCropTensorDict, Transform, Compose
+
+from common.logger import make_dir
+
+
+class DataPrepTransform(Transform):
+	"""
+	Final replay-buffer transform: turns a sampled TensorDict into the tuple used by TD-MPC2 updates.
+	Expected keys: `obs`, `action`, `reward`, and optionally `task` (task IDs).
+	A TensorDict with T time steps holds T+1 observations and T actions/rewards;
+	the first action and reward of each TensorDict are dummies, so they are dropped here.
+	"""
+
+	def __init__(self):
+		super().__init__([])
+
+	def forward(self, td):
+		"""Return `(obs, action, reward, task)`; `task` is None when the key is absent."""
+		seq = td.permute(1,0) # swap the two batch dims (presumably batch-first -> time-first)
+		task = seq['task'][0] if 'task' in seq.keys() else None
+		action = seq['action'][1:]
+		reward = seq['reward'][1:].unsqueeze(-1)
+		return seq['obs'], action, reward, task
+
+
+class Buffer():
+	"""
+	Episode replay buffer for TD-MPC2 training.
+	Storage is placed in CUDA memory when enough is free, and in CPU memory otherwise.
+	"""
+
+	def __init__(self, cfg):
+		self.cfg = cfg
+		self._num_eps = 0
+		self._capacity = min(cfg.buffer_size, cfg.steps)//cfg.episode_length # capacity is counted in episodes
+		self._device = torch.device('cuda') # sampled batches are always moved to this device
+
+	@property
+	def capacity(self):
+		"""Maximum number of episodes the buffer can hold."""
+		return self._capacity
+
+	@property
+	def num_eps(self):
+		"""Number of episodes added so far."""
+		return self._num_eps
+
+	def _reserve_buffer(self, storage):
+		"""
+		Wrap `storage` in a ReplayBuffer that samples episodes with RandomSampler,
+		crops them to `horizon+1` steps (RandomCropTensorDict) and converts them
+		with DataPrepTransform into the format expected by TD-MPC2 updates.
+		"""
+		pipeline = Compose(
+			RandomCropTensorDict(self.cfg.horizon+1, -1),
+			DataPrepTransform(),
+		)
+		return ReplayBuffer(
+			storage=storage,
+			sampler=RandomSampler(),
+			pin_memory=True,
+			prefetch=1,
+			transform=pipeline,
+			batch_size=self.cfg.batch_size,
+		)
+
+	def _init(self, tds):
+		"""Create the replay buffer, estimating storage requirements from the first episode `tds`."""
+		def _nbytes(entry):
+			# Plain tensors are measured directly; a nested TensorDict is summed over its values.
+			if isinstance(entry, TensorDict):
+				return sum(t.numel()*t.element_size() for t in entry.values())
+			return entry.numel()*entry.element_size()
+
+		mem_free, _ = torch.cuda.mem_get_info()
+		bytes_per_ep = sum(_nbytes(v) for v in tds.values())
+		print(f'Bytes per episode: {bytes_per_ep:,}')
+		total_bytes = bytes_per_ep*self._capacity
+		print(f'Storage required: {total_bytes/1e9:.2f} GB')
+		# Heuristic: keep storage on the GPU only if 2.5x its size fits into free CUDA memory
+		fits_on_gpu = 2.5*total_bytes <= mem_free
+		if fits_on_gpu:
+			print('Using CUDA memory for storage.')
+			device = torch.device('cuda')
+		else:
+			print('Using CPU memory for storage.')
+			device = torch.device('cpu')
+		return self._reserve_buffer(LazyTensorStorage(self._capacity, device=device))
+
+	def add(self, tds):
+		"""Append one episode (equal-length episodes are assumed) and return the new episode count."""
+		if not self._num_eps: # storage is allocated lazily, sized from the first episode
+			self._buffer = self._init(tds)
+		self._buffer.add(tds)
+		self._num_eps += 1
+		return self._num_eps
+
+	def sample(self):
+		"""Sample a batch of sub-trajectories and move it to the training device."""
+		obs, action, reward, task = self._buffer.sample(batch_size=self.cfg.batch_size)
+		to_device = lambda t: t.to(self._device, non_blocking=True)
+		if task is not None:
+			task = to_device(task)
+		return to_device(obs), to_device(action), to_device(reward), task
+
+	def save(self):
+		"""Write the raw storage (moved to CPU) to `<buffer_dir>/<task>/<seed>/<num_eps>.pt`."""
+		snapshot = self._buffer._storage._storage.cpu()
+		out_dir = make_dir(Path(self.cfg.buffer_dir) / self.cfg.task / str(self.cfg.seed))
+		torch.save(snapshot, out_dir / f'{self._num_eps}.pt')
diff --git a/tdmpc2/common/init.py b/tdmpc2/common/init.py
new file mode 100644
index 0000000..45a3f5e
--- /dev/null
+++ b/tdmpc2/common/init.py
@@ -0,0 +1,22 @@
+import torch.nn as nn
+
+
+def weight_init(m):
+	"""Custom weight initialization for TD-MPC2; handles nn.Linear, nn.Embedding and nn.ParameterList."""
+	if isinstance(m, nn.Embedding):
+		nn.init.uniform_(m.weight, -0.02, 0.02)
+	elif isinstance(m, nn.Linear):
+		nn.init.trunc_normal_(m.weight, std=0.02)
+		if m.bias is not None:
+			nn.init.constant_(m.bias, 0)
+	elif isinstance(m, nn.ParameterList):
+		# 3-dim entries are treated as (stacked) Linear weights; the entry right after each is zeroed as its bias
+		for idx in (i for i, p in enumerate(m) if p.dim() == 3):
+			nn.init.trunc_normal_(m[idx], std=0.02)
+			nn.init.constant_(m[idx+1], 0)
+
+
+def zero_(params):
+	"""Set every tensor in `params` to zero, in place."""
+	for tensor in params:
+		tensor.data.zero_()
diff --git a/tdmpc2/common/layers.py b/tdmpc2/common/layers.py
new file mode 100644
index 0000000..baebf73
--- /dev/null
+++ b/tdmpc2/common/layers.py
@@ -0,0 +1,97 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from functorch import combine_state_for_ensemble
+
+
+class Ensemble(nn.Module):
+	"""
+	Vectorized ensemble: stacks the parameters of several modules and evaluates them together via `torch.vmap`.
+	"""
+
+	def __init__(self, modules, **kwargs):
+		super().__init__()
+		members = nn.ModuleList(modules)
+		stateless_fn, stacked, _ = combine_state_for_ensemble(members)
+		self.params = nn.ParameterList(nn.Parameter(t) for t in stacked)
+		self.vmap = torch.vmap(stateless_fn, in_dims=(0, 0, None), randomness='different', **kwargs)
+		self._repr = str(members)
+
+	def modules(self): # NOTE: replaces nn.Module.modules() with the functional model's stateless copy
+		return self.vmap.__wrapped__.stateless_model
+
+	def forward(self, *args, **kwargs):
+		return self.vmap(list(self.params), (), *args, **kwargs)
+
+	def __repr__(self):
+		return f'Vectorized {self._repr}'
+
+
+class SimNorm(nn.Module):
+	"""
+	Simplicial normalization: softmax over consecutive groups of `cfg.simnorm_dim` features.
+	Adapted from https://arxiv.org/abs/2204.00616.
+	"""
+
+	def __init__(self, cfg):
+		super().__init__()
+		self.dim = cfg.simnorm_dim
+
+	def forward(self, x):
+		# Split the last axis into groups of size `dim`, normalize each group, restore the shape.
+		*lead, _ = x.shape
+		groups = x.view(*lead, -1, self.dim)
+		return F.softmax(groups, dim=-1).view(x.shape)
+
+	def __repr__(self):
+		return "SimNorm(dim={})".format(self.dim)
+
+
+class NormedLinear(nn.Linear):
+	"""
+	`nn.Linear` followed by optional dropout, then LayerNorm, then an activation (Mish by default).
+	"""
+
+	def __init__(self, *args, dropout=0., act=nn.Mish(inplace=True), **kwargs):
+		super().__init__(*args, **kwargs)
+		self.ln = nn.LayerNorm(self.out_features)
+		self.act = act
+		self.dropout = None if not dropout else nn.Dropout(dropout, inplace=True)
+
+	def forward(self, x):
+		out = super().forward(x)
+		if self.dropout is not None:
+			out = self.dropout(out)
+		return self.act(self.ln(out))
+
+	def __repr__(self):
+		extra = "" if self.dropout is None else f", dropout={self.dropout.p}"
+		fields = (f"in_features={self.in_features}",
+			f"out_features={self.out_features}",
+			f"bias={self.bias is not None}{extra}", f"act={self.act.__class__.__name__}")
+		return "NormedLinear(" + ", ".join(fields) + ")"
+
+
+def enc(cfg, out=None):
+	"""Return an nn.ModuleDict with one encoder per key of `cfg.obs_shape` (only 'state' is supported).
+	`out` may supply an existing dict to extend; a fresh dict is created when it is omitted."""
+	out = {} if out is None else out # a `{}` default would be shared (and keep growing) across calls
+	for k in cfg.obs_shape.keys():
+		assert k == 'state'
+		out[k] = mlp(cfg.obs_shape[k][0] + cfg.task_dim, max(cfg.num_enc_layers-1, 1)*[cfg.enc_dim], cfg.latent_dim, act=SimNorm(cfg))
+	return nn.ModuleDict(out)
+
+
+def mlp(in_dim, mlp_dims, out_dim, act=None, dropout=0.):
+	"""
+	Basic building block of TD-MPC2: a stack of NormedLinear layers (LayerNorm + Mish),
+	with dropout on the first layer only; the output layer is a plain Linear unless `act` is given.
+	"""
+	hidden = [mlp_dims] if isinstance(mlp_dims, int) else mlp_dims
+	dims = [in_dim] + hidden + [out_dim]
+	layers = [
+		NormedLinear(d_in, d_out, dropout=dropout*(idx==0))
+		for idx, (d_in, d_out) in enumerate(zip(dims[:-2], dims[1:-1]))
+	]
+	layers.append(NormedLinear(dims[-2], dims[-1], act=act) if act else nn.Linear(dims[-2], dims[-1]))
+	return nn.Sequential(*layers)
diff --git a/tdmpc2/common/logger.py b/tdmpc2/common/logger.py
new file mode 100755
index 0000000..39c93fe
--- /dev/null
+++ b/tdmpc2/common/logger.py
@@ -0,0 +1,238 @@
+import os
+import datetime
+import re
+import numpy as np
+import pandas as pd
+from termcolor import colored
+from omegaconf import OmegaConf
+
+from common import TASK_SET
+
+
+CONSOLE_FORMAT = [ # (metric key in the log dict, label printed on the console, value format)
+	("iteration", "I", "int"),
+	("episode", "E", "int"),
+	("step", "I", "int"), # NOTE(review): shares the label "I" with "iteration" — confirm this is intended
+	("episode_reward", "R", "float"),
+	("episode_success", "S", "float"),
+	("total_time", "T", "time"),
+]
+
+CAT_TO_COLOR = { # log category -> console color; its keys are also the valid log categories
+	"pretrain": "yellow",
+	"train": "blue",
+	"eval": "green",
+}
+
+
+def make_dir(dir_path):
+	"""
+	Create `dir_path` (including missing parents) if it does not already exist, and return it unchanged.
+	Only "directory already exists" is tolerated; other OS errors (e.g. permissions) are raised, not swallowed.
+	"""
+	os.makedirs(dir_path, exist_ok=True)
+	return dir_path
+
+
+def print_run(cfg):
+	"""
+	Pretty-print the task, step budget, observation/action sizes and experiment name of the current run.
+	Logger calls this function at initialization.
+	"""
+	prefix, color, attrs = "  ", "green", ["bold"]
+
+	def _limstr(s, maxlen=36):
+		s = str(s) # convert first, so the length check and the slice act on the same string
+		return s[:maxlen] + "..." if len(s) > maxlen else s
+
+	def _pprint(k, v):
+		label = colored(f'{k.capitalize()+":":<15}', color, attrs=attrs)
+		print(prefix + label, _limstr(v))
+
+	obs_dim = cfg.obs_shape['state'][0] if 'state' in cfg.obs_shape else cfg.obs_shape[0]
+	kvs = [
+		("task", cfg.task_title),
+		("steps", f"{int(cfg.steps):,}"),
+		("observations", obs_dim),
+		("actions", cfg.action_dim),
+		("experiment", cfg.exp_name),
+	]
+	w = max(len(_limstr(v)) for _, v in kvs) + 25 # divider spans the longest value plus the label column
+	div = "-" * w
+	print(div)
+	for k, v in kvs:
+		_pprint(k, v)
+	print(div)
+
+
+def cfg_to_group(cfg, return_list=False):
+	"""Build a wandb-safe group name `<task>-<sanitized exp_name>`, or its two parts if `return_list`."""
+	safe_exp_name = re.sub("[^0-9a-zA-Z]+", "-", cfg.exp_name)
+	parts = [cfg.task, safe_exp_name]
+	if return_list:
+		return parts
+	return "-".join(parts)
+
+
+class VideoRecorder:
+	"""Collects rendered frames during evaluation and logs them to wandb as a video."""
+
+	def __init__(self, cfg, wandb, fps=15):
+		self.cfg, self.fps = cfg, fps
+		self._wandb = wandb
+		self._save_dir = make_dir(cfg.work_dir / 'eval_video')
+		self.frames, self.enabled = [], False
+
+	def init(self, env, enabled=True):
+		"""Start a new recording and capture the first frame (when recording is enabled)."""
+		self.frames = []
+		self.enabled = self._save_dir and self._wandb and enabled
+		self.record(env)
+
+	def record(self, env):
+		if not self.enabled:
+			return
+		self.frames.append(env.render())
+
+	def save(self, step, key='videos/eval_video'):
+		if not self.enabled or len(self.frames) == 0:
+			return None
+		clip = np.stack(self.frames).transpose(0, 3, 1, 2) # last axis moved to position 1 (presumably channels-last -> channels-first)
+		video = self._wandb.Video(clip, fps=self.fps, format='mp4')
+		return self._wandb.log({key: video}, step=step)
+
+
+class Logger:
+	"""Primary logging object. Logs either locally or using wandb."""
+
+	def __init__(self, cfg):
+		"""Create the log/model directories, print the run summary and (optionally) start a wandb run."""
+		self._log_dir = make_dir(cfg.work_dir)
+		self._model_dir = make_dir(self._log_dir / "models")
+		self._save_csv = cfg.save_csv
+		self._save_agent = cfg.save_agent
+		self._group = cfg_to_group(cfg)
+		self._seed = cfg.seed
+		self._eval = [] # rows of (step, episode_reward) written to eval.csv
+		print_run(cfg)
+		self.project = cfg.get("wandb_project", "none")
+		self.entity = cfg.get("wandb_entity", "none")
+		if cfg.disable_wandb or self.project == "none" or self.entity == "none":
+			print(colored("Wandb disabled.", "blue", attrs=["bold"]))
+			cfg.save_agent = False # note: self._save_agent was already read above and is not changed here
+			cfg.save_video = False
+			self._wandb = None
+			self._video = None
+			return
+		os.environ["WANDB_SILENT"] = "true" if cfg.wandb_silent else "false"
+		import wandb
+
+		wandb.init(
+			project=self.project,
+			entity=self.entity,
+			name=str(cfg.seed),
+			group=self._group,
+			tags=cfg_to_group(cfg, return_list=True) + [f"seed:{cfg.seed}"],
+			dir=self._log_dir,
+			config=OmegaConf.to_container(cfg, resolve=True),
+		)
+		print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"]))
+		self._wandb = wandb
+		self._video = VideoRecorder(cfg, self._wandb) if self._wandb and cfg.save_video else None
+
+	@property
+	def video(self):
+		return self._video
+
+	@property
+	def model_dir(self):
+		return self._model_dir
+
+	def save_agent(self, agent=None, identifier='final'):
+		"""Save `agent` to `<model_dir>/<identifier>.pt` and, when wandb is active, upload it as an artifact."""
+		if self._save_agent and agent:
+			fp = self._model_dir / f'{str(identifier)}.pt'
+			agent.save(fp)
+			if self._wandb:
+				artifact = self._wandb.Artifact(
+					self._group + '-' + str(self._seed) + '-' + str(identifier),
+					type='model',
+				)
+				artifact.add_file(fp)
+				self._wandb.log_artifact(artifact)
+
+	def finish(self, agent=None):
+		"""Save the final agent (best effort: failures are printed, not raised) and close the wandb run."""
+		try:
+			self.save_agent(agent)
+		except Exception as e:
+			print(colored(f"Failed to save model: {e}", "red"))
+		if self._wandb:
+			self._wandb.finish()
+
+	def _format(self, key, value, ty):
+		"""Format one console entry; `ty` is "int", "float" or "time" (seconds). Raises ValueError otherwise."""
+		if ty == "int":
+			return f'{colored(key+":", "blue")} {int(value):,}'
+		elif ty == "float":
+			return f'{colored(key+":", "blue")} {value:.01f}'
+		elif ty == "time":
+			value = str(datetime.timedelta(seconds=int(value)))
+			return f'{colored(key+":", "blue")} {value}'
+		else:
+			raise ValueError(f"invalid log format type: {ty}") # a bare string cannot be raised
+
+	def _print(self, d, category):
+		category = colored(category, CAT_TO_COLOR[category])
+		pieces = [f" {category:<14}"]
+		for k, disp_k, ty in CONSOLE_FORMAT: # only keys listed in CONSOLE_FORMAT are printed
+			if k in d:
+				pieces.append(f"{self._format(disp_k, d[k], ty):<22}")
+		print("   ".join(pieces))
+
+	def pprint_multitask(self, d, cfg):
+		"""Pretty-print evaluation metrics for multi-task training and add the per-suite averages to `d`."""
+		print(colored(f'Evaluated agent on {len(cfg.tasks)} tasks:', 'yellow', attrs=['bold']))
+		dmcontrol_reward = []
+		metaworld_reward = []
+		metaworld_success = []
+		for k, v in d.items():
+			if '+' not in k: # per-task metrics are keyed as "<metric>+<task>"
+				continue
+			task = k.split('+')[1]
+			if task in TASK_SET['mt30'] and k.startswith('episode_reward'): # DMControl
+				dmcontrol_reward.append(v)
+				print(colored(f'  {task:<22}\tR: {v:.01f}', 'yellow'))
+			elif task in TASK_SET['mt80'] and task not in TASK_SET['mt30']: # Meta-World
+				if k.startswith('episode_reward'):
+					metaworld_reward.append(v)
+				elif k.startswith('episode_success'):
+					metaworld_success.append(v)
+					print(colored(f'  {task:<22}\tS: {v:.02f}', 'yellow'))
+		dmcontrol_reward = np.nanmean(dmcontrol_reward)
+		d['episode_reward+avg_dmcontrol'] = dmcontrol_reward
+		print(colored(f'  {"dmcontrol":<22}\tR: {dmcontrol_reward:.01f}', 'yellow', attrs=['bold']))
+		if cfg.task == 'mt80':
+			metaworld_reward = np.nanmean(metaworld_reward)
+			metaworld_success = np.nanmean(metaworld_success)
+			d['episode_reward+avg_metaworld'] = metaworld_reward
+			d['episode_success+avg_metaworld'] = metaworld_success
+			print(colored(f'  {"metaworld":<22}\tR: {metaworld_reward:.01f}', 'yellow', attrs=['bold']))
+			print(colored(f'  {"metaworld":<22}\tS: {metaworld_success:.02f}', 'yellow', attrs=['bold']))
+
+	def log(self, d, category="train"):
+		assert category in CAT_TO_COLOR.keys(), f"invalid category: {category}"
+		if self._wandb:
+			if category in {"train", "eval"}:
+				xkey = "step"
+			elif category == "pretrain":
+				xkey = "iteration"
+			for k, v in d.items():
+				self._wandb.log({category + "/" + k: v}, step=d[xkey])
+		if category == "eval" and self._save_csv:
+			keys = ["step", "episode_reward"]
+			self._eval.append(np.array([d[keys[0]], d[keys[1]]]))
+			pd.DataFrame(np.array(self._eval)).to_csv( # the whole CSV is rewritten on every eval
+				self._log_dir / "eval.csv", header=keys, index=None
+			)
+		self._print(d, category)
diff --git a/tdmpc2/common/math.py b/tdmpc2/common/math.py
new file mode 100644
index 0000000..62b8230
--- /dev/null
+++ b/tdmpc2/common/math.py
@@ -0,0 +1,95 @@
+import torch
+import torch.nn.functional as F
+
+
+def soft_ce(pred, target, cfg):
+	"""Cross entropy between logits `pred` and the soft targets that `two_hot` builds from `target` (last dim kept)."""
+	soft_target = two_hot(target, cfg)
+	log_probs = F.log_softmax(pred, dim=-1)
+	return -(soft_target * log_probs).sum(-1, keepdim=True)
+
+
+@torch.jit.script
+def log_std(x, low, dif): # rescales an unbounded `x` to a log-std between `low` and `low + dif`
+	return low + 0.5 * dif * (torch.tanh(x) + 1)
+
+
+@torch.jit.script
+def _gaussian_residual(eps, log_std): # per-element Gaussian log-density term, without the 0.5*log(2*pi) constant
+	return -0.5 * eps.pow(2) - log_std
+
+
+@torch.jit.script
+def _gaussian_logprob(residual): # subtracts the normalization constant 0.5*log(2*pi) from `residual`
+	return residual - 0.5 * torch.log(2 * torch.pi)
+
+
+def gaussian_logprob(eps, log_std, size=None):
+	"""Compute Gaussian log probability; `size` defaults to the last-dim length of `eps`. NOTE(review): the whole term is multiplied by `size` — confirm intended."""
+	dims = eps.size(-1) if size is None else size
+	summed = _gaussian_residual(eps, log_std).sum(-1, keepdim=True)
+	scaled = _gaussian_logprob(summed) * dims
+	return scaled
+
+
+@torch.jit.script
+def _squash(pi): # log(1 - pi^2), with the argument clamped at 0 and offset by 1e-6 so the log stays finite
+	return torch.log(F.relu(1 - pi.pow(2)) + 1e-6)
+
+
+def squash(mu, pi, log_pi):
+	"""Tanh-squash `mu` and `pi` and correct `log_pi` accordingly (note: `log_pi` is updated in place)."""
+	squashed_mu, squashed_pi = torch.tanh(mu), torch.tanh(pi)
+	correction = _squash(squashed_pi).sum(-1, keepdim=True)
+	log_pi -= correction
+	return squashed_mu, squashed_pi, log_pi
+
+
+@torch.jit.script
+def symlog(x):
+	"""
+	Symmetric logarithm: sign(x) * log(1 + |x|).
+	Adapted from https://github.com/danijar/dreamerv3.
+	"""
+	return x.sign() * (1 + x.abs()).log()
+
+
+@torch.jit.script
+def symexp(x):
+	"""
+	Symmetric exponential: sign(x) * (exp(|x|) - 1).
+	Adapted from https://github.com/danijar/dreamerv3.
+	"""
+	return x.sign() * (x.abs().exp() - 1)
+
+
+def two_hot(x, cfg):
+	"""Encode a batch of scalars as soft two-hot targets over `cfg.num_bins` bins (in symlog space) for discrete regression."""
+	if cfg.num_bins == 0:
+		return x
+	if cfg.num_bins == 1:
+		return symlog(x)
+	clipped = torch.clamp(symlog(x), cfg.vmin, cfg.vmax).squeeze(1)
+	pos = (clipped - cfg.vmin) / cfg.bin_size # fractional bin position of every value
+	idx = torch.floor(pos).long()
+	frac = (pos - idx.float()).unsqueeze(-1)
+	encoded = torch.zeros(clipped.size(0), cfg.num_bins, device=clipped.device)
+	encoded.scatter_(1, idx.unsqueeze(1), 1 - frac)
+	return encoded.scatter_(1, (idx.unsqueeze(1) + 1) % cfg.num_bins, frac)
+
+
+DREG_BINS = None
+
+
+def two_hot_inv(x, cfg):
+	"""Converts a batch of soft two-hot encoded vectors (bin logits) to scalars."""
+	global DREG_BINS
+	if cfg.num_bins == 0:
+		return x
+	elif cfg.num_bins == 1:
+		return symexp(x)
+	# Rebuild the bin centers on every call: a build-once cache goes stale (wrong device or
+	# bin range) as soon as a different cfg or device is used. The global is kept for compatibility.
+	DREG_BINS = torch.linspace(cfg.vmin, cfg.vmax, cfg.num_bins, device=x.device)
+	x = F.softmax(x, dim=-1)
+	return symexp(torch.sum(x * DREG_BINS, dim=-1, keepdim=True))
diff --git a/tdmpc2/common/parser.py b/tdmpc2/common/parser.py
new file mode 100755
index 0000000..f36731e
--- /dev/null
+++ b/tdmpc2/common/parser.py
@@ -0,0 +1,60 @@
+import re
+from pathlib import Path
+
+import hydra
+from omegaconf import OmegaConf
+
+from common import MODEL_SIZE, TASK_SET
+
+
+def parse_cfg(cfg: OmegaConf) -> OmegaConf:
+	"""
+	Post-processes a Hydra config in place and returns it. Mostly for convenience.
+	Steps: evaluate simple arithmetic strings, derive convenience fields (work_dir,
+	task_title, bin_size), apply the MODEL_SIZE preset, and set the multi-task fields.
+	Keys whose value cannot be read (e.g. still-missing `???` entries) are skipped.
+	"""
+
+	# NOTE: an earlier "Logic" pass over None-valued keys only rebound a local variable
+	# and never wrote to cfg; it was removed here with no change in behavior.
+
+	# Algebraic expressions, e.g. "3*4". The whole string must be <digits><op><digits>
+	# (fullmatch), so values that merely start like one (dates, names) are left alone.
+	for k in cfg.keys():
+		try:
+			v = cfg[k]
+			if isinstance(v, str):
+				match = re.fullmatch(r"(\d+)([+\-*/])(\d+)", v)
+				if match:
+					# eval only ever sees digits, one operator, digits (guaranteed by the pattern)
+					cfg[k] = eval(match.group(1) + match.group(2) + match.group(3))
+					if isinstance(cfg[k], float) and cfg[k].is_integer():
+						cfg[k] = int(cfg[k])
+		except Exception: # best effort: skip keys that cannot be read or evaluated
+			pass
+
+	# Convenience
+	cfg.work_dir = Path(hydra.utils.get_original_cwd()) / 'logs' / cfg.task / str(cfg.seed) / cfg.exp_name
+	cfg.task_title = cfg.task.replace("-", " ").title()
+	# max(..., 1) avoids a ZeroDivisionError when num_bins is 1 (two_hot does not use bin_size in that mode)
+	cfg.bin_size = (cfg.vmax - cfg.vmin) / max(cfg.num_bins-1, 1) # Bin size for discrete regression
+
+	# Model size
+	assert cfg.model_size in MODEL_SIZE.keys(), \
+		f'Invalid model size {cfg.model_size}. Must be one of {list(MODEL_SIZE.keys())}'
+	for k, v in MODEL_SIZE[cfg.model_size].items():
+		cfg[k] = v
+	if cfg.task == 'mt30' and cfg.model_size == 19:
+		cfg.latent_dim = 512 # This checkpoint is slightly smaller
+
+	# Multi-task
+	cfg.multitask = cfg.task in TASK_SET.keys()
+	if cfg.multitask:
+		cfg.task_title = cfg.task.upper()
+		# Account for slight inconsistency in task_dim for the mt30 experiments
+		cfg.task_dim = 96 if cfg.task == 'mt80' or cfg.model_size in {1, 317} else 64
+	else:
+		cfg.task_dim = 0
+	cfg.tasks = TASK_SET.get(cfg.task, [cfg.task])
+
+	return cfg
diff --git a/tdmpc2/common/scale.py b/tdmpc2/common/scale.py
new file mode 100644
index 0000000..63f0bb2
--- /dev/null
+++ b/tdmpc2/common/scale.py
@@ -0,0 +1,48 @@
+import torch
+
+
+class RunningScale:
+	"""Running trimmed scale estimator: tracks an EMA of the 5th-95th percentile range of its inputs."""
+
+	def __init__(self, cfg):
+		self.cfg = cfg
+		device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # CPU fallback instead of crashing without CUDA
+		self._value = torch.ones(1, dtype=torch.float32, device=device)
+		self._percentiles = torch.tensor([5, 95], dtype=torch.float32, device=device)
+
+	def state_dict(self):
+		return dict(value=self._value, percentiles=self._percentiles)
+
+	def load_state_dict(self, state_dict):
+		self._value.data.copy_(state_dict['value'])
+		self._percentiles.data.copy_(state_dict['percentiles'])
+
+	@property
+	def value(self): # current scale as a Python float
+		return self._value.cpu().item()
+
+	def _percentile(self, x):
+		"""Linearly interpolated percentiles of `x` along dim 0 (one row per entry of `self._percentiles`)."""
+		x_dtype, x_shape = x.dtype, x.shape
+		x = x.view(x.shape[0], -1)
+		in_sorted, _ = torch.sort(x, dim=0)
+		positions = self._percentiles * (x.shape[0]-1) / 100 # fractional row index of each percentile
+		floored = torch.floor(positions)
+		ceiled = torch.clamp(floored + 1, max=x.shape[0] - 1) # upper neighbor, kept inside the valid index range
+		weight_ceiled = positions-floored
+		d0 = in_sorted[floored.long(), :] * (1.0 - weight_ceiled)[:, None]
+		d1 = in_sorted[ceiled.long(), :] * weight_ceiled[:, None]
+		return (d0+d1).view(-1, *x_shape[1:]).type(x_dtype)
+
+	def update(self, x):
+		percentiles = self._percentile(x.detach())
+		value = torch.clamp(percentiles[1] - percentiles[0], min=1.) # the range is never allowed below 1
+		self._value.data.lerp_(value, self.cfg.tau) # moving average with rate cfg.tau
+
+	def __call__(self, x, update=False):
+		if update:
+			self.update(x)
+		return x * (1/self.value)
+
+	def __repr__(self):
+		return f'RunningScale(S: {self.value})'
diff --git a/tdmpc2/common/seed.py b/tdmpc2/common/seed.py
new file mode 100644
index 0000000..5c8972e
--- /dev/null
+++ b/tdmpc2/common/seed.py
@@ -0,0 +1,12 @@
+import random
+
+import numpy as np
+import torch
+
+
+def set_seed(seed):
+	"""Seed Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) for reproducibility."""
+	seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
+	for apply_seed in seeders:
+		# every generator receives the same seed value
+		apply_seed(seed)
diff --git a/tdmpc2/common/world_model.py b/tdmpc2/common/world_model.py
new file mode 100644
index 0000000..30fb1d4
--- /dev/null
+++ b/tdmpc2/common/world_model.py
@@ -0,0 +1,174 @@
+from copy import deepcopy
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from common import layers, math, init
+
+
+class WorldModel(nn.Module):
+	"""
+	TD-MPC2 implicit world model architecture: encoder, latent dynamics, reward, policy prior and Q-ensemble.
+	Can be used for both single-task and multi-task experiments.
+	"""
+
+	def __init__(self, cfg):
+		super().__init__()
+		self.cfg = cfg
+		if cfg.multitask:
+			self._task_emb = nn.Embedding(len(cfg.tasks), cfg.task_dim, max_norm=1)
+			self._action_masks = torch.zeros(len(cfg.tasks), cfg.action_dim) # 1 for the action dims a task uses, 0 otherwise
+			for i in range(len(cfg.tasks)):
+				self._action_masks[i, :cfg.action_dims[i]] = 1.
+		self._encoder = layers.enc(cfg)
+		self._dynamics = layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], cfg.latent_dim, act=layers.SimNorm(cfg))
+		self._reward = layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], max(cfg.num_bins, 1))
+		self._pi = layers.mlp(cfg.latent_dim + cfg.task_dim, 2*[cfg.mlp_dim], 2*cfg.action_dim) # outputs mean and log-std, hence 2*action_dim
+		self._Qs = layers.Ensemble([layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], max(cfg.num_bins, 1), dropout=cfg.dropout) for _ in range(cfg.num_q)])
+		self.apply(init.weight_init)
+		init.zero_([self._reward[-1].weight, self._Qs.params[-2]]) # params[-2] is presumably the last Q layer's weight — TODO confirm
+		self._target_Qs = deepcopy(self._Qs).requires_grad_(False)
+		self.log_std_min = torch.tensor(cfg.log_std_min)
+		self.log_std_dif = torch.tensor(cfg.log_std_max) - self.log_std_min
+
+	@property
+	def total_params(self): # number of trainable parameters (requires_grad only)
+		return sum(p.numel() for p in self.parameters() if p.requires_grad)
+		
+	def to(self, *args, **kwargs):
+		"""
+		Overriding `to` method to also move additional tensors (action masks, log-std bounds) to device.
+		"""
+		super().to(*args, **kwargs)
+		if self.cfg.multitask:
+			self._action_masks = self._action_masks.to(*args, **kwargs)
+		self.log_std_min = self.log_std_min.to(*args, **kwargs)
+		self.log_std_dif = self.log_std_dif.to(*args, **kwargs)
+		return self
+	
+	def train(self, mode=True):
+		"""
+		Overriding `train` method to keep target Q-networks in eval mode.
+		"""
+		super().train(mode)
+		self._target_Qs.train(False)
+		return self
+
+	def track_q_grad(self, mode=True):
+		"""
+		Enables/disables gradient tracking of Q-networks.
+		Avoids unnecessary computation during policy optimization.
+		This method also enables/disables gradients for task embeddings,
+		and sets the dropout probability to 0 if `mode` is False.
+		"""
+		for p in self._Qs.parameters():
+			p.requires_grad_(mode)
+		if self.cfg.multitask:
+			for p in self._task_emb.parameters():
+				p.requires_grad_(mode)
+		for m in self._Qs.modules(): # NOTE(review): Ensemble overrides modules(); verify this loop actually reaches the nn.Dropout instances
+			if isinstance(m, nn.Dropout):
+				m.p = self.cfg.dropout if mode else 0
+
+	def soft_update_target_Q(self):
+		"""
+		Soft-update target Q-networks using Polyak averaging with rate `cfg.tau`.
+		"""
+		with torch.no_grad():
+			for p, p_target in zip(self._Qs.parameters(), self._target_Qs.parameters()):
+				p_target.data.lerp_(p.data, self.cfg.tau)
+	
+	def task_emb(self, x, task):
+		"""
+		Continuous task embedding for multi-task experiments.
+		Retrieves the task embedding for a given task ID `task`
+		and concatenates it to the input `x` along the last dimension.
+		"""
+		if isinstance(task, int):
+			task = torch.tensor([task], device=x.device)
+		emb = self._task_emb(task.long())
+		if x.ndim == 3: # repeat the embedding along the leading dim of a 3-dim input
+			emb = emb.unsqueeze(0).repeat(x.shape[0], 1, 1)
+		elif emb.shape[0] == 1: # a single task ID is repeated for every row of `x`
+			emb = emb.repeat(x.shape[0], 1)
+		return torch.cat([x, emb], dim=-1)
+
+	def encode(self, obs, task):
+		"""
+		Encodes an observation into its latent representation.
+		This implementation assumes a single state-based observation.
+		"""
+		if self.cfg.multitask:
+			obs = self.task_emb(obs, task)
+		return self._encoder['state'](obs)
+
+	def next(self, z, a, task):
+		"""
+		Predicts the next latent state given the current latent state and action.
+		"""
+		if self.cfg.multitask:
+			z = self.task_emb(z, task)
+		z = torch.cat([z, a], dim=-1)
+		return self._dynamics(z)
+	
+	def reward(self, z, a, task):
+		"""
+		Predicts instantaneous (single-step) reward; the output has max(cfg.num_bins, 1) entries.
+		"""
+		if self.cfg.multitask:
+			z = self.task_emb(z, task)
+		z = torch.cat([z, a], dim=-1)
+		return self._reward(z)
+
+	def pi(self, z, task):
+		"""
+		Samples an action from the policy prior.
+		The policy prior is a Gaussian distribution with
+		mean and (log) std predicted by a neural network.
+		Returns `(mu, pi, log_pi, log_std)`; `mu` and `pi` are tanh-squashed.
+		"""
+		if self.cfg.multitask:
+			z = self.task_emb(z, task)
+
+		# Gaussian policy prior
+		mu, log_std = self._pi(z).chunk(2, dim=-1)
+		log_std = math.log_std(log_std, self.log_std_min, self.log_std_dif)
+		eps = torch.randn_like(mu)
+
+		if self.cfg.multitask: # Mask out unused action dimensions
+			mu = mu * self._action_masks[task]
+			log_std = log_std * self._action_masks[task]
+			eps = eps * self._action_masks[task]
+			action_dims = self._action_masks.sum(-1)[task].unsqueeze(-1) # number of active action dims per task
+		else: # No masking
+			action_dims = None
+
+		log_pi = math.gaussian_logprob(eps, log_std, size=action_dims)
+		pi = mu + eps * log_std.exp()
+		mu, pi, log_pi = math.squash(mu, pi, log_pi)
+
+		return mu, pi, log_pi, log_std
+
+	def Q(self, z, a, task, return_type='min', target=False):
+		"""
+		Predict state-action value.
+		`return_type` can be one of [`min`, `avg`, `all`]:
+			- `min`: return the minimum of two randomly subsampled Q-values.
+			- `avg`: return the average of two randomly subsampled Q-values.
+			- `all`: return all Q-values (raw ensemble outputs, not passed through two_hot_inv).
+		`target` specifies whether to use the target Q-networks or not.
+		"""
+		assert return_type in {'min', 'avg', 'all'}
+
+		if self.cfg.multitask:
+			z = self.task_emb(z, task)
+			
+		z = torch.cat([z, a], dim=-1)
+		out = (self._target_Qs if target else self._Qs)(z)
+
+		if return_type == 'all':
+			return out
+
+		Q1, Q2 = out[np.random.choice(self.cfg.num_q, 2, replace=False)] # two distinct ensemble members
+		Q1, Q2 = math.two_hot_inv(Q1, self.cfg), math.two_hot_inv(Q2, self.cfg)
+		return torch.min(Q1, Q2) if return_type == 'min' else (Q1 + Q2) / 2
diff --git a/tdmpc2/config.yaml b/tdmpc2/config.yaml
new file mode 100755
index 0000000..3b945ee
--- /dev/null
+++ b/tdmpc2/config.yaml
@@ -0,0 +1,86 @@
+defaults:
+    - override hydra/launcher: submitit_local
+
+# environment
+task: dog-run
+
+# evaluation
+checkpoint: ???
+eval_episodes: 10
+eval_freq: 50000
+
+# training
+steps: 10_000_000
+batch_size: 256
+reward_coef: 0.1
+value_coef: 0.1
+consistency_coef: 20
+rho: 0.5
+lr: 3e-4
+enc_lr_scale: 0.3
+grad_clip_norm: 20
+tau: 0.01
+discount_denom: 5
+discount_min: 0.95
+discount_max: 0.995
+buffer_size: 1_000_000
+exp_name: default
+data_dir: ???
+
+# planning
+mpc: true
+iterations: 6
+num_samples: 512
+num_elites: 64
+num_pi_trajs: 24
+horizon: 3
+min_std: 0.05
+max_std: 2
+temperature: 0.5
+
+# actor
+log_std_min: -10
+log_std_max: 2
+entropy_coef: 1e-4
+
+# critic
+num_bins: 101
+vmin: -10
+vmax: +10
+
+# architecture
+model_size: 5
+num_enc_layers: 2
+enc_dim: 256
+mlp_dim: 512
+latent_dim: 512
+task_dim: 96
+num_q: 5
+dropout: 0.01
+simnorm_dim: 8
+
+# logging
+wandb_project: ???
+wandb_entity: ???
+wandb_silent: false
+disable_wandb: true
+save_csv: true
+
+# misc
+save_video: true
+save_agent: true
+seed: 1
+
+# convenience
+work_dir: ???
+task_title: ???
+multitask: ???
+tasks: ???
+obs_shape: ???
+action_dim: ???
+episode_length: ???
+obs_shapes: ???
+action_dims: ???
+episode_lengths: ???
+seed_steps: ???
+bin_size: ???
diff --git a/tdmpc2/envs/__init__.py b/tdmpc2/envs/__init__.py
new file mode 100644
index 0000000..ef2a630
--- /dev/null
+++ b/tdmpc2/envs/__init__.py
@@ -0,0 +1,62 @@
+from copy import deepcopy
+import warnings
+
+import gym
+
+from envs.wrappers.multitask import MultitaskWrapper
+from envs.wrappers.tensor import TensorWrapper
+from envs.dmcontrol import make_env as make_dm_control_env
+from envs.maniskill import make_env as make_maniskill_env
+from envs.metaworld import make_env as make_metaworld_env
+from envs.myosuite import make_env as make_myosuite_env
+from envs.exceptions import UnknownTaskError
+
+warnings.filterwarnings('ignore', category=DeprecationWarning)
+
+
+def make_multitask_env(cfg):
+	"""
+	Make a multi-task environment for TD-MPC2 experiments.
+	"""
+	print('Creating multi-task environment with tasks:', cfg.tasks)
+	envs = []
+	for task in cfg.tasks:
+		_cfg = deepcopy(cfg)
+		_cfg.task = task
+		_cfg.multitask = False
+		env = make_env(_cfg)
+		if env is None:
+			raise UnknownTaskError(task)
+		envs.append(env)
+	env = MultitaskWrapper(cfg, envs)
+	cfg.obs_shapes = env._obs_dims
+	cfg.action_dims = env._action_dims
+	cfg.episode_lengths = env._episode_lengths
+	return env
+	
+
+def make_env(cfg):
+	"""
+	Make an environment for TD-MPC2 experiments.
+	"""
+	gym.logger.set_level(40)
+	if cfg.multitask:
+		env = make_multitask_env(cfg)
+	else:
+		env = None
+		for fn in [make_dm_control_env, make_maniskill_env, make_metaworld_env, make_myosuite_env]:
+			try:
+				env = fn(cfg)
+			except UnknownTaskError:
+				pass
+		if env is None:
+			raise UnknownTaskError(cfg.task)
+		env = TensorWrapper(env)
+	try: # Dict
+		cfg.obs_shape = {k: v.shape for k, v in env.observation_space.spaces.items()}
+	except: # Box
+		cfg.obs_shape = {'state': env.observation_space.shape}
+	cfg.action_dim = env.action_space.shape[0]
+	cfg.episode_length = env.max_episode_steps
+	cfg.seed_steps = max(1000, 5*cfg.episode_length)
+	return env
diff --git a/tdmpc2/envs/dmcontrol.py b/tdmpc2/envs/dmcontrol.py
new file mode 100644
index 0000000..32cb4b6
--- /dev/null
+++ b/tdmpc2/envs/dmcontrol.py
@@ -0,0 +1,200 @@
+from collections import deque, defaultdict
+from typing import Any, NamedTuple
+import dm_env
+import numpy as np
+from envs.tasks import cheetah, walker, hopper, reacher, ball_in_cup, pendulum, fish
+from dm_control import suite
+suite.ALL_TASKS = suite.ALL_TASKS + suite._get_tasks('custom')
+suite.TASKS_BY_DOMAIN = suite._get_tasks_by_domain(suite.ALL_TASKS)
+from dm_control.suite.wrappers import action_scale
+from dm_env import StepType, specs
+from envs.exceptions import UnknownTaskError
+import gym
+
+
+class ExtendedTimeStep(NamedTuple):
+	step_type: Any
+	reward: Any
+	discount: Any
+	observation: Any
+	action: Any
+
+	def first(self):
+		return self.step_type == StepType.FIRST
+
+	def mid(self):
+		return self.step_type == StepType.MID
+
+	def last(self):
+		return self.step_type == StepType.LAST
+
+
+class ActionRepeatWrapper(dm_env.Environment):
+	def __init__(self, env, num_repeats):
+		self._env = env
+		self._num_repeats = num_repeats
+
+	def step(self, action):
+		reward = 0.0
+		discount = 1.0
+		for i in range(self._num_repeats):
+			time_step = self._env.step(action)
+			reward += (time_step.reward or 0.0) * discount
+			discount *= time_step.discount
+			if time_step.last():
+				break
+
+		return time_step._replace(reward=reward, discount=discount)
+
+	def observation_spec(self):
+		return self._env.observation_spec()
+
+	def action_spec(self):
+		return self._env.action_spec()
+
+	def reset(self):
+		return self._env.reset()
+
+	def __getattr__(self, name):
+		return getattr(self._env, name)
+
+
+class ActionDTypeWrapper(dm_env.Environment):
+	def __init__(self, env, dtype):
+		self._env = env
+		wrapped_action_spec = env.action_spec()
+		self._action_spec = specs.BoundedArray(wrapped_action_spec.shape,
+											   dtype,
+											   wrapped_action_spec.minimum,
+											   wrapped_action_spec.maximum,
+											   'action')
+
+	def step(self, action):
+		action = action.astype(self._env.action_spec().dtype)
+		return self._env.step(action)
+
+	def observation_spec(self):
+		return self._env.observation_spec()
+
+	def action_spec(self):
+		return self._action_spec
+
+	def reset(self):
+		return self._env.reset()
+
+	def __getattr__(self, name):
+		return getattr(self._env, name)
+
+
+class ExtendedTimeStepWrapper(dm_env.Environment):
+	def __init__(self, env):
+		self._env = env
+
+	def reset(self):
+		time_step = self._env.reset()
+		return self._augment_time_step(time_step)
+
+	def step(self, action):
+		time_step = self._env.step(action)
+		return self._augment_time_step(time_step, action)
+
+	def _augment_time_step(self, time_step, action=None):
+		if action is None:
+			action_spec = self.action_spec()
+			action = np.zeros(action_spec.shape, dtype=action_spec.dtype)
+		return ExtendedTimeStep(observation=time_step.observation,
+								step_type=time_step.step_type,
+								action=action,
+								reward=time_step.reward or 0.0,
+								discount=time_step.discount or 1.0)
+
+	def observation_spec(self):
+		return self._env.observation_spec()
+
+	def action_spec(self):
+		return self._env.action_spec()
+
+	def __getattr__(self, name):
+		return getattr(self._env, name)
+
+
+class TimeStepToGymWrapper:
+	def __init__(self, env, domain, task):
+		obs_shp = []
+		for v in env.observation_spec().values():
+			try:
+				shp = np.prod(v.shape)
+			except:
+				shp = 1
+			obs_shp.append(shp)
+		obs_shp = (int(np.sum(obs_shp)),)
+		act_shp = env.action_spec().shape
+		self.observation_space = gym.spaces.Box(
+			low=np.full(
+				obs_shp,
+				-np.inf,
+				dtype=np.float32),
+			high=np.full(
+				obs_shp,
+				np.inf,
+				dtype=np.float32),
+			dtype=np.float32,
+		)
+		self.action_space = gym.spaces.Box(
+			low=np.full(act_shp, env.action_spec().minimum),
+			high=np.full(act_shp, env.action_spec().maximum),
+			dtype=env.action_spec().dtype)
+		self.env = env
+		self.domain = domain
+		self.task = task
+		self.max_episode_steps = 500
+		self.t = 0
+	
+	@property
+	def unwrapped(self):
+		return self.env
+
+	@property
+	def reward_range(self):
+		return None
+
+	@property
+	def metadata(self):
+		return None
+	
+	def _obs_to_array(self, obs):
+		return np.concatenate([v.flatten() for v in obs.values()])
+
+	def reset(self):
+		self.t = 0
+		return self._obs_to_array(self.env.reset().observation)
+	
+	def step(self, action):
+		self.t += 1
+		time_step = self.env.step(action)
+		return self._obs_to_array(time_step.observation), time_step.reward, time_step.last() or self.t == self.max_episode_steps, defaultdict(float)
+
+	def render(self, mode='rgb_array', width=384, height=384, camera_id=0):
+		camera_id = dict(quadruped=2).get(self.domain, camera_id)
+		return self.env.physics.render(height, width, camera_id)
+
+
+def make_env(cfg):
+	"""
+	Make DMControl environment.
+	Adapted from https://github.com/facebookresearch/drqv2
+	"""
+	domain, task = cfg.task.replace('-', '_').split('_', 1)
+	domain = dict(cup='ball_in_cup', pointmass='point_mass').get(domain, domain)
+	if (domain, task) not in suite.ALL_TASKS:
+		raise UnknownTaskError(cfg.task)
+	env = suite.load(domain,
+					 task,
+					 task_kwargs={'random': cfg.seed},
+					 visualize_reward=False)
+	env = ActionDTypeWrapper(env, np.float32)
+	env = ActionRepeatWrapper(env, 2)
+	env = action_scale.Wrapper(env, minimum=-1., maximum=1.)
+	env = ExtendedTimeStepWrapper(env)
+	env = TimeStepToGymWrapper(env, domain, task)
+	return env
diff --git a/tdmpc2/envs/exceptions.py b/tdmpc2/envs/exceptions.py
new file mode 100644
index 0000000..9bf1390
--- /dev/null
+++ b/tdmpc2/envs/exceptions.py
@@ -0,0 +1,4 @@
+
+class UnknownTaskError(Exception):
+	def __init__(self, task):
+		super().__init__(f'Unknown task: {task}')
diff --git a/tdmpc2/envs/maniskill.py b/tdmpc2/envs/maniskill.py
new file mode 100644
index 0000000..1d2e4c9
--- /dev/null
+++ b/tdmpc2/envs/maniskill.py
@@ -0,0 +1,79 @@
+import gym
+import numpy as np
+from envs.wrappers.time_limit import TimeLimit
+from envs.exceptions import UnknownTaskError
+
+import mani_skill2.envs
+
+
+# Maps each supported task name to the gym environment id (`env`) and the
+# `control_mode` that `make_env` passes to `gym.make`.
+MANISKILL_TASKS = {
+	'lift-cube': dict(
+		env='LiftCube-v0',
+		control_mode='pd_ee_delta_pos',
+	),
+	'pick-cube': dict(
+		env='PickCube-v0',
+		control_mode='pd_ee_delta_pos',
+	),
+	'stack-cube': dict(
+		env='StackCube-v0',
+		control_mode='pd_ee_delta_pos',
+	),
+	'pick-ycb': dict(
+		env='PickSingleYCB-v0',
+		control_mode='pd_ee_delta_pose',
+	),
+	'turn-faucet': dict(
+		env='TurnFaucet-v0',
+		control_mode='pd_ee_delta_pose',
+	),
+}
+
+
+class ManiSkillWrapper(gym.Wrapper):
+	def __init__(self, env, cfg):
+		super().__init__(env)
+		self.env = env
+		self.cfg = cfg
+		self.observation_space = self.env.observation_space
+		self.action_space = gym.spaces.Box(
+			low=np.full(self.env.action_space.shape, self.env.action_space.low.min()),
+			high=np.full(self.env.action_space.shape, self.env.action_space.high.max()),
+			dtype=self.env.action_space.dtype,
+		)
+
+	def reset(self):
+		return self.env.reset()
+	
+	def step(self, action):
+		reward = 0
+		for _ in range(2):
+			obs, r, _, info = self.env.step(action)
+			reward += r
+		return obs, reward, False, info
+
+	@property
+	def unwrapped(self):
+		return self.env.unwrapped
+
+	def render(self, args, **kwargs):
+		return self.env.render(mode='cameras')
+
+
+def make_env(cfg):
+	"""
+	Make ManiSkill2 environment.
+	"""
+	if cfg.task not in MANISKILL_TASKS:
+		raise UnknownTaskError(cfg.task)
+	task_cfg = MANISKILL_TASKS[cfg.task]
+	env = gym.make(
+		task_cfg['env'],
+		obs_mode='state',
+		control_mode=task_cfg['control_mode'],
+		render_camera_cfgs=dict(width=384, height=384),
+	)
+	env = ManiSkillWrapper(env, cfg)
+	env = TimeLimit(env, max_episode_steps=100)
+	env.max_episode_steps = env._max_episode_steps
+	return env
diff --git a/tdmpc2/envs/metaworld.py b/tdmpc2/envs/metaworld.py
new file mode 100644
index 0000000..fd7379d
--- /dev/null
+++ b/tdmpc2/envs/metaworld.py
@@ -0,0 +1,52 @@
+import numpy as np
+import gym
+from envs.wrappers.time_limit import TimeLimit
+from envs.exceptions import UnknownTaskError
+
+from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE
+
+
+class MetaWorldWrapper(gym.Wrapper):
+	def __init__(self, env, cfg):
+		super().__init__(env)
+		self.env = env
+		self.cfg = cfg
+		self.camera_name = "corner2"
+		self.env.model.cam_pos[2] = [0.75, 0.075, 0.7]
+		self.env._freeze_rand_vec = False
+
+	def reset(self, **kwargs):
+		obs = super().reset(**kwargs).astype(np.float32)
+		self.env.step(np.zeros(self.env.action_space.shape))
+		return obs
+
+	def step(self, action):
+		reward = 0
+		for _ in range(2):
+			obs, r, _, info = self.env.step(action.copy())
+			reward += r
+		obs = obs.astype(np.float32)
+		return obs, reward, False, info
+
+	@property
+	def unwrapped(self):
+		return self.env.unwrapped
+
+	def render(self, *args, **kwargs):
+		return self.env.render(
+			offscreen=True, resolution=(384, 384), camera_name=self.camera_name
+		).copy()
+
+
+def make_env(cfg):
+	"""
+	Make Meta-World environment.
+	"""
+	env_id = cfg.task.split("-", 1)[-1] + "-v2-goal-observable"
+	if not cfg.task.startswith('mw-') or env_id not in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE:
+		raise UnknownTaskError(cfg.task)
+	env = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id](seed=cfg.seed)
+	env = MetaWorldWrapper(env, cfg)
+	env = TimeLimit(env, max_episode_steps=100)
+	env.max_episode_steps = env._max_episode_steps
+	return env
diff --git a/tdmpc2/envs/myosuite.py b/tdmpc2/envs/myosuite.py
new file mode 100644
index 0000000..c503782
--- /dev/null
+++ b/tdmpc2/envs/myosuite.py
@@ -0,0 +1,59 @@
+import numpy as np
+import gym
+from envs.wrappers.time_limit import TimeLimit
+from envs.exceptions import UnknownTaskError
+
+
+# Maps each supported task name to the gym environment id that `make_env`
+# passes to `gym.make`. Names ending in `-hard` map to the `Random` variant
+# of the environment, the others to the `Fixed` variant.
+MYOSUITE_TASKS = {
+	'myo-finger-reach': 'myoFingerReachFixed-v0',
+	'myo-finger-reach-hard': 'myoFingerReachRandom-v0',
+	'myo-finger-pose': 'myoFingerPoseFixed-v0',
+	'myo-finger-pose-hard': 'myoFingerPoseRandom-v0',
+	'myo-hand-reach': 'myoHandReachFixed-v0',
+	'myo-hand-reach-hard': 'myoHandReachRandom-v0',
+	'myo-hand-pose': 'myoHandPoseFixed-v0',
+	'myo-hand-pose-hard': 'myoHandPoseRandom-v0',
+	'myo-hand-obj-hold': 'myoHandObjHoldFixed-v0',
+	'myo-hand-obj-hold-hard': 'myoHandObjHoldRandom-v0',
+	'myo-hand-key-turn': 'myoHandKeyTurnFixed-v0',
+	'myo-hand-key-turn-hard': 'myoHandKeyTurnRandom-v0',
+	'myo-hand-pen-twirl': 'myoHandPenTwirlFixed-v0',
+	'myo-hand-pen-twirl-hard': 'myoHandPenTwirlRandom-v0',
+}
+
+
+class MyoSuiteWrapper(gym.Wrapper):
+	def __init__(self, env, cfg):
+		super().__init__(env)
+		self.env = env
+		self.cfg = cfg
+		self.camera_id = 'hand_side_inter'
+
+	def step(self, action):
+		obs, reward, _, info = self.env.step(action.copy())
+		obs = obs.astype(np.float32)
+		info['success'] = info['solved']
+		return obs, reward, False, info
+
+	@property
+	def unwrapped(self):
+		return self.env.unwrapped
+
+	def render(self, *args, **kwargs):
+		return self.env.sim.renderer.render_offscreen(
+			width=384, height=384, camera_id=self.camera_id
+		).copy()
+
+
+def make_env(cfg):
+	"""
+	Make Myosuite environment.
+	"""
+	if not cfg.task in MYOSUITE_TASKS:
+		raise UnknownTaskError(cfg.task)
+	import myosuite
+	env = gym.make(MYOSUITE_TASKS[cfg.task])
+	env = MyoSuiteWrapper(env, cfg)
+	env = TimeLimit(env, max_episode_steps=100)
+	env.max_episode_steps = env._max_episode_steps
+	return env
diff --git a/tdmpc2/envs/tasks/__init__.py b/tdmpc2/envs/tasks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tdmpc2/envs/tasks/ball_in_cup.py b/tdmpc2/envs/tasks/ball_in_cup.py
new file mode 100644
index 0000000..fea86f6
--- /dev/null
+++ b/tdmpc2/envs/tasks/ball_in_cup.py
@@ -0,0 +1,99 @@
+import collections
+import os
+
+from dm_control import mujoco
+from dm_control.rl import control
+from dm_control.suite import base
+from dm_control.suite import ball_in_cup
+from dm_control.suite import common
+from dm_control.utils import rewards
+from dm_control.utils import io as resources
+import numpy as np
+
+# Directory named 'tasks' inside the parent of this file's directory;
+# the model XML is loaded from here.
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+# Reward thresholds: ball-to-target distance and ball speed at or above
+# which the respective reward term is at its maximum.
+_DIST_TARGET = 0.5
+_TARGET_SPEED = 6.
+
+_DEFAULT_TIME_LIMIT = 20  # (seconds)
+_CONTROL_TIMESTEP = .02   # (seconds)
+
+
+def get_model_and_assets():
+    """Returns a tuple containing the model XML string and a dict of assets."""
+    return resources.GetResource(os.path.join(_TASKS_DIR, 'ball_in_cup.xml')), common.ASSETS
+
+
+@ball_in_cup.SUITE.add('custom')
+def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Ball-in-Cup Spin task."""
+  physics = Physics.from_xml_string(*get_model_and_assets())
+  task = CustomBallInCup(random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class Physics(mujoco.Physics):
+  """Physics with additional features for the Ball-in-Cup domain."""
+
+  def ball_to_target(self):
+    """Returns the vector from the ball to the target."""
+    target = self.named.data.site_xpos['target', ['x', 'z']]
+    ball = self.named.data.xpos['ball', ['x', 'z']]
+    return target - ball
+
+  def in_target(self):
+    """Returns 1 if the ball is in the target, 0 otherwise."""
+    ball_to_target = abs(self.ball_to_target())
+    target_size = self.named.model.site_size['target', [0, 2]]
+    ball_size = self.named.model.geom_size['ball', 0]
+    return float(all(ball_to_target < target_size - ball_size))
+
+
+class CustomBallInCup(ball_in_cup.BallInCup):
+  """Custom Ball-in-Cup tasks."""
+
+  def initialize_episode(self, physics):
+    # Find a collision-free random initial position of the ball.
+    penetrating = True
+    valid_pos = False
+    init_out_of_target = self.random.uniform() < 0.1
+    while penetrating or not valid_pos:
+      # Assign a random ball position.
+      physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2)
+      physics.named.data.qpos['ball_z'] = self.random.uniform(.2, .5)
+      # Check for collisions.
+      physics.after_reset()
+      penetrating = physics.data.ncon > 0
+      valid_pos = bool(physics.in_target()) or init_out_of_target
+    base.Task.initialize_episode(self, physics)
+
+  def get_observation(self, physics):
+    """Returns an observation of the state."""
+    obs = collections.OrderedDict()
+    obs['position'] = physics.position()
+    obs['velocity'] = physics.velocity()
+    return obs
+
+  def get_reward(self, physics):
+    dist = np.linalg.norm(physics.ball_to_target())
+    ball_vel_x = abs(physics.named.data.qvel['ball_x'])
+    ball_vel_z = abs(physics.named.data.qvel['ball_z'])
+    ball_vel = np.linalg.norm([ball_vel_x, ball_vel_z])
+
+    # reward: spin around target (maximize distance to target + ball velocity)
+    dist_reward = rewards.tolerance(dist,
+                                    bounds=(_DIST_TARGET, float('inf')),
+                                    margin=_DIST_TARGET/2,
+                                    value_at_margin=0.5,
+                                    sigmoid='linear')
+    not_in_target = 1 - physics.in_target()
+    vel_reward = rewards.tolerance(ball_vel,
+                                   bounds=(_TARGET_SPEED, float('inf')),
+                                   margin=_TARGET_SPEED/2,
+                                   value_at_margin=0.5,
+                                   sigmoid='linear')
+    spin_reward = not_in_target * (dist_reward + 2*vel_reward) / 3
+    return spin_reward
diff --git a/tdmpc2/envs/tasks/ball_in_cup.xml b/tdmpc2/envs/tasks/ball_in_cup.xml
new file mode 100644
index 0000000..32708c1
--- /dev/null
+++ b/tdmpc2/envs/tasks/ball_in_cup.xml
@@ -0,0 +1,53 @@
+<mujoco model="ball in cup">
+
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <default>
+    <motor ctrllimited="true" ctrlrange="-1 1" gear="5"/>
+    <default class="cup">
+      <joint type="slide" damping="3" stiffness="20"/>
+      <geom type="capsule" size=".008" material="self"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <light name="light" directional="true" diffuse=".6 .6 .6" pos="0 0 2" specular=".3 .3 .3"/>
+    <geom name="ground" type="plane" pos="0 0 0" size=".6 .2 10" material="grid"/>
+    <camera name="cam0" pos="0 -1 .8" xyaxes="1 0 0 0 1 2"/>
+    <camera name="cam1" pos="0 -1 .4" xyaxes="1 0 0 0 0 1" />
+
+    <body name="cup" pos="0 0 .6" childclass="cup">
+      <joint name="cup_x" axis="1 0 0"/>
+      <joint name="cup_z" axis="0 0 1"/>
+      <geom name="cup_part_0" fromto="-.05 0 0 -.05 0 -.075" />
+      <geom name="cup_part_1" fromto="-.05 0 -.075 -.025 0 -.1" />
+      <geom name="cup_part_2" fromto="-.025 0 -.1 .025 0 -.1" />
+      <geom name="cup_part_3" fromto=".025 0 -.1 .05 0 -.075" />
+      <geom name="cup_part_4" fromto=".05 0 -.075 .05 0 0" />
+      <site name="cup" pos="0 0 -.108" size=".005"/>
+      <site name="target" type="box" pos="0 0 -.05" size=".05 .006 .05" group="4"/>
+    </body>
+
+    <body name="ball" pos="0 0 .2">
+      <joint name="ball_x" type="slide" axis="1 0 0"/>
+      <joint name="ball_z" type="slide" axis="0 0 1"/>
+      <geom name="ball" type="sphere" size=".025" material="effector"/>
+      <site name="ball" size=".005"/>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="x" joint="cup_x"/>
+    <motor name="z" joint="cup_z"/>
+  </actuator>
+
+  <tendon>
+    <spatial name="string" limited="true" range="0 0.3" width="0.003">
+      <site site="ball"/>
+      <site site="cup"/>
+    </spatial>
+  </tendon>
+
+</mujoco>
diff --git a/tdmpc2/envs/tasks/cheetah.py b/tdmpc2/envs/tasks/cheetah.py
new file mode 100644
index 0000000..f24d2f6
--- /dev/null
+++ b/tdmpc2/envs/tasks/cheetah.py
@@ -0,0 +1,268 @@
+import os
+
+from dm_control.rl import control
+from dm_control.suite import common
+from dm_control.suite import cheetah
+from dm_control.utils import rewards
+from dm_control.utils import io as resources
+
+# Directory named 'tasks' inside the parent of this file's directory;
+# the model XML is loaded from here.
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+# Thresholds used by the CustomCheetah reward functions.
+_CHEETAH_JUMP_HEIGHT = 1.2  # height bound for the stand/jump/run-on-one-foot rewards
+_CHEETAH_LIE_HEIGHT = 0.25  # height bound for the lie-down/legs-up rewards
+_CHEETAH_SPIN_SPEED = 8     # angular-momentum bound for the flip rewards
+
+
+def get_model_and_assets():
+    """Returns a tuple containing the model XML string and a dict of assets."""
+    return resources.GetResource(os.path.join(_TASKS_DIR, 'cheetah.xml')), common.ASSETS
+
+
+@cheetah.SUITE.add('custom')
+def run_backwards(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Run Backwards task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='run-backwards', move_speed=cheetah._RUN_SPEED*0.8, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def stand_front(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Stand Front task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='stand-front', move_speed=0.5, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def stand_back(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Stand Back task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='stand-back', move_speed=0.5, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def jump(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Jump task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='jump', move_speed=0.5, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def run_front(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Run Front task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='run-front', move_speed=cheetah._RUN_SPEED*0.6, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def run_back(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Run Back task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='run-back', move_speed=cheetah._RUN_SPEED*0.6, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def lie_down(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Lie Down task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='lie-down', random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def legs_up(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Legs Up task."""
+    physics = cheetah.Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='legs-up', random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def flip(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Flip task."""
+    physics = Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='flip', move_speed=cheetah._RUN_SPEED, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+@cheetah.SUITE.add('custom')
+def flip_backwards(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+    """Returns the Flip Backwards task."""
+    physics = Physics.from_xml_string(*get_model_and_assets())
+    task = CustomCheetah(goal='flip-backwards', move_speed=cheetah._RUN_SPEED*0.8, random=random)
+    environment_kwargs = environment_kwargs or {}
+    return control.Environment(physics, task, time_limit=time_limit,
+                               **environment_kwargs)
+
+
+class Physics(cheetah.Physics):
+    """Physics simulation with additional features for the Cheetah domain."""
+
+    def angmomentum(self):
+        """Returns the angular momentum of torso of the Cheetah about Y axis."""
+        return self.named.data.subtree_angmom['torso'][1]
+
+
+class CustomCheetah(cheetah.Cheetah):
+    """Custom Cheetah tasks."""
+    
+    def __init__(self, goal='run-backwards', move_speed=0, random=None):
+        super().__init__(random)
+        self._goal = goal
+        self._move_speed = move_speed
+
+    def _run_backwards_reward(self, physics):
+        return rewards.tolerance(physics.speed(),
+                            bounds=(-float('inf'), -self._move_speed),
+                            margin=self._move_speed,
+                            value_at_margin=0,
+                            sigmoid='linear')
+       
+    def _stand_one_foot_reward(self, physics, foot):
+        """Note: `foot` is the foot that is *not* on the ground."""
+        torso_height = physics.named.data.xpos['torso', 'z']
+        foot_height = physics.named.data.xpos[foot, 'z']
+        height_reward = rewards.tolerance((torso_height + foot_height)/2,
+                            bounds=(_CHEETAH_JUMP_HEIGHT, float('inf')),
+                            margin=_CHEETAH_JUMP_HEIGHT/2)
+        horizontal_speed_reward = rewards.tolerance(physics.speed(),
+                            bounds=(-self._move_speed, self._move_speed),
+                            margin=self._move_speed,
+                            value_at_margin=0,
+                            sigmoid='linear')
+        stand_reward = (5*height_reward + horizontal_speed_reward) / 6
+        return stand_reward
+
+    def _stand_front_reward(self, physics):
+        return self._stand_one_foot_reward(physics, 'bfoot')
+    
+    def _stand_back_reward(self, physics):
+        return self._stand_one_foot_reward(physics, 'ffoot')
+    
+    def _jump_reward(self, physics):
+        front_reward = self._stand_front_reward(physics)
+        back_reward = self._stand_back_reward(physics)
+        jump_reward = (front_reward + back_reward) / 2
+        return jump_reward
+
+    def _run_one_foot_reward(self, physics, foot):
+        """Note: `foot` is the foot that is *not* on the ground."""
+        torso_height = physics.named.data.xpos['torso', 'z']
+        foot_height = physics.named.data.xpos[foot, 'z']
+        torso_up = rewards.tolerance(torso_height,
+                            bounds=(_CHEETAH_JUMP_HEIGHT, float('inf')),
+                            margin=_CHEETAH_JUMP_HEIGHT/2)
+        foot_up = rewards.tolerance(foot_height,
+                            bounds=(_CHEETAH_JUMP_HEIGHT, float('inf')),
+                            margin=_CHEETAH_JUMP_HEIGHT/2)
+        up_reward = (3*foot_up + 2*torso_up) / 5
+        if self._move_speed == 0:
+            return up_reward
+        horizontal_speed_reward = rewards.tolerance(physics.speed(),
+                            bounds=(self._move_speed, float('inf')),
+                            margin=self._move_speed,
+                            value_at_margin=0,
+                            sigmoid='linear')
+        return up_reward * (5*horizontal_speed_reward + 1) / 6
+
+    def _run_front_reward(self, physics):
+        return self._run_one_foot_reward(physics, 'bfoot')
+    
+    def _run_back_reward(self, physics):
+        return self._run_one_foot_reward(physics, 'ffoot')
+
+    def _lie_down_reward(self, physics):
+        torso_height = physics.named.data.xpos['torso', 'z']
+        feet_height = (physics.named.data.xpos['ffoot', 'z'] + physics.named.data.xpos['bfoot', 'z']) / 2
+        torso_down = rewards.tolerance(torso_height,
+                            bounds=(-float('inf'), _CHEETAH_LIE_HEIGHT),
+                            margin=_CHEETAH_LIE_HEIGHT,
+                            value_at_margin=0,
+                            sigmoid='linear')
+        feet_down = rewards.tolerance(feet_height,
+                            bounds=(-float('inf'), _CHEETAH_LIE_HEIGHT),
+                            margin=_CHEETAH_LIE_HEIGHT,
+                            value_at_margin=0,
+                            sigmoid='linear')
+        lie_down_reward = (3*torso_down + feet_down) / 4
+        return lie_down_reward
+
+    def _legs_up_reward(self, physics):
+        torso_height = physics.named.data.xpos['torso', 'z']
+        torso_down = rewards.tolerance(torso_height,
+                            bounds=(-float('inf'), _CHEETAH_LIE_HEIGHT),
+                            margin=_CHEETAH_LIE_HEIGHT/2)
+        get_up = self._run_one_foot_reward(physics, 'bfoot')
+        legs_up_reward = (5*torso_down + get_up) / 6
+        return legs_up_reward
+    
+    def _flip_reward(self, physics, forward=True):
+        spin_reward = rewards.tolerance(
+                            (1. if forward else -1.) * physics.angmomentum(),
+                            bounds=(_CHEETAH_SPIN_SPEED, float('inf')),
+                            margin=_CHEETAH_SPIN_SPEED,
+                            value_at_margin=0,
+                            sigmoid='linear')
+        horizontal_speed_reward = rewards.tolerance(
+                            (1. if forward else -1.) * physics.speed(),
+                            bounds=(self._move_speed, float('inf')),
+                            margin=self._move_speed,
+                            value_at_margin=0,
+                            sigmoid='linear')
+        flip_reward = (2*spin_reward + horizontal_speed_reward) / 3
+        return flip_reward
+
+    def get_reward(self, physics):
+        if self._goal == 'run-backwards':
+            return self._run_backwards_reward(physics)
+        elif self._goal == 'stand-front':
+            return self._stand_front_reward(physics)
+        elif self._goal == 'stand-back':
+            return self._stand_back_reward(physics)
+        elif self._goal == 'jump':
+            return self._jump_reward(physics)
+        elif self._goal == 'run-front':
+            return self._run_front_reward(physics)
+        elif self._goal == 'run-back':
+            return self._run_back_reward(physics)
+        elif self._goal == 'lie-down':
+            return self._lie_down_reward(physics)
+        elif self._goal == 'legs-up':
+            return self._legs_up_reward(physics)
+        elif self._goal == 'flip':
+            return self._flip_reward(physics, forward=True)
+        elif self._goal == 'flip-backwards':
+            return self._flip_reward(physics, forward=False)
+        else:
+            raise NotImplementedError(f'Goal {self._goal} is not implemented.')
+
+
+if __name__ == '__main__':
+    env = jump()
+    obs = env.reset()
+    import numpy as np
+    next_obs, reward, done, info = env.step(np.zeros(6))
+    print(reward)
diff --git a/tdmpc2/envs/tasks/cheetah.xml b/tdmpc2/envs/tasks/cheetah.xml
new file mode 100644
index 0000000..1a7f6fd
--- /dev/null
+++ b/tdmpc2/envs/tasks/cheetah.xml
@@ -0,0 +1,73 @@
+<mujoco model="cheetah">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <compiler settotalmass="14"/>
+
+  <default>
+    <default class="cheetah">
+      <joint limited="true" damping=".01" armature=".1" stiffness="8" type="hinge" axis="0 1 0"/>
+      <geom contype="1" conaffinity="1" condim="3" friction=".4 .1 .1" material="self"/>
+    </default>
+    <default class="free">
+      <joint limited="false" damping="0" armature="0" stiffness="0"/>
+    </default>
+    <motor ctrllimited="true" ctrlrange="-1 1"/>
+  </default>
+
+  <statistic center="0 0 .7" extent="2"/>
+
+  <option timestep="0.01"/>
+
+  <worldbody>
+    <geom name="ground" type="plane" conaffinity="1" pos="98 0 0" size="200 .8 .5" material="grid"/>
+    <body name="torso" pos="0 0 .7" childclass="cheetah">
+      <light name="light" pos="0 0 2" mode="trackcom"/>
+      <camera name="side" pos="0 -3 0" quat="0.707 0.707 0 0" mode="trackcom"/>
+      <camera name="back" pos="-1.8 -1.3 0.8" xyaxes="0.45 -0.9 0 0.3 0.15 0.94" mode="trackcom"/>
+      <joint name="rootx" type="slide" axis="1 0 0" class="free"/>
+      <joint name="rootz" type="slide" axis="0 0 1" class="free"/>
+      <joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
+      <geom name="torso" type="capsule" fromto="-.5 0 0 .5 0 0" size="0.046"/>
+      <geom name="head" type="capsule" pos=".6 0 .1" euler="0 50 0" size="0.046 .15"/>
+      <body name="bthigh" pos="-.5 0 0">
+        <joint name="bthigh" range="-30 60" stiffness="240" damping="6"/>
+        <geom name="bthigh" type="capsule" pos=".1 0 -.13" euler="0 -218 0" size="0.046 .145"/>
+        <body name="bshin" pos=".16 0 -.25">
+          <joint name="bshin" range="-50 50" stiffness="180" damping="4.5"/>
+          <geom name="bshin" type="capsule" pos="-.14 0 -.07" euler="0 -116 0" size="0.046 .15"/>
+          <body name="bfoot" pos="-.28 0 -.14">
+            <joint name="bfoot" range="-230 50" stiffness="120" damping="3"/>
+            <geom name="bfoot" type="capsule" pos=".03 0 -.097" euler="0 -15 0" size="0.046 .094"/>
+          </body>
+        </body>
+      </body>
+      <body name="fthigh" pos=".5 0 0">
+        <joint name="fthigh" range="-57 .40" stiffness="180" damping="4.5"/>
+        <geom name="fthigh" type="capsule" pos="-.07 0 -.12" euler="0 30 0" size="0.046 .133"/>
+        <body name="fshin" pos="-.14 0 -.24">
+          <joint name="fshin" range="-70 50" stiffness="120" damping="3"/>
+          <geom name="fshin" type="capsule" pos=".065 0 -.09" euler="0 -34 0" size="0.046 .106"/>
+          <body name="ffoot" pos=".13 0 -.18">
+            <joint name="ffoot" range="-28 28" stiffness="60" damping="1.5"/>
+            <geom name="ffoot" type="capsule" pos=".045 0 -.07" euler="0 -34 0" size="0.046 .07"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+  </sensor>
+
+  <actuator>
+    <motor name="bthigh" joint="bthigh" gear="120" />
+    <motor name="bshin" joint="bshin" gear="90" />
+    <motor name="bfoot" joint="bfoot" gear="60" />
+    <motor name="fthigh" joint="fthigh" gear="90" />
+    <motor name="fshin" joint="fshin" gear="60" />
+    <motor name="ffoot" joint="ffoot" gear="30" />
+  </actuator>
+</mujoco>
diff --git a/tdmpc2/envs/tasks/fish.py b/tdmpc2/envs/tasks/fish.py
new file mode 100644
index 0000000..59bed4b
--- /dev/null
+++ b/tdmpc2/envs/tasks/fish.py
@@ -0,0 +1,79 @@
+import collections
+import os
+
+from dm_control import mujoco
+from dm_control.rl import control
+from dm_control.suite import base
+from dm_control.suite import common
+from dm_control.suite import fish
+from dm_control.utils import rewards
+from dm_control.utils import io as resources
+import numpy as np
+
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+_DEFAULT_TIME_LIMIT = 40
+_CONTROL_TIMESTEP = .04
+_JOINTS = ['tail1',
+           'tail_twist',
+           'tail2',
+           'finright_roll',
+           'finright_pitch',
+           'finleft_roll',
+           'finleft_pitch']
+
+
+def get_model_and_assets():
+    """Returns a tuple of (contents of `fish.xml` in `_TASKS_DIR`, `common.ASSETS`)."""
+    return resources.GetResource(os.path.join(_TASKS_DIR, 'fish.xml')), common.ASSETS
+
+
+@fish.SUITE.add('custom')
+def obstacles(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Builds the custom Fish Obstacles environment from the local `fish.xml`."""
+  xml, assets = get_model_and_assets()
+  extra_kwargs = environment_kwargs or {}
+  return control.Environment(
+      fish.Physics.from_xml_string(xml, assets), Obstacles(random=random),
+      control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit,
+      **extra_kwargs)
+
+
+class Obstacles(fish.Swim):
+  """Fish Swim variant with four wall geoms that constrain targets and reward."""
+
+  def __init__(self, random=None):
+    super().__init__(random=random)
+
+  def in_wall(self, physics, name, min_distance=0.08):
+    """Returns True if geom `name` is within `min_distance` of any wall along x or y."""
+    geom_xy = physics.named.data.geom_xpos
+    # NOTE(review): this is the smaller of |dx|, |dy|, not an L1/Euclidean distance.
+    return any(
+        np.min(np.abs(geom_xy[name][:2] - geom_xy[wall][:2])) < min_distance
+        for wall in ('wall0', 'wall1', 'wall2', 'wall3'))
+
+  def initialize_episode(self, physics):
+    """Samples fish pose and target, resampling until the target passes `in_wall`."""
+    while True:
+      # Random orientation (unit quaternion) and joint angles for the fish.
+      quat = self.random.randn(4)
+      physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat)
+      for joint in _JOINTS:
+        physics.named.data.qpos[joint] = self.random.uniform(-.2, .2)
+      # Random target position, drawn in x, y, z order.
+      for axis, (low, high) in zip('xyz', ((-.4, .4), (-.4, .4), (.1, .3))):
+        physics.named.model.geom_pos['target', axis] = self.random.uniform(low, high)
+      # The wall check reads geom_xpos, so after_reset() runs before it.
+      physics.after_reset()
+      if not self.in_wall(physics, 'target'):
+        break
+    base.Task.initialize_episode(self, physics)
+
+  def get_reward(self, physics):
+    """Returns (7*in_target + is_upright)/8, or 0 when the torso fails the wall check."""
+    radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum()
+    mouth_dist = np.linalg.norm(physics.mouth_to_target())
+    in_target = rewards.tolerance(mouth_dist, bounds=(0, radii), margin=2*radii)
+    is_upright = 0.5 * (physics.upright() + 1)
+    return (1. - self.in_wall(physics, 'torso', min_distance=0.06)) * (7*in_target + is_upright) / 8
diff --git a/tdmpc2/envs/tasks/fish.xml b/tdmpc2/envs/tasks/fish.xml
new file mode 100644
index 0000000..82c9ede
--- /dev/null
+++ b/tdmpc2/envs/tasks/fish.xml
@@ -0,0 +1,93 @@
+<mujoco model="fish">
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+  <asset>
+      <texture name="skybox" type="skybox" builtin="gradient" rgb1=".4 .6 .8" rgb2="0 0 0" width="800" height="800" mark="random" markrgb="1 1 1"/>
+  </asset>
+
+
+  <option timestep="0.004" density="5000">
+    <flag gravity="disable" constraint="disable"/>
+  </option>
+
+  <default>
+    <general ctrllimited="true"/>
+    <default class="fish">
+      <joint type="hinge" limited="false" range="-60 60" damping="2e-5" solreflimit=".1 1" solimplimit="0 .8 .1"/>
+      <geom material="self"/>
+    </default>
+    <default class="wall">
+      <geom type="box" material="self"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <camera name="tracking_top" pos="0 0 1" xyaxes="1 0 0 0 1 0" mode="trackcom"/>
+    <camera name="tracking_x" pos="-.3 0 .2" xyaxes="0 -1 0 0.342 0 0.940" fovy="60" mode="trackcom"/>
+    <camera name="tracking_y" pos="0 -.3 .2" xyaxes="1 0 0 0 0.342 0.940" fovy="60" mode="trackcom"/>
+    <camera name="fixed_top" pos="0 0 5.5" fovy="10"/>
+    <geom name="ground" type="plane" size=".5 .5 .1" material="grid"/>
+
+    <geom name="wall0" class="wall" pos="-.15 -.15 .1" size=".05 .05 .1"/>
+    <geom name="wall1" class="wall" pos=".15 -.15 .1" size=".05 .05 .1"/>
+    <geom name="wall2" class="wall" pos=".15 .15 .1" size=".05 .05 .1"/>
+    <geom name="wall3" class="wall" pos="-.15 .15 .1" size=".05 .05 .1"/>
+
+    <geom name="target" type="sphere" pos="0 .4 .1" size=".04" material="target"/>
+    <body name="torso" pos="0 0 .1" childclass="fish">
+      <light name="light" diffuse=".6 .6 .6" pos="0 0 0.5" dir="0 0 -1" specular=".3 .3 .3" mode="track"/>
+      <joint name="root" type="free" damping="0" limited="false"/>
+      <site name="torso" size=".01" rgba="0 0 0 0"/>
+      <geom name="eye" type="ellipsoid" pos="0 .055 .015" size=".008 .012 .008" euler="-10 0 0" material="eye" mass="0"/>
+      <camera name="eye" pos="0 .06 .02" xyaxes="1 0 0 0 0 1"/>
+      <geom name="mouth" type="capsule" fromto="0 .079 0 0 .07 0" size=".005" material="effector" mass="0"/>
+      <geom name="lower_mouth" type="capsule" fromto="0 .079 -.004 0 .07 -.003" size=".0045" material="effector" mass="0"/>
+      <geom name="torso" type="ellipsoid" size=".01 .08 .04" mass="0"/>
+      <geom name="back_fin" type="ellipsoid" size=".001 .03 .015" pos="0 -.03 .03" material="effector" mass="0"/>
+      <geom name="torso_massive" type="box" size=".002 .06 .03" group="4"/>
+      <body name="tail1" pos="0 -.09 0">
+        <joint name="tail1" axis="0 0 1" pos="0 .01 0"/>
+        <joint name="tail_twist" axis="0 1 0" pos="0 .01 0" range="-30 30"/>
+        <geom name="tail1" type="ellipsoid" size=".001 .008 .016"/>
+        <body name="tail2" pos="0 -.028 0">
+          <joint name="tail2" axis="0 0 1" pos="0 .02 0" stiffness="8e-5"/>
+          <geom name="tail2" type="ellipsoid" size=".001 .018 .035"/>
+        </body>
+      </body>
+      <body name="finright" pos=".01 0 0">
+        <joint name="finright_roll" axis="0 1 0"/>
+        <joint name="finright_pitch" axis="1 0 0" pos="0 .005 0"/>
+        <geom name="finright" type="ellipsoid" pos=".015 0 0" size=".02 .015 .001"  />
+      </body>
+      <body name="finleft" pos="-.01 0 0">
+        <joint name="finleft_roll" axis="0 1 0"/>
+        <joint name="finleft_pitch" axis="1 0 0" pos="0 .005 0"/>
+        <geom name="finleft" type="ellipsoid"  pos="-.015 0 0" size=".02 .015 .001"/>
+      </body>
+    </body>
+  </worldbody>
+
+  <tendon>
+    <fixed name="fins_flap">
+      <joint joint="finleft_roll"  coef="-.5"/>
+      <joint joint="finright_roll" coef=".5"/>
+    </fixed>
+    <fixed name="fins_sym" stiffness="1e-4">
+      <joint joint="finleft_roll"  coef=".5"/>
+      <joint joint="finright_roll" coef=".5"/>
+    </fixed>
+  </tendon>
+
+  <actuator>
+    <position name="tail"           joint="tail1"           ctrlrange="-1 1"    kp="5e-4"/>
+    <position name="tail_twist"     joint="tail_twist"      ctrlrange="-1 1"    kp="1e-4"/>
+    <position name="fins_flap"      tendon="fins_flap"      ctrlrange="-1 1"    kp="3e-4"/>
+    <position name="finleft_pitch"  joint="finleft_pitch"   ctrlrange="-1 1"    kp="1e-4"/>
+    <position name="finright_pitch" joint="finright_pitch"  ctrlrange="-1 1"    kp="1e-4"/>
+  </actuator>
+
+  <sensor>
+    <velocimeter name="velocimeter" site="torso"/>
+    <gyro name="gyro" site="torso"/>
+  </sensor>
+</mujoco>
diff --git a/tdmpc2/envs/tasks/hopper.py b/tdmpc2/envs/tasks/hopper.py
new file mode 100644
index 0000000..3e19b1c
--- /dev/null
+++ b/tdmpc2/envs/tasks/hopper.py
@@ -0,0 +1,114 @@
+import os
+
+from dm_control import mujoco
+from dm_control.rl import control
+from dm_control.suite import common
+from dm_control.suite import hopper
+from dm_control.utils import rewards
+from dm_control.utils import io as resources
+import numpy as np
+
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+_CONTROL_TIMESTEP = .02  # (Seconds)
+
+# Default duration of an episode, in seconds.
+_DEFAULT_TIME_LIMIT = 20
+
+# Minimal height of torso over foot above which stand reward is 1.
+_STAND_HEIGHT = 0.6
+
+# Hopping speed above which hop reward is 1.
+_HOP_SPEED = 2
+
+# Angular momentum above which reward is 1.
+_SPIN_SPEED = 5
+
+
+def get_model_and_assets():
+	"""Returns a tuple of (contents of `hopper.xml` in `_TASKS_DIR`, `common.ASSETS`)."""
+	return resources.GetResource(os.path.join(_TASKS_DIR, 'hopper.xml')), common.ASSETS
+
+
+@hopper.SUITE.add('custom')
+def hop_backwards(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+	"""Builds the Hop Backwards environment from the local `hopper.xml`."""
+	xml, assets = get_model_and_assets()
+	extra_kwargs = environment_kwargs or {}
+	return control.Environment(
+		Physics.from_xml_string(xml, assets),
+		CustomHopper(goal='hop-backwards', random=random),
+		time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, **extra_kwargs)
+
+
+@hopper.SUITE.add('custom')
+def flip(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+	"""Builds the Flip environment from the local `hopper.xml`."""
+	xml, assets = get_model_and_assets()
+	extra_kwargs = environment_kwargs or {}
+	return control.Environment(
+		Physics.from_xml_string(xml, assets),
+		CustomHopper(goal='flip', random=random),
+		time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, **extra_kwargs)
+
+
+@hopper.SUITE.add('custom')
+def flip_backwards(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+	"""Builds the Flip Backwards environment from the local `hopper.xml`."""
+	xml, assets = get_model_and_assets()
+	extra_kwargs = environment_kwargs or {}
+	return control.Environment(
+		Physics.from_xml_string(xml, assets),
+		CustomHopper(goal='flip-backwards', random=random),
+		time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, **extra_kwargs)
+
+
+class Physics(hopper.Physics):
+
+	def angmomentum(self):
+		"""Returns the Y component of the angular momentum of the torso subtree."""
+		return self.named.data.subtree_angmom['torso'][1]
+
+
+class CustomHopper(hopper.Hopper):
+	"""Hopper task whose reward is chosen by `goal`: hop-backwards, flip or flip-backwards."""
+
+	def __init__(self, goal='hop-backwards', random=None):
+		# None is passed as the base task's first positional argument.
+		super().__init__(None, random)
+		self._goal = goal
+
+	def _hop_backwards_reward(self, physics):
+		"""Returns the product of a height reward and a backward-speed reward."""
+		upright_term = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2))
+		backward_term = rewards.tolerance(
+			physics.speed(),
+			bounds=(-float('inf'), -_HOP_SPEED/2),
+			margin=_HOP_SPEED/4,
+			value_at_margin=0.5,
+			sigmoid='linear')
+		return upright_term * backward_term
+
+	def _flip_reward(self, physics, forward=True):
+		"""Rewards angular momentum of at least `_SPIN_SPEED`, negated when not `forward`."""
+		direction = 1. if forward else -1.
+		return rewards.tolerance(
+			direction * physics.angmomentum(), bounds=(_SPIN_SPEED, float('inf')),
+			margin=_SPIN_SPEED/2, value_at_margin=0, sigmoid='linear')
+
+	def get_reward(self, physics):
+		"""Returns the reward for the configured goal; raises for unknown goals."""
+		goal = self._goal
+		if goal == 'hop-backwards':
+			return self._hop_backwards_reward(physics)
+		if goal in ('flip', 'flip-backwards'):
+			return self._flip_reward(physics, forward=(goal == 'flip'))
+		raise NotImplementedError(f'Goal {self._goal} is not implemented.')
+
+
+if __name__ == '__main__':
+	# Smoke test. The action is sized from the spec: hopper.xml has 4 motors,
+	# so the previous hard-coded `np.zeros(2)` did not match the control vector.
+	env = hop_backwards()
+	env.reset()
+	print(env.step(np.zeros(env.action_spec().shape)).reward)
diff --git a/tdmpc2/envs/tasks/hopper.xml b/tdmpc2/envs/tasks/hopper.xml
new file mode 100644
index 0000000..84ad72e
--- /dev/null
+++ b/tdmpc2/envs/tasks/hopper.xml
@@ -0,0 +1,66 @@
+<mujoco model="planar hopper">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <statistic extent="2" center="0 0 .5"/>
+
+  <default>
+    <default class="hopper">
+      <joint type="hinge" axis="0 1 0" limited="true" damping=".05" armature=".2"/>
+      <geom type="capsule" material="self"/>
+      <site type="sphere" size="0.05" group="3"/>
+    </default>
+    <default class="free">
+      <joint limited="false" damping="0" armature="0" stiffness="0"/>
+    </default>
+    <motor ctrlrange="-1 1" ctrllimited="true"/>
+  </default>
+
+  <option timestep="0.005"/>
+
+  <worldbody>
+    <camera name="cam0" pos="0 -2.8 0.8" euler="90 0 0" mode="trackcom"/>
+    <camera name="back" pos="-2 -.2 1.2" xyaxes="0.2 -1 0 .5 0 2" mode="trackcom"/>
+    <geom name="floor" type="plane" conaffinity="1" pos="48 0 0" size="50 1 .2" material="grid"/>
+    <body name="torso" pos="0 0 1" childclass="hopper">
+      <light name="top" pos="0 0 2" mode="trackcom"/>
+      <joint name="rootx" type="slide" axis="1 0 0" class="free"/>
+      <joint name="rootz" type="slide" axis="0 0 1" class="free"/>
+      <joint name="rooty" type="hinge" axis="0 1 0" class="free"/>
+      <geom name="torso" fromto="0 0 -.05 0 0 .2" size="0.0653"/>
+      <geom name="nose" fromto=".08 0 .13 .15 0 .14" size="0.03"/>
+      <body name="pelvis" pos="0 0 -.05">
+        <joint name="waist" range="-30 30"/>
+        <geom name="pelvis" fromto="0 0 0 0 0 -.15" size="0.065"/>
+        <body name="thigh" pos="0 0 -.2">
+          <joint name="hip" range="-170 10"/>
+          <geom name="thigh" fromto="0 0 0 0 0 -.33" size="0.04"/>
+          <body name="calf" pos="0 0 -.33">
+            <joint name="knee" range="5 150"/>
+            <geom name="calf" fromto="0 0 0 0 0 -.32" size="0.03"/>
+            <body name="foot" pos="0 0 -.32">
+              <joint name="ankle" range="-45 45"/>
+              <geom name="foot" fromto="-.08 0 0 .17 0 0" size="0.04"/>
+              <site name="touch_toe" pos=".17 0 0"/>
+              <site name="touch_heel" pos="-.08 0 0"/>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+    <touch name="touch_toe" site="touch_toe"/>
+    <touch name="touch_heel" site="touch_heel"/>
+  </sensor>
+
+  <actuator>
+    <motor name="waist" joint="waist" gear="30"/>
+    <motor name="hip" joint="hip" gear="40"/>
+    <motor name="knee" joint="knee" gear="30"/>
+    <motor name="ankle" joint="ankle" gear="10"/>
+  </actuator>
+</mujoco>
diff --git a/tdmpc2/envs/tasks/pendulum.py b/tdmpc2/envs/tasks/pendulum.py
new file mode 100644
index 0000000..3a5b636
--- /dev/null
+++ b/tdmpc2/envs/tasks/pendulum.py
@@ -0,0 +1,43 @@
+import os
+
+from dm_control.rl import control
+from dm_control.suite import pendulum
+from dm_control.suite import common
+from dm_control.utils import rewards
+from dm_control.utils import io as resources
+import numpy as np
+
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+_DEFAULT_TIME_LIMIT = 20
+_TARGET_SPEED = 9.
+
+
+def get_model_and_assets():
+    """Returns a tuple of (contents of `pendulum.xml` in `_TASKS_DIR`, `common.ASSETS`)."""
+    return resources.GetResource(os.path.join(_TASKS_DIR, 'pendulum.xml')), common.ASSETS
+
+
+@pendulum.SUITE.add('custom')
+def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Builds the custom Pendulum Spin environment from the local `pendulum.xml`."""
+  xml, assets = get_model_and_assets()
+  extra_kwargs = environment_kwargs or {}
+  return control.Environment(
+      pendulum.Physics.from_xml_string(xml, assets),
+      Spin(random=random),
+      time_limit=time_limit, **extra_kwargs)
+
+
+class Spin(pendulum.SwingUp):
+  """Pendulum task rewarding an angular speed of at least `_TARGET_SPEED`."""
+
+  def __init__(self, random=None):
+    super().__init__(random=random)
+
+  def get_reward(self, physics):
+    """Returns the tolerance reward on the norm of the angular velocity."""
+    angular_speed = np.linalg.norm(physics.angular_velocity())
+    return rewards.tolerance(
+        angular_speed, bounds=(_TARGET_SPEED, float('inf')),
+        margin=_TARGET_SPEED/2, value_at_margin=0.5, sigmoid='linear')
diff --git a/tdmpc2/envs/tasks/pendulum.xml b/tdmpc2/envs/tasks/pendulum.xml
new file mode 100644
index 0000000..14377ae
--- /dev/null
+++ b/tdmpc2/envs/tasks/pendulum.xml
@@ -0,0 +1,26 @@
+<mujoco model="pendulum">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.02">
+    <flag contact="disable" energy="enable"/>
+  </option>
+
+  <worldbody>
+    <light name="light" pos="0 0 2"/>
+    <geom name="floor" size="2 2 .2" type="plane" material="grid"/>
+    <camera name="fixed" pos="0 -1.5 2" xyaxes='1 0 0 0 1 1'/>
+    <camera name="lookat" mode="targetbodycom" target="pole" pos="0 -2 1"/>
+    <body name="pole" pos="0 0 .6">
+      <joint name="hinge" type="hinge" axis="0 1 0" damping="0.1"/>
+      <geom name="base" material="decoration" type="cylinder" fromto="0 -.03 0 0 .03 0" size="0.021" mass="0"/>
+      <geom name="pole" material="self" type="capsule" fromto="0 0 0 0 0 0.5" size="0.02" mass="0"/>
+      <geom name="mass" material="effector" type="sphere" pos="0 0 0.5" size="0.05" mass="1"/>
+    </body>
+  </worldbody>
+
+  <actuator>
+    <motor name="torque" joint="hinge" gear="1" ctrlrange="-1 1" ctrllimited="true"/>
+  </actuator>
+</mujoco>
diff --git a/tdmpc2/envs/tasks/reacher.py b/tdmpc2/envs/tasks/reacher.py
new file mode 100644
index 0000000..4c1778e
--- /dev/null
+++ b/tdmpc2/envs/tasks/reacher.py
@@ -0,0 +1,89 @@
+import collections
+import os
+
+from dm_control import mujoco
+from dm_control.rl import control
+from dm_control.suite import common
+from dm_control.suite import reacher
+from dm_control.utils import io as resources
+import numpy as np
+
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+_DEFAULT_TIME_LIMIT = 20
+_BIG_TARGET = .05
+_SMALL_TARGET = .015
+
+
+def get_model_and_assets(links):
+    """Returns a tuple of (contents of the 3- or 4-link reacher XML, `common.ASSETS`)."""
+    assert links in {3, 4}, 'Only 3 or 4 links are supported.'
+    fn = 'reacher_three_links.xml' if links == 3 else 'reacher_four_links.xml'
+    return resources.GetResource(os.path.join(_TASKS_DIR, fn)), common.ASSETS
+
+
+@reacher.SUITE.add('custom')
+def three_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Builds the three-link reacher environment with target size `_BIG_TARGET`."""
+  xml, assets = get_model_and_assets(links=3)
+  return control.Environment(
+      Physics.from_xml_string(xml, assets),
+      CustomThreeLinkReacher(target_size=_BIG_TARGET, random=random),
+      time_limit=time_limit, **(environment_kwargs or {}))
+
+
+@reacher.SUITE.add('custom')
+def three_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Builds the three-link reacher environment with target size `_SMALL_TARGET`."""
+  xml, assets = get_model_and_assets(links=3)
+  return control.Environment(
+      Physics.from_xml_string(xml, assets),
+      CustomThreeLinkReacher(target_size=_SMALL_TARGET, random=random),
+      time_limit=time_limit, **(environment_kwargs or {}))
+
+
+@reacher.SUITE.add('custom')
+def four_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Builds the four-link reacher environment with target size `_BIG_TARGET`."""
+  xml, assets = get_model_and_assets(links=4)
+  return control.Environment(
+      Physics.from_xml_string(xml, assets),
+      CustomThreeLinkReacher(target_size=_BIG_TARGET, random=random),
+      time_limit=time_limit, **(environment_kwargs or {}))
+
+
+@reacher.SUITE.add('custom')
+def four_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Builds the four-link reacher environment with target size `_SMALL_TARGET`."""
+  xml, assets = get_model_and_assets(links=4)
+  return control.Environment(
+      Physics.from_xml_string(xml, assets),
+      CustomThreeLinkReacher(target_size=_SMALL_TARGET, random=random),
+      time_limit=time_limit, **(environment_kwargs or {}))
+
+
+class Physics(mujoco.Physics):
+  """Physics simulation with additional features for the Reacher domain."""
+
+  def finger_to_target(self):
+    """Returns the x-y vector from the finger to the target (target minus finger)."""
+    return (self.named.data.geom_xpos['target', :2] -
+            self.named.data.geom_xpos['finger', :2])
+
+  def finger_to_target_dist(self):
+    """Returns the Euclidean norm of `finger_to_target()` (always non-negative)."""
+    return np.linalg.norm(self.finger_to_target())
+
+
+class CustomThreeLinkReacher(reacher.Reacher):
+  """Reacher task for the custom models (used for both 3 and 4 links)."""
+
+  def __init__(self, target_size, random=None):
+    super().__init__(target_size, random)
+
+  def get_observation(self, physics):
+    """Returns position, finger-to-target vector and velocity, in that order."""
+    return collections.OrderedDict([
+        ('position', physics.position()),
+        ('to_target', physics.finger_to_target()),
+        ('velocity', physics.velocity())])
diff --git a/tdmpc2/envs/tasks/reacher_four_links.xml b/tdmpc2/envs/tasks/reacher_four_links.xml
new file mode 100644
index 0000000..d5aa8e5
--- /dev/null
+++ b/tdmpc2/envs/tasks/reacher_four_links.xml
@@ -0,0 +1,57 @@
+<mujoco model="two-link planar reacher">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.02">
+    <flag contact="disable"/>
+  </option>
+
+  <default>
+    <joint type="hinge" axis="0 0 1" damping="0.01"/>
+    <motor gear=".05" ctrlrange="-1 1" ctrllimited="true"/>
+  </default>
+
+  <worldbody>
+    <light name="light" pos="0 0 1"/>
+    <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
+    <!-- Arena -->
+    <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 10" material="grid"/>
+    <geom name="wall_x" type="plane" pos="-.3 0 .02" zaxis="1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_y" type="plane" pos="0 -.3 .02" zaxis="0 1 0"  size=".3 .02 .02" material="decoration"/>
+    <geom name="wall_neg_x" type="plane" pos=".3 0 .02" zaxis="-1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_neg_y" type="plane" pos="0 .3 .02" zaxis="0 -1 0"  size=".3 .02 .02" material="decoration"/>
+
+    <!-- Arm -->
+    <geom name="root" type="cylinder" fromto="0 0 0 0 0 0.02" size=".011" material="decoration"/>
+    <body name="arm0" pos="0 0 .01">
+      <geom name="arm0" type="capsule" fromto="0 0 0 0.06 0 0" size=".01" material="self"/>
+      <joint name="shoulder0"/>
+      <body name="arm1" pos=".06 0 0">
+        <geom name="arm1" type="capsule" fromto="0 0 0 0.06 0 0" size=".01" material="self"/>
+        <joint name="shoulder1" limited="true" range="-80 80"/>
+        <body name="arm2" pos=".06 0 0">
+            <geom name="arm2" type="capsule" fromto="0 0 0 0.06 0 0" size=".01" material="self"/>
+            <joint name="shoulder2" limited="true" range="-80 80"/>
+            <body name="hand" pos=".06 0 0">
+                <geom name="hand" type="capsule" fromto="0 0 0 0.1 0 0" size=".01" material="self"/>
+                <joint name="wrist" limited="true" range="-80 80"/>
+                <body name="finger" pos=".06 0 0">
+                    <camera name="hand" pos="0 0 .2" mode="track"/>
+                    <geom name="finger" type="sphere" size=".01" material="effector"/>
+                </body>
+            </body>
+        </body>
+      </body>
+    </body>
+    <!-- Target -->
+    <geom name="target" pos="0 0 .01" material="target" type="sphere" size=".05"/>
+  </worldbody>
+
+  <actuator>
+    <motor name="shoulder0" joint="shoulder0"/>
+    <motor name="shoulder1" joint="shoulder1"/>
+    <motor name="shoulder2" joint="shoulder2"/>
+    <motor name="wrist" joint="wrist"/>
+  </actuator>
+</mujoco>
diff --git a/tdmpc2/envs/tasks/reacher_three_links.xml b/tdmpc2/envs/tasks/reacher_three_links.xml
new file mode 100644
index 0000000..f32f4bc
--- /dev/null
+++ b/tdmpc2/envs/tasks/reacher_three_links.xml
@@ -0,0 +1,52 @@
+<mujoco model="two-link planar reacher">
+  <include file="./common/skybox.xml"/>
+  <include file="./common/visual.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.02">
+    <flag contact="disable"/>
+  </option>
+
+  <default>
+    <joint type="hinge" axis="0 0 1" damping="0.01"/>
+    <motor gear=".05" ctrlrange="-1 1" ctrllimited="true"/>
+  </default>
+
+  <worldbody>
+    <light name="light" pos="0 0 1"/>
+    <camera name="fixed" pos="0 0 .75" quat="1 0 0 0"/>
+    <!-- Arena -->
+    <geom name="ground" type="plane" pos="0 0 0" size=".3 .3 10" material="grid"/>
+    <geom name="wall_x" type="plane" pos="-.3 0 .02" zaxis="1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_y" type="plane" pos="0 -.3 .02" zaxis="0 1 0"  size=".3 .02 .02" material="decoration"/>
+    <geom name="wall_neg_x" type="plane" pos=".3 0 .02" zaxis="-1 0 0"  size=".02 .3 .02" material="decoration"/>
+    <geom name="wall_neg_y" type="plane" pos="0 .3 .02" zaxis="0 -1 0"  size=".3 .02 .02" material="decoration"/>
+
+    <!-- Arm -->
+    <geom name="root" type="cylinder" fromto="0 0 0 0 0 0.02" size=".011" material="decoration"/>
+    <body name="arm0" pos="0 0 .01">
+      <geom name="arm0" type="capsule" fromto="0 0 0 0.09 0 0" size=".01" material="self"/>
+      <joint name="shoulder0"/>
+      <body name="arm1" pos=".09 0 0">
+        <geom name="arm1" type="capsule" fromto="0 0 0 0.09 0 0" size=".01" material="self"/>
+        <joint name="shoulder1" limited="true" range="-80 80"/>
+        <body name="hand" pos=".09 0 0">
+            <geom name="hand" type="capsule" fromto="0 0 0 0.1 0 0" size=".01" material="self"/>
+            <joint name="wrist" limited="true" range="-80 80"/>
+            <body name="finger" pos=".09 0 0">
+              <camera name="hand" pos="0 0 .2" mode="track"/>
+              <geom name="finger" type="sphere" size=".01" material="effector"/>
+            </body>
+        </body>
+      </body>
+    </body>
+    <!-- Target -->
+    <geom name="target" pos="0 0 .01" material="target" type="sphere" size=".05"/>
+  </worldbody>
+
+  <actuator>
+    <motor name="shoulder0" joint="shoulder0"/>
+    <motor name="shoulder1" joint="shoulder1"/>
+    <motor name="wrist" joint="wrist"/>
+  </actuator>
+</mujoco>
diff --git a/tdmpc2/envs/tasks/walker.py b/tdmpc2/envs/tasks/walker.py
new file mode 100644
index 0000000..d04c404
--- /dev/null
+++ b/tdmpc2/envs/tasks/walker.py
@@ -0,0 +1,223 @@
+import os
+
+from dm_control.rl import control
+from dm_control.suite import common
+from dm_control.suite import walker
+from dm_control.utils import rewards
+from dm_control.utils import io as resources
+
+_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks')
+
+_YOGA_STAND_HEIGHT = 1.0
+_YOGA_LIE_DOWN_HEIGHT = 0.08
+_YOGA_LEGS_UP_HEIGHT = 1.1
+
+
+def get_model_and_assets():
+    """Returns a tuple containing the model XML string and a dict of assets."""
+    return resources.GetResource(os.path.join(_TASKS_DIR, 'walker.xml')), common.ASSETS
+
+
+@walker.SUITE.add('custom')
+def walk_backwards(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Walk Backwards task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = BackwardsPlanarWalker(move_speed=walker._WALK_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def run_backwards(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Run Backwards task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = BackwardsPlanarWalker(move_speed=walker._RUN_SPEED, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def arabesque(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Arabesque task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = YogaPlanarWalker(goal='arabesque', random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def lie_down(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Lie Down task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = YogaPlanarWalker(goal='lie_down', random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def legs_up(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Legs Up task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = YogaPlanarWalker(goal='legs_up', random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def headstand(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Headstand task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = YogaPlanarWalker(goal='flip', move_speed=0, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def flip(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Flip task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = YogaPlanarWalker(goal='flip', move_speed=walker._RUN_SPEED*0.75, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+@walker.SUITE.add('custom')
+def backflip(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
+  """Returns the Backflip task."""
+  physics = walker.Physics.from_xml_string(*get_model_and_assets())
+  task = YogaPlanarWalker(goal='flip', move_speed=-walker._RUN_SPEED*0.75, random=random)
+  environment_kwargs = environment_kwargs or {}
+  return control.Environment(
+      physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP,
+      **environment_kwargs)
+
+
+class BackwardsPlanarWalker(walker.PlanarWalker):
+    """Backwards PlanarWalker task."""
+    def __init__(self, move_speed, random=None):
+        super().__init__(move_speed, random)
+    
+    def get_reward(self, physics):
+        standing = rewards.tolerance(physics.torso_height(),
+                                 bounds=(walker._STAND_HEIGHT, float('inf')),
+                                 margin=walker._STAND_HEIGHT/2)
+        upright = (1 + physics.torso_upright()) / 2
+        stand_reward = (3*standing + upright) / 4
+        if self._move_speed == 0:
+            return stand_reward
+        else:
+            move_reward = rewards.tolerance(physics.horizontal_velocity(),
+                                            bounds=(-float('inf'), -self._move_speed),
+                                            margin=self._move_speed/2,
+                                            value_at_margin=0.5,
+                                            sigmoid='linear')
+            return stand_reward * (5*move_reward + 1) / 6
+
+
+class YogaPlanarWalker(walker.PlanarWalker):
+    """Yoga PlanarWalker tasks."""
+    
+    def __init__(self, goal='arabesque', move_speed=0, random=None):
+        super().__init__(0, random)
+        self._goal = goal
+        self._move_speed = move_speed
+    
+    def _arabesque_reward(self, physics):
+        standing = rewards.tolerance(physics.torso_height(),
+                                bounds=(_YOGA_STAND_HEIGHT, float('inf')),
+                                margin=_YOGA_STAND_HEIGHT/2)
+        left_foot_height = physics.named.data.xpos['left_foot', 'z']
+        right_foot_height = physics.named.data.xpos['right_foot', 'z']
+        left_foot_down = rewards.tolerance(left_foot_height,
+                                bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT),
+                                margin=_YOGA_STAND_HEIGHT/2)
+        right_foot_up = rewards.tolerance(right_foot_height,
+                                bounds=(_YOGA_STAND_HEIGHT, float('inf')),
+                                margin=_YOGA_STAND_HEIGHT/2)
+        upright = (1 - physics.torso_upright()) / 2
+        arabesque_reward = (3*standing + left_foot_down + right_foot_up + upright) / 6
+        return arabesque_reward
+    
+    def _lie_down_reward(self, physics):
+        torso_down = rewards.tolerance(physics.torso_height(),
+                                bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT),
+                                margin=_YOGA_LIE_DOWN_HEIGHT/2)
+        thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2
+        thigh_down = rewards.tolerance(thigh_height,
+                                bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT),
+                                margin=_YOGA_LIE_DOWN_HEIGHT/2)
+        feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2
+        feet_down = rewards.tolerance(feet_height,
+                                bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT),
+                                margin=_YOGA_LIE_DOWN_HEIGHT/2)
+        upright = (1 - physics.torso_upright()) / 2
+        lie_down_reward = (3*torso_down + thigh_down + upright) / 5
+        return lie_down_reward
+    
+    def _legs_up_reward(self, physics):
+        torso_down = rewards.tolerance(physics.torso_height(),
+                                bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT),
+                                margin=_YOGA_LIE_DOWN_HEIGHT/2)
+        thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2
+        thigh_down = rewards.tolerance(thigh_height,
+                                bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT),
+                                margin=_YOGA_LIE_DOWN_HEIGHT/2)
+        feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2
+        legs_up = rewards.tolerance(feet_height,
+                                bounds=(_YOGA_LEGS_UP_HEIGHT, float('inf')),
+                                margin=_YOGA_LEGS_UP_HEIGHT/2)
+        upright = (1 - physics.torso_upright()) / 2
+        legs_up_reward = (3*torso_down + 2*legs_up + thigh_down + upright) / 7
+        return legs_up_reward
+    
+    def _flip_reward(self, physics):
+        thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2
+        thigh_up = rewards.tolerance(thigh_height,
+                                bounds=(_YOGA_STAND_HEIGHT, float('inf')),
+                                margin=_YOGA_STAND_HEIGHT/2)
+        feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2
+        legs_up = rewards.tolerance(feet_height,
+                                bounds=(_YOGA_LEGS_UP_HEIGHT, float('inf')),
+                                margin=_YOGA_LEGS_UP_HEIGHT/2)
+        upside_down_reward = (3*legs_up + 2*thigh_up) / 5
+        if self._move_speed == 0:
+            return upside_down_reward
+        move_reward = rewards.tolerance(physics.horizontal_velocity(),
+                                    bounds=(self._move_speed, float('inf')) if self._move_speed > 0 else (-float('inf'), self._move_speed),
+                                    margin=abs(self._move_speed)/2,
+                                    value_at_margin=0.5,
+                                    sigmoid='linear')
+        return upside_down_reward * (5*move_reward + 1) / 6
+    
+    def get_reward(self, physics):
+        if self._goal == 'arabesque':
+            return self._arabesque_reward(physics)
+        elif self._goal == 'lie_down':
+            return self._lie_down_reward(physics)
+        elif self._goal == 'legs_up':
+            return self._legs_up_reward(physics)
+        elif self._goal == 'flip':
+            return self._flip_reward(physics)
+        else:
+            raise NotImplementedError(f'Goal {self._goal} is not implemented.')
+
+
+if __name__ == '__main__':  # Smoke test: build the Legs Up task and take one zero-action step.
+    env = legs_up()
+    time_step = env.reset()  # dm_control environments return a dm_env.TimeStep, not a bare observation
+    import numpy as np
+    time_step = env.step(np.zeros(env.action_spec().shape))  # TimeStep = (step_type, reward, discount, observation)
diff --git a/tdmpc2/envs/tasks/walker.xml b/tdmpc2/envs/tasks/walker.xml
new file mode 100644
index 0000000..1d17637
--- /dev/null
+++ b/tdmpc2/envs/tasks/walker.xml
@@ -0,0 +1,70 @@
+<mujoco model="planar walker">
+  <include file="./common/visual.xml"/>
+  <include file="./common/skybox.xml"/>
+  <include file="./common/materials.xml"/>
+
+  <option timestep="0.0025"/>
+
+  <statistic extent="2" center="0 0 1"/>
+
+  <default>
+    <joint damping=".1" armature="0.01" limited="true" solimplimit="0 .99 .01"/>
+    <geom contype="1" conaffinity="0" friction=".7 .1 .1"/>
+    <motor ctrlrange="-1 1" ctrllimited="true"/>
+    <site size="0.01"/>
+    <default class="walker">
+      <geom material="self" type="capsule"/>
+      <joint axis="0 -1 0"/>
+    </default>
+  </default>
+
+  <worldbody>
+    <geom name="floor" type="plane" conaffinity="1" pos="248 0 0" size="500 .8 .2" material="grid" zaxis="0 0 1"/>
+    <body name="torso" pos="0 0 1.3" childclass="walker">
+      <light name="light" pos="0 0 2" mode="trackcom"/>
+      <camera name="side" pos="0 -2 .7" euler="60 0 0" mode="trackcom"/>
+      <camera name="back" pos="-2 0 .5" xyaxes="0 -1 0 1 0 3" mode="trackcom"/>
+      <joint name="rootz" axis="0 0 1" type="slide" limited="false" armature="0" damping="0"/>
+      <joint name="rootx" axis="1 0 0" type="slide" limited="false" armature="0" damping="0"/>
+      <joint name="rooty" axis="0 1 0" type="hinge" limited="false" armature="0" damping="0"/>
+      <geom name="torso" size="0.07 0.3"/>
+      <body name="right_thigh" pos="0 -.05 -0.3">
+        <joint name="right_hip" range="-20 100"/>
+        <geom name="right_thigh" pos="0 0 -0.225" size="0.05 0.225"/>
+        <body name="right_leg" pos="0 0 -0.7">
+          <joint name="right_knee" pos="0 0 0.25" range="-150 0"/>
+          <geom name="right_leg" size="0.04 0.25"/>
+          <body name="right_foot" pos="0.06 0 -0.25">
+            <joint name="right_ankle" pos="-0.06 0 0" range="-45 45"/>
+            <geom name="right_foot" zaxis="1 0 0" size="0.05 0.1"/>
+          </body>
+        </body>
+      </body>
+      <body name="left_thigh" pos="0 .05 -0.3" >
+        <joint name="left_hip" range="-20 100"/>
+        <geom name="left_thigh" pos="0 0 -0.225" size="0.05 0.225"/>
+        <body name="left_leg" pos="0 0 -0.7">
+          <joint name="left_knee" pos="0 0 0.25" range="-150 0"/>
+          <geom name="left_leg" size="0.04 0.25"/>
+          <body name="left_foot" pos="0.06 0 -0.25">
+            <joint name="left_ankle" pos="-0.06 0 0" range="-45 45"/>
+            <geom name="left_foot" zaxis="1 0 0" size="0.05 0.1"/>
+          </body>
+        </body>
+      </body>
+    </body>
+  </worldbody>
+
+  <sensor>
+    <subtreelinvel name="torso_subtreelinvel" body="torso"/>
+  </sensor>
+
+  <actuator>
+    <motor name="right_hip" joint="right_hip" gear="100"/>
+    <motor name="right_knee" joint="right_knee" gear="50"/>
+    <motor name="right_ankle" joint="right_ankle" gear="20"/>
+    <motor name="left_hip" joint="left_hip" gear="100"/>
+    <motor name="left_knee" joint="left_knee" gear="50"/>
+    <motor name="left_ankle" joint="left_ankle" gear="20"/>
+  </actuator>
+</mujoco>
diff --git a/tdmpc2/envs/wrappers/__init__.py b/tdmpc2/envs/wrappers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tdmpc2/envs/wrappers/multitask.py b/tdmpc2/envs/wrappers/multitask.py
new file mode 100644
index 0000000..08dd4eb
--- /dev/null
+++ b/tdmpc2/envs/wrappers/multitask.py
@@ -0,0 +1,57 @@
+import gym
+import numpy as np
+import torch
+
+
+class MultitaskWrapper(gym.Wrapper):
+	"""
+	Wrapper for multi-task environments.
+	"""
+
+	def __init__(self, cfg, envs):
+		super().__init__(envs[0])
+		self.cfg = cfg
+		self.envs = envs
+		self._task = cfg.tasks[0]
+		self._task_idx = 0
+		self._obs_dims = [env.observation_space.shape[0] for env in self.envs]
+		self._action_dims = [env.action_space.shape[0] for env in self.envs]
+		self._episode_lengths = [env.max_episode_steps for env in self.envs]
+		self._obs_shape = (max(self._obs_dims),)
+		self._action_dim = max(self._action_dims)
+		self.observation_space = gym.spaces.Box(
+			low=-np.inf, high=np.inf, shape=self._obs_shape, dtype=np.float32
+		)
+		self.action_space = gym.spaces.Box(
+			low=-1, high=1, shape=(self._action_dim,), dtype=np.float32
+		)
+	
+	@property
+	def task(self):
+		return self._task
+	
+	@property
+	def task_idx(self):
+		return self._task_idx
+	
+	@property
+	def _env(self):
+		return self.envs[self.task_idx]
+
+	def rand_act(self):
+		return torch.from_numpy(self.action_space.sample().astype(np.float32))
+
+	def _pad_obs(self, obs):
+		if obs.shape != self._obs_shape:
+			obs = torch.cat((obs, torch.zeros(self._obs_shape[0]-obs.shape[0], dtype=obs.dtype, device=obs.device)))
+		return obs
+	
+	def reset(self, task_idx=-1):
+		self._task_idx = task_idx
+		self._task = self.cfg.tasks[task_idx]
+		self.env = self._env
+		return self._pad_obs(self.env.reset())
+
+	def step(self, action):
+		obs, reward, done, info = self.env.step(action[:self.env.action_space.shape[0]])
+		return self._pad_obs(obs), reward, done, info
diff --git a/tdmpc2/envs/wrappers/tensor.py b/tdmpc2/envs/wrappers/tensor.py
new file mode 100644
index 0000000..548a5f4
--- /dev/null
+++ b/tdmpc2/envs/wrappers/tensor.py
@@ -0,0 +1,40 @@
+from collections import defaultdict
+
+import gym
+import numpy as np
+import torch
+
+
+class TensorWrapper(gym.Wrapper):
+	"""
+	Wrapper for converting numpy arrays to torch tensors.
+	"""
+
+	def __init__(self, env):
+		super().__init__(env)
+	
+	def rand_act(self):
+		return torch.from_numpy(self.action_space.sample().astype(np.float32))
+
+	def _try_f32_tensor(self, x):
+		x = torch.from_numpy(x)
+		if x.dtype == torch.float64:
+			x = x.float()
+		return x
+
+	def _obs_to_tensor(self, obs):
+		if isinstance(obs, dict):
+			for k in obs.keys():
+				obs[k] = self._try_f32_tensor(obs[k])
+		else:
+			obs = self._try_f32_tensor(obs)
+		return obs
+
+	def reset(self, task_idx=None):
+		return self._obs_to_tensor(self.env.reset())
+
+	def step(self, action):
+		obs, reward, done, info = self.env.step(action.numpy())
+		info = defaultdict(float, info)
+		info['success'] = float(info['success'])
+		return self._obs_to_tensor(obs), torch.tensor(reward, dtype=torch.float32), done, info
diff --git a/tdmpc2/envs/wrappers/time_limit.py b/tdmpc2/envs/wrappers/time_limit.py
new file mode 100644
index 0000000..f81c281
--- /dev/null
+++ b/tdmpc2/envs/wrappers/time_limit.py
@@ -0,0 +1,72 @@
+"""
+Wrapper for limiting the time steps of an environment.
+Source: https://github.com/openai/gym/blob/3498617bf031538a808b75b932f4ed2c11896a3e/gym/wrappers/time_limit.py
+"""
+from typing import Optional
+
+import gym
+
+
+class TimeLimit(gym.Wrapper):
+    """This wrapper will issue a `done` signal if a maximum number of timesteps is exceeded.
+
+    Oftentimes, it is **very** important to distinguish `done` signals that were produced by the
+    :class:`TimeLimit` wrapper (truncations) and those that originate from the underlying environment (terminations).
+    This can be done by looking at the ``info`` that is returned when `done`-signal was issued.
+    The done-signal originates from the time limit (i.e. it signifies a *truncation*) if and only if
+    the key `"TimeLimit.truncated"` exists in ``info`` and the corresponding value is ``True``.
+
+    Example:
+       >>> from gym.envs.classic_control import CartPoleEnv
+       >>> from gym.wrappers import TimeLimit
+       >>> env = CartPoleEnv()
+       >>> env = TimeLimit(env, max_episode_steps=1000)
+    """
+
+    def __init__(self, env: gym.Env, max_episode_steps: Optional[int] = None):
+        """Initializes the :class:`TimeLimit` wrapper with an environment and the number of steps after which truncation will occur.
+
+        Args:
+            env: The environment to apply the wrapper
+            max_episode_steps: An optional max episode steps (if ``None``, ``env.spec.max_episode_steps`` is used)
+        """
+        super().__init__(env)
+        if max_episode_steps is None and self.env.spec is not None:
+            max_episode_steps = env.spec.max_episode_steps
+        if self.env.spec is not None:
+            self.env.spec.max_episode_steps = max_episode_steps
+        self._max_episode_steps = max_episode_steps
+        self._elapsed_steps = None
+
+    def step(self, action):
+        """Steps through the environment and if the number of steps elapsed exceeds ``max_episode_steps`` then truncate.
+
+        Args:
+            action: The environment step action
+
+        Returns:
+            The environment step ``(observation, reward, done, info)`` with "TimeLimit.truncated"=True
+            when truncated (the number of steps elapsed >= max episode steps) or
+            "TimeLimit.truncated"=False if the environment terminated
+        """
+        observation, reward, done, info = self.env.step(action)
+        self._elapsed_steps += 1
+        if self._elapsed_steps >= self._max_episode_steps:
+            # TimeLimit.truncated key may have been already set by the environment
+            # do not overwrite it
+            episode_truncated = not done or info.get("TimeLimit.truncated", False)
+            info["TimeLimit.truncated"] = episode_truncated
+            done = True
+        return observation, reward, done, info
+
+    def reset(self, **kwargs):
+        """Resets the environment with :param:`**kwargs` and sets the number of steps elapsed to zero.
+
+        Args:
+            **kwargs: The kwargs to reset the environment with
+
+        Returns:
+            The reset environment
+        """
+        self._elapsed_steps = 0
+        return self.env.reset(**kwargs)
diff --git a/tdmpc2/evaluate.py b/tdmpc2/evaluate.py
new file mode 100755
index 0000000..f5b8628
--- /dev/null
+++ b/tdmpc2/evaluate.py
@@ -0,0 +1,103 @@
+import os
+os.environ['MUJOCO_GL'] = 'egl'
+import warnings
+warnings.filterwarnings('ignore')
+
+import hydra
+import imageio
+import numpy as np
+import torch
+from termcolor import colored
+
+from common.parser import parse_cfg
+from common.seed import set_seed
+from envs import make_env
+from tdmpc2 import TDMPC2
+
+torch.backends.cudnn.benchmark = True
+
+
+@hydra.main(config_name='config', config_path='.')
+def evaluate(cfg: dict):
+	"""
+	Script for evaluating a single-task / multi-task TD-MPC2 checkpoint.
+
+	Most relevant args:
+		`task`: task name (or mt30/mt80 for multi-task evaluation)
+		`model_size`: model size, must be one of `[1, 5, 19, 48, 317]` (default: 5)
+		`checkpoint`: path to model checkpoint to load
+		`eval_episodes`: number of episodes to evaluate on per task (default: 10)
+		`save_video`: whether to save a video of the evaluation (default: True)
+		`seed`: random seed (default: 1)
+	
+	See config.yaml for a full list of args.
+
+	Example usage:
+	```
+		$ python evaluate.py task=mt80 model_size=48 checkpoint=/path/to/mt80-48M.pt
+		$ python evaluate.py task=mt30 model_size=317 checkpoint=/path/to/mt30-317M.pt
+		$ python evaluate.py task=dog-run checkpoint=/path/to/dog-1.pt save_video=true
+	```
+	"""
+	assert torch.cuda.is_available()
+	assert cfg.eval_episodes > 0, 'Must evaluate at least 1 episode.'
+	cfg = parse_cfg(cfg)
+	set_seed(cfg.seed)
+	print(colored(f'Task: {cfg.task}', 'blue', attrs=['bold']))
+	print(colored(f'Model size: {cfg.model_size}', 'blue', attrs=['bold']))
+	print(colored(f'Checkpoint: {cfg.checkpoint}', 'blue', attrs=['bold']))
+	if not cfg.multitask and ('mt80' in cfg.checkpoint or 'mt30' in cfg.checkpoint):
+		print(colored('Warning: single-task evaluation of multi-task models is not currently supported.', 'red', attrs=['bold']))
+		print(colored('To evaluate a multi-task model, use task=mt80 or task=mt30.', 'red', attrs=['bold']))
+
+	# Make environment
+	env = make_env(cfg)
+
+	# Load agent
+	agent = TDMPC2(cfg)
+	assert os.path.exists(cfg.checkpoint), f'Checkpoint {cfg.checkpoint} not found! Must be a valid filepath.'
+	agent.load(cfg.checkpoint)
+	
+	# Evaluate
+	if cfg.multitask:
+		print(colored(f'Evaluating agent on {len(cfg.tasks)} tasks:', 'yellow', attrs=['bold']))
+	else:
+		print(colored(f'Evaluating agent on {cfg.task}:', 'yellow', attrs=['bold']))
+	if cfg.save_video:
+		video_dir = os.path.join(cfg.work_dir, 'videos')
+		os.makedirs(video_dir, exist_ok=True)
+	scores = []
+	tasks = cfg.tasks if cfg.multitask else [cfg.task]
+	for task_idx, task in enumerate(tasks):
+		if not cfg.multitask:
+			task_idx = None
+		ep_rewards, ep_successes = [], []
+		for i in range(cfg.eval_episodes):
+			obs, done, ep_reward, t = env.reset(task_idx=task_idx), False, 0, 0
+			if cfg.save_video:
+				frames = [env.render()]
+			while not done:
+				action = agent.act(obs, t0=t==0, task=task_idx)
+				obs, reward, done, info = env.step(action)
+				ep_reward += reward
+				t += 1
+				if cfg.save_video:
+					frames.append(env.render())
+			ep_rewards.append(ep_reward)
+			ep_successes.append(info['success'])
+			if cfg.save_video:
+				imageio.mimsave(
+					os.path.join(video_dir, f'{task}-{i}.mp4'), frames, fps=15)
+		ep_rewards = np.mean(ep_rewards)
+		ep_successes = np.mean(ep_successes)
+		if cfg.multitask:
+			scores.append(ep_successes*100 if task.startswith('mw-') else ep_rewards/10)
+		print(colored(f'  {task:<22}' \
+			f'\tR: {ep_rewards:.01f}  ' \
+			f'\tS: {ep_successes:.02f}', 'yellow'))
+	if cfg.multitask:
+		print(colored(f'Normalized score: {np.mean(scores):.02f}', 'yellow', attrs=['bold']))
+
+
+if __name__ == '__main__':
+	evaluate()
diff --git a/tdmpc2/tdmpc2.py b/tdmpc2/tdmpc2.py
new file mode 100755
index 0000000..9ee3ff5
--- /dev/null
+++ b/tdmpc2/tdmpc2.py
@@ -0,0 +1,286 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from common import math
+from common.scale import RunningScale
+from common.world_model import WorldModel
+
+
+class TDMPC2:
+	"""
+	TD-MPC2 agent. Implements training + inference.
+	Can be used for both single-task and multi-task experiments.
+	"""
+
+	def __init__(self, cfg):
+		self.cfg = cfg
+		self.device = torch.device('cuda')
+		self.model = WorldModel(cfg).to(self.device)
+		self.optim = torch.optim.Adam([
+			{'params': self.model._encoder.parameters(), 'lr': self.cfg.lr*self.cfg.enc_lr_scale},
+			{'params': self.model._dynamics.parameters()},
+			{'params': self.model._reward.parameters()},
+			{'params': self.model._Qs.parameters()},
+			{'params': self.model._task_emb.parameters() if self.cfg.multitask else []}
+		], lr=self.cfg.lr)
+		self.pi_optim = torch.optim.Adam(self.model._pi.parameters(), lr=self.cfg.lr, eps=1e-5)
+		self.model.eval()
+		self.scale = RunningScale(cfg)
+		self.cfg.iterations += 2*int(cfg.action_dim >= 20) # Heuristic for large action spaces
+		self.discount = torch.tensor(
+			[self._get_discount(ep_len) for ep_len in cfg.episode_lengths], device='cuda'
+		) if self.cfg.multitask else self._get_discount(cfg.episode_length)
+
+	def _get_discount(self, episode_length):
+		"""
+		Returns discount factor for a given episode length.
+		Simple heuristic that scales discount linearly with episode length.
+		Default values should work well for most tasks, but can be changed as needed.
+
+		Args:
+			episode_length (int): Length of the episode. Assumes episodes are of fixed length.
+
+		Returns:
+			float: Discount factor for the task.
+		"""
+		frac = episode_length/self.cfg.discount_denom
+		return min(max((frac-1)/(frac), self.cfg.discount_min), self.cfg.discount_max)
+
+	def save(self, fp):
+		"""
+		Save state dict of the agent to filepath.
+		
+		Args:
+			fp (str): Filepath to save state dict to.
+		"""
+		torch.save({"model": self.model.state_dict()}, fp)
+
+	def load(self, fp):
+		"""
+		Load a saved state dict from filepath (or dictionary) into current agent.
+		
+		Args:
+			fp (str or dict): Filepath or state dict to load.
+		"""
+		state_dict = fp if isinstance(fp, dict) else torch.load(fp)
+		self.model.load_state_dict(state_dict["model"])
+
+	@torch.no_grad()
+	def act(self, obs, t0=False, eval_mode=False, task=None):
+		"""
+		Select an action by planning in the latent space of the world model.
+		
+		Args:
+			obs (torch.Tensor): Observation from the environment.
+			t0 (bool): Whether this is the first observation in the episode.
+			eval_mode (bool): Whether to use the mean of the action distribution.
+			task (int): Task index (only used for multi-task experiments).
+		
+		Returns:
+			torch.Tensor: Action to take in the environment.
+		"""
+		obs = obs.to(self.device, non_blocking=True).unsqueeze(0)
+		if task is not None:
+			task = torch.tensor([task], device=self.device)
+		z = self.model.encode(obs, task)
+		a = self.plan(z, t0=t0, eval_mode=eval_mode, task=task)
+		return a.cpu()
+
+	@torch.no_grad()
+	def _estimate_value(self, z, actions, task):
+		"""Estimate value of a trajectory starting at latent state z and executing given actions (bootstrapped with Q at the horizon)."""
+		G, discount = 0, 1
+		for t in range(self.cfg.horizon):
+			reward = math.two_hot_inv(self.model.reward(z, actions[t], task), self.cfg)
+			z = self.model.next(z, actions[t], task)
+			G += discount * reward
+			discount *= self.discount[task] if self.cfg.multitask else self.discount  # index directly (as in _td_target); no torch.tensor() re-wrap
+		return G + discount * self.model.Q(z, self.model.pi(z, task)[1], task, return_type='avg')
+
+	@torch.no_grad()
+	def plan(self, z, t0=False, eval_mode=False, task=None):
+		"""
+		Plan a sequence of actions using the learned world model.
+		
+		Args:
+			z (torch.Tensor): Latent state from which to plan.
+			t0 (bool): Whether this is the first observation in the episode.
+			eval_mode (bool): Whether to use the mean of the action distribution.
+			task (torch.Tensor): Task index (only used for multi-task experiments).
+
+		Returns:
+			torch.Tensor: Action to take in the environment.
+		"""		
+		# Sample policy trajectories
+		if self.cfg.num_pi_trajs > 0:
+			pi_actions = torch.empty(self.cfg.horizon, self.cfg.num_pi_trajs, self.cfg.action_dim, device=self.device)
+			_z = z.repeat(self.cfg.num_pi_trajs, 1)
+			for t in range(self.cfg.horizon-1):
+				pi_actions[t] = self.model.pi(_z, task)[1]
+				_z = self.model.next(_z, pi_actions[t], task)
+			pi_actions[-1] = self.model.pi(_z, task)[1]
+
+		# Initialize state and parameters
+		z = z.repeat(self.cfg.num_samples, 1)
+		mean = torch.zeros(self.cfg.horizon, self.cfg.action_dim, device=self.device)
+		std = self.cfg.max_std*torch.ones(self.cfg.horizon, self.cfg.action_dim, device=self.device)
+		if not t0:
+			mean[:-1] = self._prev_mean[1:]
+		actions = torch.empty(self.cfg.horizon, self.cfg.num_samples, self.cfg.action_dim, device=self.device)
+		if self.cfg.num_pi_trajs > 0:
+			actions[:, :self.cfg.num_pi_trajs] = pi_actions
+	
+		# Iterate MPPI
+		for i in range(self.cfg.iterations):
+
+			# Sample actions
+			actions[:, self.cfg.num_pi_trajs:] = (mean.unsqueeze(1) + std.unsqueeze(1) * \
+				torch.randn(self.cfg.horizon, self.cfg.num_samples-self.cfg.num_pi_trajs, self.cfg.action_dim, device=std.device)) \
+				.clamp(-1, 1)
+			if self.cfg.multitask:
+				actions = actions * self.model._action_masks[task]
+
+			# Compute elite actions
+			value = self._estimate_value(z, actions, task).nan_to_num_(0)
+			elite_idxs = torch.topk(value.squeeze(1), self.cfg.num_elites, dim=0).indices
+			elite_value, elite_actions = value[elite_idxs], actions[:, elite_idxs]
+
+			# Update parameters
+			max_value = elite_value.max(0)[0]
+			score = torch.exp(self.cfg.temperature*(elite_value - max_value))
+			score /= score.sum(0)
+			mean = torch.sum(score.unsqueeze(0) * elite_actions, dim=1) / (score.sum(0) + 1e-9)
+			std = torch.sqrt(torch.sum(score.unsqueeze(0) * (elite_actions - mean.unsqueeze(1)) ** 2, dim=1) / (score.sum(0) + 1e-9)) \
+				.clamp_(self.cfg.min_std, self.cfg.max_std)
+			if self.cfg.multitask:
+				mean = mean * self.model._action_masks[task]
+				std = std * self.model._action_masks[task]
+
+		# Select action
+		score = score.squeeze(1).cpu().numpy()
+		actions = elite_actions[:, np.random.choice(np.arange(score.shape[0]), p=score)]
+		self._prev_mean = mean
+		a, std = actions[0], std[0]
+		if not eval_mode:
+			a += std * torch.randn(self.cfg.action_dim, device=std.device)
+		return a.clamp_(-1, 1)
+		
+	def update_pi(self, zs, task):
+		"""
+		Update policy using a sequence of latent states.
+		
+		Args:
+			zs (torch.Tensor): Sequence of latent states.
+			task (torch.Tensor): Task index (only used for multi-task experiments).
+
+		Returns:
+			float: Loss of the policy update.
+		"""
+		self.pi_optim.zero_grad(set_to_none=True)
+		self.model.track_q_grad(False)
+		_, pis, log_pis, _ = self.model.pi(zs, task)
+		qs = self.model.Q(zs, pis, task, return_type='avg')
+		self.scale.update(qs[0])
+		qs = self.scale(qs)
+
+		# Loss is a weighted sum of Q-values
+		rho = torch.pow(self.cfg.rho, torch.arange(len(qs), device=self.device))
+		pi_loss = ((self.cfg.entropy_coef * log_pis - qs).mean(dim=(1,2)) * rho).mean()
+		pi_loss.backward()
+		torch.nn.utils.clip_grad_norm_(self.model._pi.parameters(), self.cfg.grad_clip_norm)
+		self.pi_optim.step()
+		self.model.track_q_grad(True)
+
+		return pi_loss.item()
+
+	@torch.no_grad()
+	def _td_target(self, next_z, reward, task):
+		"""
+		Compute the TD-target from a reward and the observation at the following time step.
+		
+		Args:
+			next_z (torch.Tensor): Latent state at the following time step.
+			reward (torch.Tensor): Reward at the current time step.
+			task (torch.Tensor): Task index (only used for multi-task experiments).
+		
+		Returns:
+			torch.Tensor: TD-target.
+		"""
+		pi = self.model.pi(next_z, task)[1]
+		discount = self.discount[task].unsqueeze(-1) if self.cfg.multitask else self.discount
+		return reward + discount * self.model.Q(next_z, pi, task, return_type='min', target=True)
+
+	def update(self, buffer):
+		"""
+		Main update function. Corresponds to one iteration of model learning.
+		
+		Args:
+			buffer (common.buffer.Buffer): Replay buffer.
+		
+		Returns:
+			dict: Dictionary of training statistics.
+		"""
+		obs, action, reward, task = buffer.sample()
+	
+		# Compute targets
+		with torch.no_grad():
+			next_z = self.model.encode(obs[1:], task)
+			td_targets = self._td_target(next_z, reward, task)
+
+		# Prepare for update
+		self.optim.zero_grad(set_to_none=True)
+		self.model.train()
+
+		# Latent rollout
+		zs = torch.empty(self.cfg.horizon+1, self.cfg.batch_size, self.cfg.latent_dim, device=self.device)
+		z = self.model.encode(obs[0], task)
+		zs[0] = z
+		consistency_loss = 0
+		for t in range(self.cfg.horizon):
+			z = self.model.next(z, action[t], task)
+			consistency_loss += F.mse_loss(z, next_z[t]) * self.cfg.rho**t
+			zs[t+1] = z
+
+		# Predictions
+		_zs = zs[:-1]
+		qs = self.model.Q(_zs, action, task, return_type='all')
+		reward_preds = self.model.reward(_zs, action, task)
+		
+		# Compute losses
+		reward_loss, value_loss = 0, 0
+		for t in range(self.cfg.horizon):
+			reward_loss += math.soft_ce(reward_preds[t], reward[t], self.cfg).mean() * self.cfg.rho**t
+			for q in range(self.cfg.num_q):
+				value_loss += math.soft_ce(qs[q][t], td_targets[t], self.cfg).mean() * self.cfg.rho**t
+		consistency_loss *= (1/self.cfg.horizon)
+		reward_loss *= (1/self.cfg.horizon)
+		value_loss *= (1/(self.cfg.horizon * self.cfg.num_q))
+		total_loss = (
+			self.cfg.consistency_coef * consistency_loss +
+			self.cfg.reward_coef * reward_loss +
+			self.cfg.value_coef * value_loss
+		)
+
+		# Update model
+		total_loss.backward()
+		grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.cfg.grad_clip_norm)
+		self.optim.step()
+
+		# Update policy
+		pi_loss = self.update_pi(zs.detach(), task)
+
+		# Update target Q-functions
+		self.model.soft_update_target_Q()
+
+		# Return training statistics
+		self.model.eval()
+		return {
+			"consistency_loss": float(consistency_loss.mean().item()),
+			"reward_loss": float(reward_loss.mean().item()),
+			"value_loss": float(value_loss.mean().item()),
+			"pi_loss": pi_loss,
+			"total_loss": float(total_loss.mean().item()),
+			"grad_norm": float(grad_norm),
+			"pi_scale": float(self.scale.value),
+		}
diff --git a/tdmpc2/train.py b/tdmpc2/train.py
new file mode 100755
index 0000000..a35c11b
--- /dev/null
+++ b/tdmpc2/train.py
@@ -0,0 +1,61 @@
+import os
+os.environ['MUJOCO_GL'] = 'egl'  # set before the env imports below; presumably selects MuJoCo's EGL rendering backend -- TODO confirm
+import warnings
+warnings.filterwarnings('ignore')  # suppresses all warnings for the rest of the process
+import torch
+
+import hydra
+from termcolor import colored
+
+from common.parser import parse_cfg
+from common.seed import set_seed
+from common.buffer import Buffer
+from envs import make_env
+from tdmpc2 import TDMPC2
+from trainer.offline_trainer import OfflineTrainer
+from trainer.online_trainer import OnlineTrainer
+from common.logger import Logger
+
+torch.backends.cudnn.benchmark = True  # enable cuDNN's benchmark mode (auto-selects convolution algorithms)
+
+
+@hydra.main(config_name='config', config_path='.')
+def train(cfg: dict):
+	"""
+	Entry point for training single-task / multi-task TD-MPC2 agents (asserts that CUDA is available).
+
+	Most relevant args:
+		`task`: task name (or mt30/mt80 for multi-task training)
+		`model_size`: model size, must be one of `[1, 5, 19, 48, 317]` (default: 5)
+		`steps`: number of training/environment steps (default: 10M)
+		`seed`: random seed (default: 1)
+
+	See config.yaml for a full list of args.
+
+	Example usage:
+	```
+		$ python train.py task=mt80 model_size=48
+		$ python train.py task=mt30 model_size=317
+		$ python train.py task=dog-run steps=7000000
+	```
+	"""
+	assert torch.cuda.is_available()  # NOTE(review): asserts are stripped under `python -O`
+	assert cfg.steps > 0, 'Must train for at least 1 step.'
+	cfg = parse_cfg(cfg)  # from here on `cfg` is whatever parse_cfg returns
+	set_seed(cfg.seed)  # seed before the env, agent and buffer are constructed below
+	print(colored('Work dir:', 'yellow', attrs=['bold']), cfg.work_dir)
+
+	trainer_cls = OfflineTrainer if cfg.multitask else OnlineTrainer  # multitask -> offline trainer, otherwise online
+	trainer = trainer_cls(
+		cfg=cfg,
+		env=make_env(cfg),
+		agent=TDMPC2(cfg),
+		buffer=Buffer(cfg),
+		logger=Logger(cfg),
+	)
+	trainer.train()
+	print('\nTraining completed successfully')
+
+
+if __name__ == '__main__':
+	train()  # called without arguments; `cfg` is expected to be supplied by the @hydra.main decorator
diff --git a/tdmpc2/trainer/__init__.py b/tdmpc2/trainer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tdmpc2/trainer/base.py b/tdmpc2/trainer/base.py
new file mode 100755
index 0000000..aaf1a39
--- /dev/null
+++ b/tdmpc2/trainer/base.py
@@ -0,0 +1,19 @@
+class Trainer:
+	"""Base trainer class for TD-MPC2."""
+
+	def __init__(self, cfg, env, agent, buffer, logger):
+		self.cfg = cfg
+		self.env = env
+		self.agent = agent
+		self.buffer = buffer
+		self.logger = logger
+		print("Learnable parameters: {:,}".format(self.agent.model.total_params))
+		print('Architecture:', self.agent.model)
+
+	def eval(self):
+		"""Evaluate a TD-MPC2 agent."""
+		raise NotImplementedError
+
+	def train(self):
+		"""Train a TD-MPC2 agent."""
+		raise NotImplementedError
diff --git a/tdmpc2/trainer/offline_trainer.py b/tdmpc2/trainer/offline_trainer.py
new file mode 100755
index 0000000..9ed4fd4
--- /dev/null
+++ b/tdmpc2/trainer/offline_trainer.py
@@ -0,0 +1,92 @@
+import os
+from copy import deepcopy
+from time import time
+from pathlib import Path
+from glob import glob
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+from common.buffer import Buffer
+from trainer.base import Trainer
+
+
+class OfflineTrainer(Trainer):
+	"""Trainer class for multi-task offline TD-MPC2 training (agent updates from a pre-loaded dataset)."""
+
+	def __init__(self, *args, **kwargs):
+		super().__init__(*args, **kwargs)
+		self._start_time = time()  # reference point for the 'total_time' metric
+	
+	def eval(self):
+		"""Run cfg.eval_episodes episodes for every task in cfg.tasks; return mean reward and success per task."""
+		results = dict()
+		for task_idx in tqdm(range(len(self.cfg.tasks)), desc='Evaluating'):
+			ep_rewards, ep_successes = [], []
+			for _ in range(self.cfg.eval_episodes):
+				obs, done, ep_reward, t = self.env.reset(task_idx), False, 0, 0
+				while not done:
+					action = self.agent.act(obs, t0=t==0, eval_mode=True, task=task_idx)  # t0 is True only on the first step
+					obs, reward, done, info = self.env.step(action)
+					ep_reward += reward
+					t += 1
+				ep_rewards.append(ep_reward)
+				ep_successes.append(info['success'])  # `info` from the final step of the episode
+			results.update({  # keys have the form '<metric>+<task name>'
+				f'episode_reward+{self.cfg.tasks[task_idx]}': np.nanmean(ep_rewards),
+				f'episode_success+{self.cfg.tasks[task_idx]}': np.nanmean(ep_successes),})
+		return results
+				
+	def train(self):
+		"""Load the offline dataset into a new buffer, then run cfg.steps agent updates with periodic evaluation."""
+		assert self.cfg.multitask and self.cfg.task in {'mt30', 'mt80'}, \
+			'Offline training only supports multitask training with mt30 or mt80 task sets.'
+
+		# Load data
+		assert self.cfg.task in self.cfg.data_dir, \
+			f'Expected data directory {self.cfg.data_dir} to contain {self.cfg.task}, ' \
+			f'please double-check your config.'
+		fp = Path(os.path.join(self.cfg.data_dir, '*.pt'))  # glob pattern, not a single file
+		fps = sorted(glob(str(fp)))
+		assert len(fps) > 0, f'No data found at {fp}'
+		print(f'Found {len(fps)} files in {fp}')
+	
+		# Create buffer for sampling
+		_cfg = deepcopy(self.cfg)  # the overrides below apply to this copy only
+		_cfg.episode_length = 101 if self.cfg.task == 'mt80' else 501  # hard-coded per task set
+		_cfg.buffer_size = 550_450_000 if self.cfg.task == 'mt80' else 345_690_000  # hard-coded per task set
+		_cfg.steps = _cfg.buffer_size  # presumably so Buffer's capacity is not limited by the step budget -- TODO confirm
+		self.buffer = Buffer(_cfg)  # replaces the buffer handed to the constructor
+		for fp in tqdm(fps, desc='Loading data'):  # note: rebinds `fp` from the glob pattern to each file path
+			td = torch.load(fp)  # NOTE(review): torch.load unpickles the file; only load trusted data
+			assert td.shape[1] == _cfg.episode_length, \
+				f'Expected episode length {td.shape[1]} to match config episode length {_cfg.episode_length}, ' \
+				f'please double-check your config.'
+			for i in range(len(td)):
+				self.buffer.add(td[i])  # one add per entry along the first dimension
+		assert self.buffer.num_eps == self.buffer.capacity, \
+			f'Buffer has {self.buffer.num_eps} episodes, expected {self.buffer.capacity} episodes.'
+		
+		print(f'Training agent for {self.cfg.steps} iterations...')
+		metrics = {}
+		for i in range(self.cfg.steps):
+
+			# Update agent
+			train_metrics = self.agent.update(self.buffer)
+
+			# Evaluate agent periodically
+			if i % self.cfg.eval_freq == 0 or i == 10_000:  # iteration 10,000 is additionally logged (evaluated only if it is an eval step)
+				metrics = {
+					'iteration': i,
+					'total_time': time() - self._start_time,
+				}
+				metrics.update(train_metrics)
+				if i % self.cfg.eval_freq == 0:
+					metrics.update(self.eval())
+					self.logger.pprint_multitask(metrics, self.cfg)
+					if i > 0:  # no checkpoint at iteration 0
+						self.logger.save_agent(self.agent, identifier=f'{i}')
+				self.logger.log(metrics, 'pretrain')
+			
+		self.logger.finish(self.agent)
diff --git a/tdmpc2/trainer/online_trainer.py b/tdmpc2/trainer/online_trainer.py
new file mode 100755
index 0000000..94835ca
--- /dev/null
+++ b/tdmpc2/trainer/online_trainer.py
@@ -0,0 +1,117 @@
+from time import time
+
+import numpy as np
+import torch
+from tensordict.tensordict import TensorDict
+
+from trainer.base import Trainer
+
+
+class OnlineTrainer(Trainer):
+	"""Trainer class for single-task online TD-MPC2 training."""
+
+	def __init__(self, *args, **kwargs):
+		super().__init__(*args, **kwargs)
+		self._step = 0
+		self._ep_idx = 0
+		self._start_time = time()
+
+	def common_metrics(self):
+		"""Return a dictionary of current metrics."""
+		return dict(
+			step=self._step,
+			episode=self._ep_idx,
+			total_time=time() - self._start_time,
+		)
+
+	def eval(self):
+		"""Evaluate a TD-MPC2 agent."""
+		ep_rewards, ep_successes = [], []
+		for i in range(self.cfg.eval_episodes):
+			obs, done, ep_reward, t = self.env.reset(), False, 0, 0
+			if self.cfg.save_video:
+				self.logger.video.init(self.env, enabled=(i==0))
+			while not done:
+				action = self.agent.act(obs, t0=t==0, eval_mode=True)
+				obs, reward, done, info = self.env.step(action)
+				ep_reward += reward
+				t += 1
+				if self.cfg.save_video:
+					self.logger.video.record(self.env)
+			ep_rewards.append(ep_reward)
+			ep_successes.append(info['success'])
+			if self.cfg.save_video:
+				self.logger.video.save(self._step)
+		return dict(
+			episode_reward=np.nanmean(ep_rewards),
+			episode_success=np.nanmean(ep_successes),
+		)
+
+	def to_td(self, obs, action=None, reward=None):
+		"""Creates a TensorDict for a new episode."""
+		if isinstance(obs, dict):
+			obs = TensorDict({k: v.unsqueeze(0) for k,v in obs.items()}, batch_size=(1,)).cpu()
+		else:
+			obs = obs.unsqueeze(0).cpu()
+		if action is None:
+			action = torch.empty_like(self.env.rand_act())
+		if reward is None:
+			reward = torch.tensor(float('nan'))
+		td = TensorDict(dict(
+			obs=obs,
+			action=action.unsqueeze(0),
+			reward=reward.unsqueeze(0),
+		), batch_size=(1,))
+		return td				
+
+	def train(self):
+		"""Train a TD-MPC2 agent."""
+		train_metrics, done, eval_next = {}, True, True
+		while self._step <= self.cfg.steps:
+
+			# Evaluate agent periodically
+			if self._step % self.cfg.eval_freq == 0:
+				eval_next = True
+
+			# Reset environment
+			if done:
+				if eval_next:
+					eval_metrics = self.eval()
+					eval_metrics.update(self.common_metrics())
+					self.logger.log(eval_metrics, 'eval')
+					eval_next = False
+
+				if self._step > 0:
+					train_metrics.update(
+						episode_reward=torch.tensor([td['reward'] for td in self._tds[1:]]).sum(),
+						episode_success=info['success'],
+					)
+					train_metrics.update(self.common_metrics())
+					self.logger.log(train_metrics, 'train')
+					self._ep_idx = self.buffer.add(torch.cat(self._tds))
+
+				obs = self.env.reset()
+				self._tds = [self.to_td(obs)]
+
+			# Collect experience
+			if self._step > self.cfg.seed_steps:
+				action = self.agent.act(obs, t0=len(self._tds)==1)
+			else:
+				action = self.env.rand_act()
+			obs, reward, done, info = self.env.step(action)
+			self._tds.append(self.to_td(obs, action, reward))
+
+			# Update agent
+			if self._step >= self.cfg.seed_steps:
+				if self._step == self.cfg.seed_steps:
+					num_updates = self.cfg.seed_steps
+					print('Pretraining agent on seed data...')
+				else:
+					num_updates = 1
+				for _ in range(num_updates):
+					_train_metrics = self.agent.update(self.buffer)
+				train_metrics.update(_train_metrics)
+
+			self._step += 1
+	
+		self.logger.finish(self.agent)