commit b67b21c5c638b48b1351864572ee834127717e98 Author: Nicklas Hansen Date: Wed Oct 25 18:26:00 2023 -0700 first commit diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..313b476 --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +# logging +tdmpc2/logs/ +tdmpc2/outputs/ +tdmpc2/wandb/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..ef47324 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,20 @@ +# Contributing to TD-MPC2 +We want to make contributing to this repository as easy and transparent as +possible. + +## Pull requests +We actively welcome your pull requests. + +1. Fork the repo and create your branch from `main`. +2. If you have added code that should be tested, add tests. +3. If you have changed APIs, update the documentation. +4. Make sure your code lints. +5. Issue that pull request! + +## Issues +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +## License +By contributing to TD-MPC2, you agree that your contributions will be licensed +under the `LICENSE` file in the root directory of this source tree. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..bc27ced --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Nicklas Hansen (2023). + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100755 index 0000000..432d720 --- /dev/null +++ b/README.md @@ -0,0 +1,136 @@ +

TD-MPC2

+ +Official implementation of + +[TD-MPC2: Scalable, Robust World Models for Continuous Control](https://nicklashansen.github.io/td-mpc2) by + +[Nicklas Hansen](https://nicklashansen.github.io/), [Hao Su](https://cseweb.ucsd.edu/~haosu/)\*, [Xiaolong Wang](https://xiaolonw.github.io/)\* (UC San Diego)
+ +
+ +[[Website]](https://nicklashansen.github.io/td-mpc2) [[Paper]](https://arxiv.org/abs/2310.16828) [[Models]](https://nicklashansen.github.io/td-mpc2/models) [[Dataset]](https://nicklashansen.github.io/td-mpc2/dataset) + +---- + +## Overview + +TD-MPC**2** is a scalable, robust model-based reinforcement learning algorithm. It compares favorably to existing model-free and model-based methods across **104** continuous control tasks spanning multiple domains, with a *single* set of hyperparameters (*right*). We further demonstrate the scalability of TD-MPC**2** by training a single 317M parameter agent to perform **80** tasks across multiple domains, embodiments, and action spaces (*left*). + +
+ +This repository contains code for training and evaluating both single-task online RL and multi-task offline RL TD-MPC**2** agents. We additionally open-source **300+** [model checkpoints](https://nicklashansen.github.io/td-mpc2/models) (including 12 multi-task models) across 4 task domains: [DMControl](https://arxiv.org/abs/1801.00690), [Meta-World](https://meta-world.github.io/), [ManiSkill2](https://maniskill2.github.io/), and [MyoSuite](https://sites.google.com/view/myosuite), as well as our [30-task and 80-task datasets](https://nicklashansen.github.io/td-mpc2/dataset) used to train the multi-task models. We hope that this repository will serve as a useful community resource for future research on model-based RL. + +---- + +## Getting started + +You will need a machine with a GPU and at least 12 GB of RAM for single-task online RL with TD-MPC**2**, and 128 GB of RAM for multi-task offline RL on our provided 80-task dataset. A GPU with at least 8 GB of memory is recommended for single-task online RL and for evaluation of the provided multi-task models (up to 317M parameters). Training of the 317M parameter model requires a GPU with at least 24 GB of memory. + +We provide a `Dockerfile` for easy installation. You can build the docker image by running + +``` +cd docker && docker build . -t your-username/tdmpc2:0.1.0 +``` + +If you prefer to install dependencies manually, start by installing dependencies via `conda` by running + +``` +conda env create -f docker/environment.yaml +``` + +If you want to run ManiSkill2, you will additionally need to download and link the necessary assets by running + +``` +python -m mani_skill2.utils.download_asset all +``` + +which downloads assets to `./data`. You may move these assets to any location. Then, add the following line to your `~/.bashrc`: + +``` +export MS2_ASSET_DIR=/path/to/data +``` + +and restart your terminal. Meta-World additionally requires MuJoCo 2.1.0. 
We host the unrestricted MuJoCo 2.1.0 license (courtesy of Google DeepMind) at [https://www.tdmpc2.com/files/mjkey.txt](https://www.tdmpc2.com/files/mjkey.txt). You can download the license by running + +``` +wget https://www.tdmpc2.com/files/mjkey.txt -O ~/.mujoco/mjkey.txt +``` + +See `docker/Dockerfile` for installation instructions if you do not already have MuJoCo 2.1.0 installed. MyoSuite requires `gym==0.13.0` which is incompatible with Meta-World and ManiSkill2. Install separately with `pip install myosuite` if desired. Depending on your existing system packages, you may need to install other dependencies. See `docker/Dockerfile` for a list of recommended system packages. + +---- + +## Supported tasks + +This codebase currently supports **104** continuous control tasks from **DMControl**, **Meta-World**, **ManiSkill2**, and **MyoSuite**. Specifically, it supports 39 tasks from DMControl (including 11 custom tasks), 50 tasks from Meta-World, 5 tasks from ManiSkill2, and 10 tasks from MyoSuite, and covers all tasks used in the paper. See the table below for expected name formatting for each task domain: + +| domain | task +| --- | --- | +| dmcontrol | dog-run +| dmcontrol | cheetah-run-backwards +| metaworld | mw-assembly +| metaworld | mw-pick-place-wall +| maniskill | pick-cube +| maniskill | pick-ycb +| myosuite | myo-hand-key-turn +| myosuite | myo-hand-key-turn-hard + +which can be run by specifying the `task` argument for `evaluate.py`. Multi-task training and evaluation are specified by setting `task=mt80` or `task=mt30` for the 80-task and 30-task sets, respectively. + + +## Example usage + +We provide examples on how to evaluate our provided TD-MPC**2** checkpoints, as well as how to train your own TD-MPC**2** agents, below. + +### Evaluation + +See the examples below on how to evaluate downloaded single-task and multi-task checkpoints. 
+ +``` +$ python evaluate.py task=mt80 model_size=48 checkpoint=/path/to/mt80-48M.pt +$ python evaluate.py task=mt30 model_size=317 checkpoint=/path/to/mt30-317M.pt +$ python evaluate.py task=dog-run checkpoint=/path/to/dog-1.pt save_video=true +``` + +All single-task checkpoints expect `model_size=5`. Multi-task checkpoints are available in multiple model sizes. Available arguments are `model_size={1, 5, 19, 48, 317}`. Note that single-task evaluation of multi-task checkpoints is currently not supported. See `config.yaml` for a full list of arguments. + +### Training + +See below examples on how to train TD-MPC**2** on a single task (online RL) and on multi-task datasets (offline RL). We recommend configuring [Weights and Biases](https://wandb.ai) (`wandb`) in `config.yaml` to track training progress. + +``` +$ python train.py task=mt80 model_size=48 batch_size=1024 +$ python train.py task=mt30 model_size=317 batch_size=1024 +$ python train.py task=dog-run steps=7000000 +``` + +We recommend using default hyperparameters for single-task online RL, including the default model size of 5M parameters (`model_size=5`). Multi-task offline RL benefits from a larger model size, but larger models are also increasingly costly to train and evaluate. Available arguments are `model_size={1, 5, 19, 48, 317}`. See `config.yaml` for a full list of arguments. + +---- + +## Citation + +If you find our work useful, please consider citing the paper as follows: + +``` +@misc{hansen2023tdmpc2, + title={TD-MPC2: Scalable, Robust World Models for Continuous Control}, + author={Nicklas Hansen and Hao Su and Xiaolong Wang}, + year={2023}, + eprint={2310.16828}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` + +---- + +## Contributing + +You are very welcome to contribute to this project. Feel free to open an issue or pull request if you have any suggestions or bug reports, but please review our [guidelines](CONTRIBUTING.md) first. 
Our goal is to build a codebase that can easily be extended to new environments and tasks, and we would love to hear about your experience! + +---- + +## License + +This project is licensed under the MIT License - see the `LICENSE` file for details. Note that the repository relies on third-party code, which is subject to their respective licenses. diff --git a/assets/0.gif b/assets/0.gif new file mode 100644 index 0000000..ce13634 Binary files /dev/null and b/assets/0.gif differ diff --git a/assets/1.gif b/assets/1.gif new file mode 100644 index 0000000..c4e88b7 Binary files /dev/null and b/assets/1.gif differ diff --git a/assets/2.gif b/assets/2.gif new file mode 100644 index 0000000..c66e41a Binary files /dev/null and b/assets/2.gif differ diff --git a/assets/3.gif b/assets/3.gif new file mode 100644 index 0000000..9eca22b Binary files /dev/null and b/assets/3.gif differ diff --git a/assets/4.gif b/assets/4.gif new file mode 100644 index 0000000..c7c89cc Binary files /dev/null and b/assets/4.gif differ diff --git a/assets/5.gif b/assets/5.gif new file mode 100644 index 0000000..8f3f67c Binary files /dev/null and b/assets/5.gif differ diff --git a/assets/6.gif b/assets/6.gif new file mode 100644 index 0000000..0993bfc Binary files /dev/null and b/assets/6.gif differ diff --git a/assets/7.gif b/assets/7.gif new file mode 100644 index 0000000..96b2221 Binary files /dev/null and b/assets/7.gif differ diff --git a/assets/8.png b/assets/8.png new file mode 100644 index 0000000..8dcacc3 Binary files /dev/null and b/assets/8.png differ diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..7303219 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,59 @@ +########################################## +# Dockerfile for TD-MPC2 # +# TD-MPC2 Anonymous Authors, 2023 (c) # +# -------------------------------------- # +# Instructions: # +# docker build . 
-t /tdmpc2:0.1.0 # +# docker push /tdmpc2:0.1.0 # +########################################## + +# base image +FROM nvidia/cudagl:11.3.1-devel-ubuntu20.04 +ENV DEBIAN_FRONTEND=noninteractive + +# packages +RUN apt-get -y update && \ + apt-get install -y --no-install-recommends build-essential git nano rsync vim tree curl \ + wget unzip htop tmux xvfb patchelf ca-certificates bash-completion libjpeg-dev libpng-dev \ + ffmpeg cmake swig libssl-dev libcurl4-openssl-dev libopenmpi-dev python3-dev zlib1g-dev \ + qtbase5-dev qtdeclarative5-dev libglib2.0-0 libglu1-mesa-dev libgl1-mesa-dev libvulkan1 \ + libgl1-mesa-glx libosmesa6 libosmesa6-dev libglew-dev mesa-utils && \ + apt-get clean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* && \ + mkdir /root/.ssh + +# miniconda +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh && \ + . /opt/conda/etc/profile.d/conda.sh && \ + conda init && \ + conda clean -ya +ENV PATH /opt/conda/bin:$PATH +SHELL ["/bin/bash", "-c"] + +# conda environment +COPY nvidia_icd.json /usr/share/vulkan/icd.d/nvidia_icd.json +COPY environment.yaml /root +RUN conda env update -n base -f /root/environment.yaml && \ + rm /root/environment.yaml && \ + cd /root && \ + python -m mani_skill2.utils.download_asset all -y && \ + conda clean -ya && \ + pip cache purge + +# environment variables +ENV MUJOCO_GL egl +ENV MS2_ASSET_DIR /root/data +ENV LD_LIBRARY_PATH /root/.mujoco/mujoco210/bin:${LD_LIBRARY_PATH} + +# mujoco (required for metaworld) +RUN mkdir -p /root/.mujoco && \ + wget https://www.tdmpc2.com/files/mjkey.txt && \ + wget https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz && \ + tar -xzf mujoco210-linux-x86_64.tar.gz && \ + rm mujoco210-linux-x86_64.tar.gz && \ + mv mujoco210 /root/.mujoco/mujoco210 && \ + mv mjkey.txt /root/.mujoco/mjkey.txt && \ + python -c "import 
mujoco_py" diff --git a/docker/environment.yaml b/docker/environment.yaml new file mode 100644 index 0000000..9f0e6f1 --- /dev/null +++ b/docker/environment.yaml @@ -0,0 +1,67 @@ +name: tdmpc2 +channels: + - pytorch-nightly + - nvidia + - anaconda + - conda-forge + - defaults +dependencies: + - python=3.9.0 + - pytorch + - torchvision + - cudatoolkit=11.7 + - fluidsynth + - portaudio + - glew + - glib + - pillow + - pip + - pip: + - absl-py + - click + - cloudpickle + - gpustat + - glfw + - kornia + - termcolor + - gym==0.21.0 + - pandas + - moviepy + - ffmpeg + - imageio + - imageio-ffmpeg + - lxml + - pyparsing + - omegaconf + - hydra-core + - hydra-submitit-launcher + - submitit + - patchelf + - protobuf + - scipy + - tqdm + - xmltodict + - transforms3d + - joblib + - scikit-image + - einops + - opencv-python + - opencv-contrib-python + - filelock + - sapien==2.2.1 + - mani-skill2==0.4.1 + - tabulate + - h5py + - trimesh + - open3d + - rtree + - seaborn + - mujoco==2.3.1 + - mujoco-py==2.1.2.14 + - dm-control + - plotly + - pyquaternion + - git+https://github.com/Farama-Foundation/Metaworld.git@04be337a12305e393c0caf0cbf5ec7755c7c8feb + # - myosuite # MyoSuite requires gym==0.13.0 which conflicts with Meta-World & ManiSkill2, install separately if needed + - tensordict-nightly + - torchrl-nightly diff --git a/docker/nvidia_icd.json b/docker/nvidia_icd.json new file mode 100644 index 0000000..e7d75b2 --- /dev/null +++ b/docker/nvidia_icd.json @@ -0,0 +1,7 @@ +{ + "file_format_version" : "1.0.0", + "ICD": { + "library_path": "libGLX_nvidia.so.0", + "api_version" : "1.2.155" + } +} \ No newline at end of file diff --git a/results/acrobot-swingup.csv b/results/acrobot-swingup.csv new file mode 100644 index 0000000..455aa12 --- /dev/null +++ b/results/acrobot-swingup.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,2.4,3 +100000,275.1,3 +200000,246.4,3 +300000,330.8,3 +400000,321.6,3 +500000,334.5,3 +600000,336.1,3 +700000,450.4,3 +800000,549.5,3 +900000,368.3,3 
+1000000,476.7,3 +1100000,410.7,3 +1200000,508.6,3 +1300000,422.2,3 +1400000,576.5,3 +1500000,621.8,3 +1600000,560.9,3 +1700000,562.0,3 +1800000,522.9,3 +1900000,512.0,3 +2000000,585.1,3 +2100000,672.0,3 +2200000,552.2,3 +2300000,603.5,3 +2400000,618.9,3 +2500000,438.4,3 +2600000,555.1,3 +2700000,578.9,3 +2800000,420.4,3 +2900000,664.0,3 +3000000,564.6,3 +3100000,596.9,3 +3200000,431.7,3 +3300000,483.5,3 +3400000,550.6,3 +3500000,598.4,3 +3600000,644.1,3 +3700000,500.0,3 +3800000,617.3,3 +3900000,682.2,3 +4000000,667.5,3 +0,5.6,2 +100000,150.2,2 +200000,423.1,2 +300000,358.9,2 +400000,361.2,2 +500000,446.8,2 +600000,377.1,2 +700000,492.5,2 +800000,560.5,2 +900000,476.1,2 +1000000,593.6,2 +1100000,420.5,2 +1200000,500.2,2 +1300000,548.0,2 +1400000,480.9,2 +1500000,470.4,2 +1600000,599.1,2 +1700000,707.8,2 +1800000,504.5,2 +1900000,484.7,2 +2000000,553.0,2 +2100000,550.1,2 +2200000,596.8,2 +2300000,502.1,2 +2400000,489.2,2 +2500000,439.3,2 +2600000,549.2,2 +2700000,535.0,2 +2800000,691.8,2 +2900000,519.1,2 +3000000,525.6,2 +3100000,564.8,2 +3200000,596.0,2 +3300000,510.4,2 +3400000,560.7,2 +3500000,473.8,2 +3600000,483.5,2 +3700000,628.0,2 +3800000,679.7,2 +3900000,546.2,2 +4000000,609.6,2 +0,7.6,1 +100000,112.1,1 +200000,219.4,1 +300000,350.9,1 +400000,283.2,1 +500000,301.9,1 +600000,343.1,1 +700000,455.5,1 +800000,410.1,1 +900000,476.2,1 +1000000,483.5,1 +1100000,544.5,1 +1200000,514.5,1 +1300000,585.2,1 +1400000,551.1,1 +1500000,554.4,1 +1600000,428.7,1 +1700000,459.6,1 +1800000,542.0,1 +1900000,602.7,1 +2000000,693.8,1 +2100000,514.7,1 +2200000,593.4,1 +2300000,344.7,1 +2400000,494.2,1 +2500000,641.4,1 +2600000,604.3,1 +2700000,510.1,1 +2800000,558.8,1 +2900000,445.7,1 +3000000,524.5,1 +3100000,510.3,1 +3200000,613.1,1 +3300000,594.1,1 +3400000,542.9,1 +3500000,647.1,1 +3600000,528.6,1 +3700000,490.0,1 +3800000,619.4,1 +3900000,644.7,1 +4000000,711.5,1 diff --git a/results/cartpole-balance-sparse.csv b/results/cartpole-balance-sparse.csv new file mode 100644 
index 0000000..aa85652 --- /dev/null +++ b/results/cartpole-balance-sparse.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,49.9,2 +100000,1000.0,2 +200000,1000.0,2 +300000,1000.0,2 +400000,1000.0,2 +500000,1000.0,2 +600000,1000.0,2 +700000,1000.0,2 +800000,1000.0,2 +900000,1000.0,2 +1000000,1000.0,2 +1100000,1000.0,2 +1200000,1000.0,2 +1300000,1000.0,2 +1400000,1000.0,2 +1500000,1000.0,2 +1600000,1000.0,2 +1700000,1000.0,2 +1800000,1000.0,2 +1900000,1000.0,2 +2000000,1000.0,2 +2100000,1000.0,2 +2200000,1000.0,2 +2300000,1000.0,2 +2400000,1000.0,2 +2500000,1000.0,2 +2600000,1000.0,2 +2700000,1000.0,2 +2800000,1000.0,2 +2900000,1000.0,2 +3000000,1000.0,2 +3100000,1000.0,2 +3200000,1000.0,2 +3300000,1000.0,2 +3400000,1000.0,2 +3500000,1000.0,2 +3600000,1000.0,2 +3700000,1000.0,2 +3800000,1000.0,2 +3900000,1000.0,2 +4000000,1000.0,2 +0,21.9,1 +100000,1000.0,1 +200000,1000.0,1 +300000,1000.0,1 +400000,1000.0,1 +500000,1000.0,1 +600000,1000.0,1 +700000,1000.0,1 +800000,1000.0,1 +900000,1000.0,1 +1000000,1000.0,1 +1100000,1000.0,1 +1200000,1000.0,1 +1300000,1000.0,1 +1400000,1000.0,1 +1500000,1000.0,1 +1600000,1000.0,1 +1700000,1000.0,1 +1800000,967.7,1 +1900000,1000.0,1 +2000000,1000.0,1 +2100000,1000.0,1 +2200000,1000.0,1 +2300000,1000.0,1 +2400000,1000.0,1 +2500000,1000.0,1 +2600000,1000.0,1 +2700000,1000.0,1 +2800000,1000.0,1 +2900000,1000.0,1 +3000000,1000.0,1 +3100000,1000.0,1 +3200000,1000.0,1 +3300000,1000.0,1 +3400000,1000.0,1 +3500000,1000.0,1 +3600000,1000.0,1 +3700000,1000.0,1 +3800000,1000.0,1 +3900000,1000.0,1 +4000000,1000.0,1 +0,25.6,3 +100000,1000.0,3 +200000,1000.0,3 +300000,1000.0,3 +400000,1000.0,3 +500000,1000.0,3 +600000,1000.0,3 +700000,1000.0,3 +800000,1000.0,3 +900000,1000.0,3 +1000000,1000.0,3 +1100000,1000.0,3 +1200000,1000.0,3 +1300000,1000.0,3 +1400000,1000.0,3 +1500000,1000.0,3 +1600000,1000.0,3 +1700000,1000.0,3 +1800000,1000.0,3 +1900000,1000.0,3 +2000000,1000.0,3 +2100000,1000.0,3 +2200000,1000.0,3 +2300000,1000.0,3 +2400000,1000.0,3 
+2500000,1000.0,3 +2600000,1000.0,3 +2700000,1000.0,3 +2800000,1000.0,3 +2900000,1000.0,3 +3000000,1000.0,3 +3100000,1000.0,3 +3200000,1000.0,3 +3300000,1000.0,3 +3400000,1000.0,3 +3500000,1000.0,3 +3600000,1000.0,3 +3700000,1000.0,3 +3800000,1000.0,3 +3900000,1000.0,3 +4000000,931.0,3 diff --git a/results/cartpole-balance.csv b/results/cartpole-balance.csv new file mode 100644 index 0000000..6434369 --- /dev/null +++ b/results/cartpole-balance.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,270.6,3 +100000,998.4,3 +200000,998.4,3 +300000,998.4,3 +400000,998.9,3 +500000,986.6,3 +600000,995.7,3 +700000,999.3,3 +800000,996.3,3 +900000,992.5,3 +1000000,996.8,3 +1100000,971.0,3 +1200000,996.8,3 +1300000,997.7,3 +1400000,993.5,3 +1500000,996.6,3 +1600000,998.7,3 +1700000,999.0,3 +1800000,997.0,3 +1900000,995.8,3 +2000000,998.0,3 +2100000,999.4,3 +2200000,999.0,3 +2300000,999.0,3 +2400000,999.3,3 +2500000,998.4,3 +2600000,987.0,3 +2700000,998.5,3 +2800000,990.5,3 +2900000,992.4,3 +3000000,996.6,3 +3100000,998.5,3 +3200000,998.6,3 +3300000,997.8,3 +3400000,997.8,3 +3500000,999.1,3 +3600000,999.1,3 +3700000,979.9,3 +3800000,999.2,3 +3900000,994.5,3 +4000000,996.0,3 +0,313.1,2 +100000,997.0,2 +200000,989.4,2 +300000,998.9,2 +400000,943.0,2 +500000,999.0,2 +600000,997.1,2 +700000,997.8,2 +800000,988.6,2 +900000,993.9,2 +1000000,928.0,2 +1100000,982.2,2 +1200000,998.2,2 +1300000,995.5,2 +1400000,996.2,2 +1500000,999.0,2 +1600000,992.4,2 +1700000,983.6,2 +1800000,998.2,2 +1900000,995.9,2 +2000000,998.6,2 +2100000,999.4,2 +2200000,999.0,2 +2300000,999.2,2 +2400000,999.4,2 +2500000,999.3,2 +2600000,999.1,2 +2700000,999.3,2 +2800000,999.5,2 +2900000,999.4,2 +3000000,999.4,2 +3100000,999.4,2 +3200000,999.2,2 +3300000,997.4,2 +3400000,998.5,2 +3500000,999.3,2 +3600000,999.2,2 +3700000,999.3,2 +3800000,999.5,2 +3900000,999.5,2 +4000000,999.4,2 +0,124.3,1 +100000,997.9,1 +200000,998.9,1 +300000,999.1,1 +400000,994.7,1 +500000,996.2,1 +600000,995.4,1 +700000,997.6,1 +800000,990.8,1 
+900000,998.2,1 +1000000,999.3,1 +1100000,997.0,1 +1200000,998.6,1 +1300000,999.5,1 +1400000,999.2,1 +1500000,999.4,1 +1600000,997.1,1 +1700000,991.8,1 +1800000,992.8,1 +1900000,996.8,1 +2000000,999.3,1 +2100000,999.0,1 +2200000,999.4,1 +2300000,999.3,1 +2400000,998.4,1 +2500000,996.7,1 +2600000,998.9,1 +2700000,999.4,1 +2800000,998.6,1 +2900000,999.2,1 +3000000,999.3,1 +3100000,999.4,1 +3200000,999.4,1 +3300000,999.4,1 +3400000,999.4,1 +3500000,999.5,1 +3600000,999.3,1 +3700000,999.6,1 +3800000,999.6,1 +3900000,999.6,1 +4000000,999.6,1 diff --git a/results/cartpole-swingup-sparse.csv b/results/cartpole-swingup-sparse.csv new file mode 100644 index 0000000..7249d01 --- /dev/null +++ b/results/cartpole-swingup-sparse.csv @@ -0,0 +1,123 @@ +step,reward,seed +0,0.0,3 +100000,1.0,3 +200000,784.8,3 +300000,835.9,3 +400000,824.7,3 +500000,844.6,3 +600000,848.7,3 +700000,840.2,3 +800000,847.8,3 +900000,848.7,3 +1000000,849.8,3 +1100000,849.6,3 +1200000,848.0,3 +1300000,849.4,3 +1400000,849.2,3 +1500000,848.8,3 +1600000,845.7,3 +1700000,848.8,3 +1800000,837.2,3 +1900000,848.9,3 +2000000,849.9,3 +2100000,849.7,3 +2200000,850.0,3 +2300000,849.7,3 +2400000,848.3,3 +2500000,850.1,3 +2600000,850.0,3 +2700000,850.0,3 +2800000,259.5,3 +2900000,850.1,3 +3000000,850.0,3 +3100000,849.3,3 +3200000,849.9,3 +3300000,850.0,3 +3400000,848.9,3 +3500000,849.9,3 +3600000,850.0,3 +3700000,850.4,3 +3800000,849.8,3 +3900000,850.1,3 +0,0.0,2 +100000,0.0,2 +200000,21.6,2 +300000,707.7,2 +400000,843.3,2 +500000,844.9,2 +600000,844.6,2 +700000,846.3,2 +800000,845.7,2 +900000,847.9,2 +1000000,828.3,2 +1100000,847.6,2 +1200000,840.2,2 +1300000,848.2,2 +1400000,849.0,2 +1500000,847.9,2 +1600000,848.6,2 +1700000,848.8,2 +1800000,848.9,2 +1900000,849.6,2 +2000000,849.4,2 +2100000,848.9,2 +2200000,850.1,2 +2300000,849.5,2 +2400000,850.2,2 +2500000,850.2,2 +2600000,850.7,2 +2700000,850.7,2 +2800000,850.7,2 +2900000,848.2,2 +3000000,847.3,2 +3100000,849.0,2 +3200000,848.5,2 +3300000,850.0,2 
+3400000,781.4,2 +3500000,849.2,2 +3600000,849.9,2 +3700000,850.1,2 +3800000,850.1,2 +3900000,849.6,2 +4000000,850.3,2 +0,0.0,1 +100000,0.0,1 +200000,118.3,1 +300000,842.3,1 +400000,847.0,1 +500000,844.8,1 +600000,848.1,1 +700000,847.1,1 +800000,849.1,1 +900000,847.2,1 +1000000,846.7,1 +1100000,847.5,1 +1200000,848.0,1 +1300000,849.3,1 +1400000,842.6,1 +1500000,154.2,1 +1600000,849.1,1 +1700000,848.3,1 +1800000,849.3,1 +1900000,849.7,1 +2000000,849.1,1 +2100000,411.1,1 +2200000,849.2,1 +2300000,849.6,1 +2400000,850.1,1 +2500000,849.8,1 +2600000,848.1,1 +2700000,849.5,1 +2800000,848.7,1 +2900000,850.0,1 +3000000,850.1,1 +3100000,849.5,1 +3200000,850.4,1 +3300000,850.2,1 +3400000,850.4,1 +3500000,848.8,1 +3600000,849.7,1 +3700000,849.9,1 +3800000,849.6,1 +3900000,850.2,1 +4000000,850.5,1 diff --git a/results/cartpole-swingup.csv b/results/cartpole-swingup.csv new file mode 100644 index 0000000..a8d1845 --- /dev/null +++ b/results/cartpole-swingup.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,2.0,2 +100000,789.8,2 +200000,877.6,2 +300000,879.1,2 +400000,880.4,2 +500000,882.0,2 +600000,882.0,2 +700000,881.7,2 +800000,882.3,2 +900000,882.0,2 +1000000,881.1,2 +1100000,879.9,2 +1200000,881.3,2 +1300000,882.3,2 +1400000,880.9,2 +1500000,879.7,2 +1600000,863.9,2 +1700000,879.0,2 +1800000,880.8,2 +1900000,882.6,2 +2000000,882.3,2 +2100000,883.0,2 +2200000,882.8,2 +2300000,882.7,2 +2400000,882.2,2 +2500000,882.8,2 +2600000,882.8,2 +2700000,882.7,2 +2800000,882.8,2 +2900000,882.8,2 +3000000,882.8,2 +3100000,883.0,2 +3200000,883.0,2 +3300000,882.6,2 +3400000,883.0,2 +3500000,882.8,2 +3600000,883.0,2 +3700000,882.4,2 +3800000,882.6,2 +3900000,882.5,2 +4000000,882.5,2 +0,4.8,3 +100000,867.9,3 +200000,879.0,3 +300000,877.4,3 +400000,867.4,3 +500000,880.2,3 +600000,881.2,3 +700000,881.8,3 +800000,879.9,3 +900000,879.7,3 +1000000,881.9,3 +1100000,881.9,3 +1200000,879.1,3 +1300000,880.5,3 +1400000,881.9,3 +1500000,881.8,3 +1600000,881.3,3 +1700000,880.2,3 +1800000,881.8,3 
+1900000,882.2,3 +2000000,882.4,3 +2100000,882.8,3 +2200000,883.0,3 +2300000,883.2,3 +2400000,883.1,3 +2500000,883.0,3 +2600000,883.0,3 +2700000,882.5,3 +2800000,882.9,3 +2900000,882.8,3 +3000000,882.9,3 +3100000,883.0,3 +3200000,882.9,3 +3300000,882.9,3 +3400000,882.9,3 +3500000,882.8,3 +3600000,882.8,3 +3700000,882.7,3 +3800000,882.9,3 +3900000,882.9,3 +4000000,883.0,3 +0,1.5,1 +100000,860.5,1 +200000,859.1,1 +300000,864.4,1 +400000,864.2,1 +500000,866.0,1 +600000,860.5,1 +700000,865.7,1 +800000,865.4,1 +900000,864.4,1 +1000000,864.7,1 +1100000,866.8,1 +1200000,865.6,1 +1300000,864.8,1 +1400000,866.5,1 +1500000,866.6,1 +1600000,861.9,1 +1700000,866.6,1 +1800000,863.0,1 +1900000,865.2,1 +2000000,864.4,1 +2100000,866.8,1 +2200000,866.9,1 +2300000,864.3,1 +2400000,866.0,1 +2500000,867.4,1 +2600000,865.8,1 +2700000,867.3,1 +2800000,866.6,1 +2900000,751.6,1 +3000000,866.3,1 +3100000,867.3,1 +3200000,866.1,1 +3300000,867.3,1 +3400000,866.2,1 +3500000,866.9,1 +3600000,865.8,1 +3700000,866.7,1 +3800000,867.1,1 +3900000,867.3,1 +4000000,866.2,1 diff --git a/results/cheetah-jump.csv b/results/cheetah-jump.csv new file mode 100644 index 0000000..8000947 --- /dev/null +++ b/results/cheetah-jump.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,179.6,2 +100000,557.8,2 +200000,607.2,2 +300000,642.0,2 +400000,616.6,2 +500000,768.8,2 +600000,786.0,2 +700000,767.5,2 +800000,802.7,2 +900000,809.4,2 +1000000,806.6,2 +1100000,806.7,2 +1200000,808.0,2 +1300000,810.3,2 +1400000,808.1,2 +1500000,814.9,2 +1600000,808.8,2 +1700000,809.0,2 +1800000,813.5,2 +1900000,808.5,2 +2000000,799.6,2 +2100000,814.4,2 +2200000,820.4,2 +2300000,821.6,2 +2400000,821.6,2 +2500000,812.9,2 +2600000,824.2,2 +2700000,817.1,2 +2800000,823.3,2 +2900000,821.1,2 +3000000,825.4,2 +3100000,823.1,2 +3200000,813.2,2 +3300000,822.1,2 +3400000,827.9,2 +3500000,817.5,2 +3600000,823.5,2 +3700000,822.7,2 +3800000,830.1,2 +3900000,826.0,2 +4000000,816.0,2 +0,179.4,1 +100000,581.7,1 +200000,598.5,1 +300000,600.1,1 
+400000,587.9,1 +500000,623.6,1 +600000,239.6,1 +700000,729.4,1 +800000,765.5,1 +900000,776.2,1 +1000000,791.4,1 +1100000,803.5,1 +1200000,810.1,1 +1300000,816.9,1 +1400000,819.4,1 +1500000,818.5,1 +1600000,808.8,1 +1700000,815.4,1 +1800000,817.4,1 +1900000,815.3,1 +2000000,809.9,1 +2100000,819.2,1 +2200000,822.2,1 +2300000,822.5,1 +2400000,822.9,1 +2500000,824.0,1 +2600000,825.2,1 +2700000,822.9,1 +2800000,823.6,1 +2900000,821.5,1 +3000000,822.7,1 +3100000,825.6,1 +3200000,823.4,1 +3300000,819.5,1 +3400000,821.7,1 +3500000,819.9,1 +3600000,818.4,1 +3700000,817.8,1 +3800000,822.6,1 +3900000,820.3,1 +4000000,820.4,1 +0,178.7,3 +100000,587.1,3 +200000,594.8,3 +300000,618.6,3 +400000,700.3,3 +500000,734.4,3 +600000,760.8,3 +700000,796.1,3 +800000,804.3,3 +900000,818.0,3 +1000000,811.9,3 +1100000,811.3,3 +1200000,814.7,3 +1300000,811.6,3 +1400000,814.2,3 +1500000,819.5,3 +1600000,812.2,3 +1700000,802.4,3 +1800000,812.8,3 +1900000,813.6,3 +2000000,818.5,3 +2100000,808.0,3 +2200000,812.1,3 +2300000,816.3,3 +2400000,817.5,3 +2500000,819.8,3 +2600000,827.2,3 +2700000,829.8,3 +2800000,830.0,3 +2900000,817.4,3 +3000000,829.8,3 +3100000,827.1,3 +3200000,831.2,3 +3300000,827.9,3 +3400000,822.5,3 +3500000,828.5,3 +3600000,832.6,3 +3700000,832.7,3 +3800000,828.0,3 +3900000,834.3,3 +4000000,832.4,3 diff --git a/results/cheetah-run-back.csv b/results/cheetah-run-back.csv new file mode 100644 index 0000000..bc8f376 --- /dev/null +++ b/results/cheetah-run-back.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,7.4,2 +100000,408.8,2 +200000,549.1,2 +300000,576.2,2 +400000,615.8,2 +500000,634.7,2 +600000,675.4,2 +700000,708.1,2 +800000,779.9,2 +900000,791.7,2 +1000000,813.6,2 +1100000,825.0,2 +1200000,812.9,2 +1300000,817.7,2 +1400000,819.9,2 +1500000,825.8,2 +1600000,822.9,2 +1700000,827.3,2 +1800000,833.1,2 +1900000,832.1,2 +2000000,833.7,2 +2100000,824.8,2 +2200000,832.6,2 +2300000,804.6,2 +2400000,719.1,2 +2500000,644.0,2 +2600000,761.5,2 +2700000,779.7,2 +2800000,829.8,2 +2900000,829.5,2 
+3000000,827.8,2 +3100000,828.2,2 +3200000,803.8,2 +3300000,832.8,2 +3400000,761.7,2 +3500000,754.1,2 +3600000,830.9,2 +3700000,731.2,2 +3800000,834.4,2 +3900000,834.4,2 +4000000,834.5,2 +0,6.1,3 +100000,489.7,3 +200000,552.5,3 +300000,613.4,3 +400000,720.9,3 +500000,717.1,3 +600000,775.5,3 +700000,819.1,3 +800000,765.4,3 +900000,741.5,3 +1000000,807.2,3 +1100000,824.1,3 +1200000,747.2,3 +1300000,827.1,3 +1400000,831.0,3 +1500000,827.2,3 +1600000,831.4,3 +1700000,837.0,3 +1800000,833.4,3 +1900000,834.2,3 +2000000,835.7,3 +2100000,834.2,3 +2200000,771.0,3 +2300000,745.5,3 +2400000,779.3,3 +2500000,743.4,3 +2600000,834.0,3 +2700000,835.4,3 +2800000,838.1,3 +2900000,806.6,3 +3000000,836.0,3 +3100000,720.8,3 +3200000,837.1,3 +3300000,837.2,3 +3400000,823.1,3 +3500000,780.0,3 +3600000,842.3,3 +3700000,791.2,3 +3800000,837.5,3 +3900000,838.8,3 +4000000,842.2,3 +0,6.0,1 +100000,487.4,1 +200000,599.5,1 +300000,706.0,1 +400000,739.2,1 +500000,780.7,1 +600000,800.6,1 +700000,820.1,1 +800000,817.1,1 +900000,822.9,1 +1000000,780.2,1 +1100000,831.8,1 +1200000,822.0,1 +1300000,823.3,1 +1400000,748.2,1 +1500000,803.4,1 +1600000,826.9,1 +1700000,721.3,1 +1800000,835.4,1 +1900000,668.7,1 +2000000,829.6,1 +2100000,812.8,1 +2200000,728.9,1 +2300000,733.1,1 +2400000,788.2,1 +2500000,784.5,1 +2600000,813.0,1 +2700000,781.1,1 +2800000,770.9,1 +2900000,812.2,1 +3000000,845.9,1 +3100000,854.2,1 +3200000,854.5,1 +3300000,862.0,1 +3400000,861.6,1 +3500000,861.2,1 +3600000,862.3,1 +3700000,865.2,1 +3800000,863.5,1 +3900000,857.3,1 +4000000,860.5,1 diff --git a/results/cheetah-run-backwards.csv b/results/cheetah-run-backwards.csv new file mode 100644 index 0000000..dc1df3e --- /dev/null +++ b/results/cheetah-run-backwards.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,11.7,1 +100000,517.3,1 +200000,589.1,1 +300000,617.6,1 +400000,648.2,1 +500000,631.8,1 +600000,636.5,1 +700000,665.2,1 +800000,647.2,1 +900000,657.5,1 +1000000,649.2,1 +1100000,670.9,1 +1200000,661.6,1 +1300000,643.9,1 
+1400000,677.1,1 +1500000,625.0,1 +1600000,680.6,1 +1700000,684.5,1 +1800000,689.4,1 +1900000,684.2,1 +2000000,656.5,1 +2100000,672.5,1 +2200000,677.6,1 +2300000,666.8,1 +2400000,682.3,1 +2500000,687.4,1 +2600000,684.8,1 +2700000,669.5,1 +2800000,683.5,1 +2900000,699.3,1 +3000000,692.4,1 +3100000,701.1,1 +3200000,690.3,1 +3300000,708.6,1 +3400000,715.5,1 +3500000,677.8,1 +3600000,717.1,1 +3700000,716.4,1 +3800000,710.0,1 +3900000,713.3,1 +4000000,688.8,1 +0,8.9,2 +100000,475.7,2 +200000,608.2,2 +300000,680.6,2 +400000,730.0,2 +500000,732.0,2 +600000,751.4,2 +700000,748.9,2 +800000,772.8,2 +900000,773.9,2 +1000000,787.3,2 +1100000,788.0,2 +1200000,802.6,2 +1300000,795.5,2 +1400000,820.1,2 +1500000,823.7,2 +1600000,786.4,2 +1700000,835.4,2 +1800000,832.1,2 +1900000,846.9,2 +2000000,826.8,2 +2100000,824.7,2 +2200000,852.9,2 +2300000,840.2,2 +2400000,722.3,2 +2500000,856.6,2 +2600000,868.1,2 +2700000,785.4,2 +2800000,859.1,2 +2900000,832.5,2 +3000000,837.1,2 +3100000,870.3,2 +3200000,860.0,2 +3300000,862.9,2 +3400000,871.6,2 +3500000,879.5,2 +3600000,871.4,2 +3700000,860.3,2 +3800000,873.8,2 +3900000,879.2,2 +4000000,875.6,2 +0,9.6,3 +100000,568.0,3 +200000,585.0,3 +300000,651.2,3 +400000,670.8,3 +500000,676.2,3 +600000,666.2,3 +700000,676.5,3 +800000,672.0,3 +900000,654.2,3 +1000000,681.1,3 +1100000,679.4,3 +1200000,684.5,3 +1300000,684.3,3 +1400000,656.6,3 +1500000,687.8,3 +1600000,685.9,3 +1700000,691.2,3 +1800000,688.8,3 +1900000,690.4,3 +2000000,688.4,3 +2100000,688.7,3 +2200000,694.2,3 +2300000,688.0,3 +2400000,692.1,3 +2500000,696.4,3 +2600000,697.2,3 +2700000,693.7,3 +2800000,689.8,3 +2900000,700.4,3 +3000000,694.0,3 +3100000,694.1,3 +3200000,695.0,3 +3300000,687.1,3 +3400000,696.4,3 +3500000,700.6,3 +3600000,698.8,3 +3700000,700.4,3 +3800000,690.7,3 +3900000,699.9,3 +4000000,696.3,3 diff --git a/results/cheetah-run-front.csv b/results/cheetah-run-front.csv new file mode 100644 index 0000000..5dd10d0 --- /dev/null +++ b/results/cheetah-run-front.csv @@ -0,0 
+1,124 @@ +step,reward,seed +0,7.5,2 +100000,318.6,2 +200000,491.8,2 +300000,595.9,2 +400000,539.8,2 +500000,647.9,2 +600000,681.0,2 +700000,674.8,2 +800000,696.5,2 +900000,696.3,2 +1000000,685.5,2 +1100000,658.2,2 +1200000,672.1,2 +1300000,661.2,2 +1400000,671.9,2 +1500000,663.4,2 +1600000,685.6,2 +1700000,666.0,2 +1800000,665.0,2 +1900000,689.3,2 +2000000,681.9,2 +2100000,649.4,2 +2200000,671.0,2 +2300000,677.9,2 +2400000,461.3,2 +2500000,653.0,2 +2600000,684.1,2 +2700000,689.7,2 +2800000,630.6,2 +2900000,657.5,2 +3000000,673.7,2 +3100000,698.7,2 +3200000,704.1,2 +3300000,702.0,2 +3400000,697.2,2 +3500000,692.5,2 +3600000,706.0,2 +3700000,689.0,2 +3800000,703.3,2 +3900000,725.7,2 +4000000,702.6,2 +0,6.2,1 +100000,306.7,1 +200000,509.0,1 +300000,607.6,1 +400000,637.9,1 +500000,689.8,1 +600000,690.6,1 +700000,701.8,1 +800000,692.9,1 +900000,693.5,1 +1000000,693.5,1 +1100000,673.2,1 +1200000,664.4,1 +1300000,701.0,1 +1400000,676.8,1 +1500000,637.2,1 +1600000,683.5,1 +1700000,699.8,1 +1800000,684.7,1 +1900000,664.6,1 +2000000,688.1,1 +2100000,661.3,1 +2200000,712.0,1 +2300000,676.5,1 +2400000,665.7,1 +2500000,641.3,1 +2600000,720.6,1 +2700000,723.2,1 +2800000,718.9,1 +2900000,721.6,1 +3000000,735.6,1 +3100000,749.7,1 +3200000,746.5,1 +3300000,729.7,1 +3400000,755.6,1 +3500000,747.8,1 +3600000,757.6,1 +3700000,749.0,1 +3800000,703.7,1 +3900000,692.1,1 +4000000,698.4,1 +0,6.2,3 +100000,307.1,3 +200000,473.4,3 +300000,592.0,3 +400000,650.8,3 +500000,661.2,3 +600000,697.0,3 +700000,682.2,3 +800000,672.4,3 +900000,674.0,3 +1000000,651.4,3 +1100000,671.4,3 +1200000,667.0,3 +1300000,665.7,3 +1400000,657.8,3 +1500000,475.8,3 +1600000,664.2,3 +1700000,652.7,3 +1800000,652.0,3 +1900000,654.7,3 +2000000,656.1,3 +2100000,640.9,3 +2200000,660.7,3 +2300000,647.9,3 +2400000,578.2,3 +2500000,611.5,3 +2600000,657.2,3 +2700000,658.0,3 +2800000,668.1,3 +2900000,678.4,3 +3000000,677.8,3 +3100000,648.5,3 +3200000,674.6,3 +3300000,669.6,3 +3400000,599.7,3 +3500000,663.6,3 +3600000,676.2,3 
+3700000,661.1,3 +3800000,680.0,3 +3900000,655.7,3 +4000000,680.5,3 diff --git a/results/cheetah-run.csv b/results/cheetah-run.csv new file mode 100644 index 0000000..806e3ff --- /dev/null +++ b/results/cheetah-run.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,11.6,2 +100000,613.8,2 +200000,710.5,2 +300000,764.8,2 +400000,822.3,2 +500000,822.1,2 +600000,878.6,2 +700000,898.1,2 +800000,906.2,2 +900000,909.0,2 +1000000,896.9,2 +1100000,911.2,2 +1200000,904.9,2 +1300000,827.8,2 +1400000,906.5,2 +1500000,916.8,2 +1600000,914.3,2 +1700000,914.6,2 +1800000,913.4,2 +1900000,914.6,2 +2000000,918.3,2 +2100000,917.5,2 +2200000,916.4,2 +2300000,919.7,2 +2400000,913.8,2 +2500000,917.3,2 +2600000,913.0,2 +2700000,912.6,2 +2800000,917.0,2 +2900000,918.5,2 +3000000,917.7,2 +3100000,917.9,2 +3200000,847.8,2 +3300000,920.1,2 +3400000,917.5,2 +3500000,919.7,2 +3600000,920.1,2 +3700000,917.4,2 +3800000,921.1,2 +3900000,920.8,2 +4000000,922.9,2 +0,2.4,1 +100000,454.4,1 +200000,522.1,1 +300000,457.8,1 +400000,655.3,1 +500000,593.8,1 +600000,659.6,1 +700000,667.6,1 +800000,673.4,1 +900000,708.3,1 +1000000,725.4,1 +1100000,720.4,1 +1200000,721.9,1 +1300000,731.6,1 +1400000,692.6,1 +1500000,677.1,1 +1600000,727.0,1 +1700000,675.0,1 +1800000,698.2,1 +1900000,694.6,1 +2000000,747.3,1 +2100000,743.1,1 +2200000,751.7,1 +2300000,757.5,1 +2400000,756.4,1 +2500000,720.6,1 +2600000,763.9,1 +2700000,771.1,1 +2800000,778.5,1 +2900000,781.3,1 +3000000,784.2,1 +3100000,777.9,1 +3200000,793.0,1 +3300000,745.2,1 +3400000,762.1,1 +3500000,774.2,1 +3600000,801.8,1 +3700000,816.7,1 +3800000,824.8,1 +3900000,837.2,1 +4000000,848.3,1 +0,3.9,3 +100000,489.2,3 +200000,610.6,3 +300000,550.0,3 +400000,795.7,3 +500000,856.9,3 +600000,820.2,3 +700000,855.5,3 +800000,896.5,3 +900000,889.2,3 +1000000,910.8,3 +1100000,902.7,3 +1200000,912.1,3 +1300000,815.1,3 +1400000,913.2,3 +1500000,915.9,3 +1600000,911.0,3 +1700000,917.2,3 +1800000,915.6,3 +1900000,916.4,3 +2000000,914.3,3 +2100000,917.7,3 +2200000,916.3,3 
+2300000,920.0,3 +2400000,913.8,3 +2500000,916.9,3 +2600000,921.3,3 +2700000,920.3,3 +2800000,917.4,3 +2900000,914.9,3 +3000000,919.0,3 +3100000,913.2,3 +3200000,921.3,3 +3300000,922.0,3 +3400000,920.6,3 +3500000,921.7,3 +3600000,921.6,3 +3700000,922.2,3 +3800000,911.8,3 +3900000,919.6,3 +4000000,916.3,3 diff --git a/results/cup-catch.csv b/results/cup-catch.csv new file mode 100644 index 0000000..d266bc3 --- /dev/null +++ b/results/cup-catch.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,0.0,3 +100000,973.3,3 +200000,972.1,3 +300000,978.0,3 +400000,979.0,3 +500000,981.8,3 +600000,983.2,3 +700000,986.4,3 +800000,979.9,3 +900000,980.8,3 +1000000,981.7,3 +1100000,987.8,3 +1200000,986.1,3 +1300000,980.2,3 +1400000,985.2,3 +1500000,981.0,3 +1600000,985.5,3 +1700000,987.0,3 +1800000,988.1,3 +1900000,986.8,3 +2000000,986.2,3 +2100000,981.3,3 +2200000,986.1,3 +2300000,981.3,3 +2400000,986.5,3 +2500000,986.8,3 +2600000,979.7,3 +2700000,986.0,3 +2800000,978.8,3 +2900000,983.3,3 +3000000,989.4,3 +3100000,977.5,3 +3200000,984.7,3 +3300000,985.2,3 +3400000,986.0,3 +3500000,987.4,3 +3600000,983.8,3 +3700000,989.2,3 +3800000,976.8,3 +3900000,991.3,3 +4000000,986.5,3 +0,0.0,2 +100000,983.7,2 +200000,977.0,2 +300000,980.2,2 +400000,976.0,2 +500000,985.8,2 +600000,985.8,2 +700000,983.6,2 +800000,984.5,2 +900000,983.1,2 +1000000,984.7,2 +1100000,977.9,2 +1200000,979.8,2 +1300000,976.6,2 +1400000,979.5,2 +1500000,982.7,2 +1600000,988.8,2 +1700000,983.0,2 +1800000,984.5,2 +1900000,984.0,2 +2000000,983.3,2 +2100000,987.3,2 +2200000,979.1,2 +2300000,984.9,2 +2400000,980.5,2 +2500000,984.2,2 +2600000,978.3,2 +2700000,983.5,2 +2800000,981.9,2 +2900000,982.9,2 +3000000,989.1,2 +3100000,983.2,2 +3200000,976.9,2 +3300000,982.9,2 +3400000,975.5,2 +3500000,979.7,2 +3600000,980.9,2 +3700000,980.7,2 +3800000,990.6,2 +3900000,983.1,2 +4000000,984.9,2 +0,0.0,1 +100000,986.7,1 +200000,980.6,1 +300000,975.1,1 +400000,983.8,1 +500000,986.5,1 +600000,982.9,1 +700000,986.1,1 +800000,984.1,1 +900000,981.3,1 
+1000000,991.1,1 +1100000,984.0,1 +1200000,985.5,1 +1300000,986.2,1 +1400000,985.7,1 +1500000,983.7,1 +1600000,982.8,1 +1700000,986.0,1 +1800000,984.9,1 +1900000,981.7,1 +2000000,981.6,1 +2100000,982.9,1 +2200000,980.8,1 +2300000,988.0,1 +2400000,981.1,1 +2500000,977.7,1 +2600000,981.1,1 +2700000,977.7,1 +2800000,980.2,1 +2900000,988.5,1 +3000000,976.9,1 +3100000,983.5,1 +3200000,981.9,1 +3300000,985.7,1 +3400000,981.3,1 +3500000,988.4,1 +3600000,986.6,1 +3700000,988.1,1 +3800000,986.1,1 +3900000,981.4,1 +4000000,979.9,1 diff --git a/results/cup-spin.csv b/results/cup-spin.csv new file mode 100644 index 0000000..05991cd --- /dev/null +++ b/results/cup-spin.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,27.4,3 +100000,844.1,3 +200000,843.9,3 +300000,825.2,3 +400000,838.9,3 +500000,847.4,3 +600000,845.9,3 +700000,846.7,3 +800000,845.8,3 +900000,847.9,3 +1000000,847.2,3 +1100000,846.4,3 +1200000,837.9,3 +1300000,847.6,3 +1400000,847.2,3 +1500000,846.5,3 +1600000,848.3,3 +1700000,848.3,3 +1800000,846.9,3 +1900000,845.4,3 +2000000,848.6,3 +2100000,848.1,3 +2200000,848.4,3 +2300000,847.7,3 +2400000,848.0,3 +2500000,848.9,3 +2600000,848.2,3 +2700000,848.2,3 +2800000,846.2,3 +2900000,848.3,3 +3000000,848.0,3 +3100000,846.6,3 +3200000,848.9,3 +3300000,848.6,3 +3400000,847.5,3 +3500000,848.6,3 +3600000,849.0,3 +3700000,848.7,3 +3800000,848.0,3 +3900000,847.9,3 +4000000,848.9,3 +0,0.0,2 +100000,841.4,2 +200000,846.4,2 +300000,848.3,2 +400000,846.9,2 +500000,848.1,2 +600000,848.1,2 +700000,845.0,2 +800000,846.6,2 +900000,845.7,2 +1000000,846.5,2 +1100000,847.5,2 +1200000,847.8,2 +1300000,845.9,2 +1400000,846.7,2 +1500000,848.2,2 +1600000,845.0,2 +1700000,847.6,2 +1800000,848.3,2 +1900000,848.5,2 +2000000,847.7,2 +2100000,847.6,2 +2200000,846.3,2 +2300000,847.5,2 +2400000,847.3,2 +2500000,847.5,2 +2600000,848.3,2 +2700000,848.0,2 +2800000,846.9,2 +2900000,847.2,2 +3000000,847.8,2 +3100000,848.5,2 +3200000,848.5,2 +3300000,848.9,2 +3400000,846.6,2 +3500000,847.9,2 +3600000,849.1,2 
+3700000,848.4,2 +3800000,846.4,2 +3900000,842.1,2 +4000000,845.9,2 +0,28.3,1 +100000,844.4,1 +200000,847.0,1 +300000,846.9,1 +400000,847.0,1 +500000,847.9,1 +600000,846.9,1 +700000,847.1,1 +800000,846.7,1 +900000,848.7,1 +1000000,848.3,1 +1100000,846.8,1 +1200000,847.9,1 +1300000,843.8,1 +1400000,848.4,1 +1500000,846.4,1 +1600000,847.4,1 +1700000,847.2,1 +1800000,848.6,1 +1900000,848.8,1 +2000000,847.6,1 +2100000,846.5,1 +2200000,848.9,1 +2300000,848.8,1 +2400000,849.0,1 +2500000,847.0,1 +2600000,847.6,1 +2700000,848.7,1 +2800000,848.4,1 +2900000,848.7,1 +3000000,849.0,1 +3100000,848.7,1 +3200000,847.9,1 +3300000,849.1,1 +3400000,847.5,1 +3500000,848.5,1 +3600000,848.8,1 +3700000,848.2,1 +3800000,849.0,1 +3900000,848.3,1 +4000000,848.5,1 diff --git a/results/dog-run.csv b/results/dog-run.csv new file mode 100644 index 0000000..8541209 --- /dev/null +++ b/results/dog-run.csv @@ -0,0 +1,423 @@ +step,reward,seed +0,4.5,2 +100000,7.6,2 +200000,86.8,2 +300000,143.8,2 +400000,161.9,2 +500000,142.0,2 +600000,133.2,2 +700000,153.9,2 +800000,159.3,2 +900000,172.7,2 +1000000,234.7,2 +1100000,226.3,2 +1200000,287.8,2 +1300000,304.8,2 +1400000,384.0,2 +1500000,380.6,2 +1600000,345.5,2 +1700000,411.1,2 +1800000,421.8,2 +1900000,468.7,2 +2000000,503.2,2 +2100000,503.2,2 +2200000,518.9,2 +2300000,510.9,2 +2400000,531.4,2 +2500000,537.3,2 +2600000,551.1,2 +2700000,562.1,2 +2800000,572.9,2 +2900000,586.8,2 +3000000,600.0,2 +3100000,560.3,2 +3200000,602.9,2 +3300000,611.5,2 +3400000,626.3,2 +3500000,629.3,2 +3600000,634.0,2 +3700000,648.8,2 +3800000,623.8,2 +3900000,643.9,2 +4000000,655.6,2 +4100000,669.7,2 +4200000,666.4,2 +4300000,668.2,2 +4400000,670.7,2 +4500000,683.0,2 +4600000,700.3,2 +4700000,691.5,2 +4800000,691.1,2 +4900000,702.4,2 +5000000,701.8,2 +5100000,679.9,2 +5200000,709.2,2 +5300000,715.6,2 +5400000,733.2,2 +5500000,725.2,2 +5600000,725.8,2 +5700000,721.6,2 +5800000,747.1,2 +5900000,750.1,2 +6000000,743.6,2 +6100000,701.2,2 +6200000,743.6,2 +6300000,768.2,2 
+6400000,756.2,2 +6500000,783.1,2 +6600000,806.5,2 +6700000,740.4,2 +6800000,781.7,2 +6900000,806.4,2 +7000000,802.4,2 +7100000,797.2,2 +7200000,799.0,2 +7300000,817.3,2 +7400000,821.9,2 +7500000,809.4,2 +7600000,847.8,2 +7700000,861.6,2 +7800000,825.3,2 +7900000,853.4,2 +8000000,857.5,2 +8100000,855.0,2 +8200000,868.9,2 +8300000,881.1,2 +8400000,832.6,2 +8500000,807.9,2 +8600000,832.1,2 +8700000,843.4,2 +8800000,836.3,2 +8900000,837.4,2 +9000000,867.5,2 +9100000,860.2,2 +9200000,876.6,2 +9300000,849.5,2 +9400000,878.5,2 +9500000,874.0,2 +9600000,860.9,2 +9700000,901.8,2 +9800000,896.1,2 +9900000,866.1,2 +10000000,873.3,2 +10100000,882.8,2 +10200000,872.6,2 +10300000,896.2,2 +10400000,861.1,2 +10500000,895.9,2 +10600000,878.6,2 +10700000,891.2,2 +10800000,885.4,2 +10900000,890.4,2 +11000000,878.3,2 +11100000,895.6,2 +11200000,882.7,2 +11300000,891.3,2 +11400000,837.0,2 +11500000,870.3,2 +11600000,889.4,2 +11700000,890.4,2 +11800000,872.1,2 +11900000,883.8,2 +12000000,878.4,2 +12100000,882.4,2 +12200000,888.6,2 +12300000,900.0,2 +12400000,891.4,2 +12500000,905.6,2 +12600000,882.9,2 +12700000,888.8,2 +12800000,871.5,2 +12900000,905.9,2 +13000000,894.2,2 +13100000,897.6,2 +13200000,900.3,2 +13300000,893.5,2 +13400000,889.5,2 +13500000,889.3,2 +13600000,898.5,2 +13700000,872.8,2 +13800000,894.2,2 +13900000,895.2,2 +14000000,881.1,2 +0,6.5,1 +100000,39.1,1 +200000,100.9,1 +300000,138.2,1 +400000,136.8,1 +500000,130.1,1 +600000,181.4,1 +700000,153.1,1 +800000,196.1,1 +900000,207.1,1 +1000000,247.7,1 +1100000,245.8,1 +1200000,291.4,1 +1300000,316.9,1 +1400000,56.4,1 +1500000,296.4,1 +1600000,403.0,1 +1700000,420.4,1 +1800000,408.1,1 +1900000,456.0,1 +2000000,414.9,1 +2100000,493.8,1 +2200000,503.9,1 +2300000,506.3,1 +2400000,496.8,1 +2500000,538.9,1 +2600000,546.3,1 +2700000,508.4,1 +2800000,565.0,1 +2900000,566.0,1 +3000000,585.3,1 +3100000,588.1,1 +3200000,592.4,1 +3300000,619.6,1 +3400000,625.3,1 +3500000,625.3,1 +3600000,620.4,1 +3700000,647.8,1 +3800000,644.8,1 
+3900000,652.5,1 +4000000,706.2,1 +4100000,692.4,1 +4200000,685.7,1 +4300000,702.0,1 +4400000,699.4,1 +4500000,692.1,1 +4600000,710.5,1 +4700000,741.7,1 +4800000,708.2,1 +4900000,728.9,1 +5000000,721.1,1 +5100000,726.2,1 +5200000,692.4,1 +5300000,718.0,1 +5400000,729.2,1 +5500000,748.7,1 +5600000,749.9,1 +5700000,760.7,1 +5800000,764.9,1 +5900000,785.3,1 +6000000,740.1,1 +6100000,739.5,1 +6200000,799.8,1 +6300000,781.7,1 +6400000,802.1,1 +6500000,792.5,1 +6600000,822.7,1 +6700000,805.6,1 +6800000,829.5,1 +6900000,818.6,1 +7000000,814.5,1 +7100000,788.1,1 +7200000,832.1,1 +7300000,810.6,1 +7400000,815.4,1 +7500000,837.7,1 +7600000,842.2,1 +7700000,840.3,1 +7800000,791.3,1 +7900000,831.2,1 +8000000,798.5,1 +8100000,824.4,1 +8200000,838.2,1 +8300000,815.7,1 +8400000,830.2,1 +8500000,841.9,1 +8600000,847.3,1 +8700000,843.9,1 +8800000,850.2,1 +8900000,842.8,1 +9000000,848.9,1 +9100000,845.9,1 +9200000,866.2,1 +9300000,860.5,1 +9400000,863.4,1 +9500000,826.2,1 +9600000,850.6,1 +9700000,861.1,1 +9800000,848.1,1 +9900000,851.4,1 +10000000,843.8,1 +10100000,861.4,1 +10200000,852.1,1 +10300000,860.3,1 +10400000,844.6,1 +10500000,848.6,1 +10600000,860.9,1 +10700000,856.6,1 +10800000,850.8,1 +10900000,854.6,1 +11000000,845.8,1 +11100000,788.5,1 +11200000,837.7,1 +11300000,846.3,1 +11400000,847.0,1 +11500000,869.9,1 +11600000,861.3,1 +11700000,847.4,1 +11800000,841.6,1 +11900000,856.3,1 +12000000,827.8,1 +12100000,839.8,1 +12200000,863.9,1 +12300000,859.3,1 +12400000,857.4,1 +12500000,872.0,1 +12600000,852.0,1 +12700000,849.6,1 +12800000,853.9,1 +12900000,854.9,1 +13000000,827.8,1 +13100000,848.9,1 +13200000,863.5,1 +13300000,848.3,1 +13400000,873.4,1 +13500000,878.6,1 +13600000,865.5,1 +13700000,855.7,1 +13800000,864.8,1 +13900000,847.2,1 +14000000,859.1,1 +0,6.4,3 +100000,33.1,3 +200000,13.0,3 +300000,141.7,3 +400000,140.0,3 +500000,126.3,3 +600000,204.0,3 +700000,182.4,3 +800000,21.6,3 +900000,27.7,3 +1000000,27.2,3 +1100000,53.9,3 +1200000,79.8,3 +1300000,43.8,3 
+1400000,41.0,3 +1500000,245.1,3 +1600000,250.7,3 +1700000,325.9,3 +1800000,235.0,3 +1900000,326.6,3 +2000000,362.9,3 +2100000,427.7,3 +2200000,482.3,3 +2300000,467.6,3 +2400000,455.0,3 +2500000,485.8,3 +2600000,543.9,3 +2700000,527.2,3 +2800000,523.2,3 +2900000,576.5,3 +3000000,413.3,3 +3100000,530.1,3 +3200000,592.5,3 +3300000,515.1,3 +3400000,598.3,3 +3500000,513.9,3 +3600000,661.6,3 +3700000,653.9,3 +3800000,676.8,3 +3900000,652.4,3 +4000000,685.3,3 +4100000,653.8,3 +4200000,705.3,3 +4300000,656.8,3 +4400000,628.7,3 +4500000,721.3,3 +4600000,731.9,3 +4700000,770.5,3 +4800000,739.4,3 +4900000,716.3,3 +5000000,778.7,3 +5100000,787.4,3 +5200000,791.5,3 +5300000,791.5,3 +5400000,796.4,3 +5500000,754.3,3 +5600000,804.4,3 +5700000,791.9,3 +5800000,791.1,3 +5900000,811.8,3 +6000000,781.7,3 +6100000,787.4,3 +6200000,790.8,3 +6300000,774.2,3 +6400000,820.6,3 +6500000,817.9,3 +6600000,784.3,3 +6700000,829.4,3 +6800000,773.1,3 +6900000,821.5,3 +7000000,800.1,3 +7100000,831.4,3 +7200000,804.0,3 +7300000,819.5,3 +7400000,807.0,3 +7500000,850.3,3 +7600000,832.3,3 +7700000,856.1,3 +7800000,846.5,3 +7900000,830.2,3 +8000000,800.4,3 +8100000,843.9,3 +8200000,859.8,3 +8300000,858.0,3 +8400000,864.8,3 +8500000,842.2,3 +8600000,844.1,3 +8700000,852.1,3 +8800000,858.2,3 +8900000,830.2,3 +9000000,851.9,3 +9100000,842.8,3 +9200000,798.7,3 +9300000,861.9,3 +9400000,847.2,3 +9500000,851.6,3 +9600000,870.4,3 +9700000,850.0,3 +9800000,852.3,3 +9900000,846.5,3 +10000000,865.8,3 +10100000,876.8,3 +10200000,843.0,3 +10300000,776.8,3 +10400000,861.0,3 +10500000,871.8,3 +10600000,841.2,3 +10700000,859.3,3 +10800000,876.6,3 +10900000,836.2,3 +11000000,849.6,3 +11100000,847.0,3 +11200000,863.5,3 +11300000,848.0,3 +11400000,871.4,3 +11500000,856.9,3 +11600000,831.0,3 +11700000,880.2,3 +11800000,872.9,3 +11900000,866.9,3 +12000000,868.8,3 +12100000,855.2,3 +12200000,807.0,3 +12300000,863.2,3 +12400000,836.8,3 +12500000,876.0,3 +12600000,856.1,3 +12700000,873.3,3 +12800000,874.7,3 
+12900000,883.7,3 +13000000,872.1,3 +13100000,885.1,3 +13200000,853.0,3 +13300000,850.9,3 +13400000,874.5,3 +13500000,851.0,3 +13600000,885.7,3 +13700000,870.0,3 +13800000,868.6,3 +13900000,862.4,3 diff --git a/results/dog-stand.csv b/results/dog-stand.csv new file mode 100644 index 0000000..c1ddbda --- /dev/null +++ b/results/dog-stand.csv @@ -0,0 +1,423 @@ +step,reward,seed +0,19.4,2 +100000,48.3,2 +200000,543.1,2 +300000,701.5,2 +400000,648.1,2 +500000,673.6,2 +600000,640.9,2 +700000,685.1,2 +800000,760.3,2 +900000,811.2,2 +1000000,830.8,2 +1100000,834.7,2 +1200000,812.1,2 +1300000,829.8,2 +1400000,797.2,2 +1500000,862.6,2 +1600000,890.4,2 +1700000,894.6,2 +1800000,925.7,2 +1900000,909.3,2 +2000000,899.0,2 +2100000,849.5,2 +2200000,962.4,2 +2300000,971.7,2 +2400000,973.5,2 +2500000,967.8,2 +2600000,959.4,2 +2700000,963.5,2 +2800000,936.0,2 +2900000,974.4,2 +3000000,954.8,2 +3100000,961.9,2 +3200000,975.8,2 +3300000,869.3,2 +3400000,939.0,2 +3500000,897.8,2 +3600000,972.4,2 +3700000,978.7,2 +3800000,972.6,2 +3900000,969.4,2 +4000000,965.3,2 +4100000,959.5,2 +4200000,936.4,2 +4300000,971.4,2 +4400000,935.1,2 +4500000,975.4,2 +4600000,976.4,2 +4700000,981.5,2 +4800000,970.8,2 +4900000,968.5,2 +5000000,977.8,2 +5100000,972.8,2 +5200000,968.5,2 +5300000,959.6,2 +5400000,972.9,2 +5500000,981.2,2 +5600000,946.1,2 +5700000,980.7,2 +5800000,956.0,2 +5900000,966.5,2 +6000000,967.9,2 +6100000,978.0,2 +6200000,979.3,2 +6300000,973.1,2 +6400000,979.6,2 +6500000,967.4,2 +6600000,981.8,2 +6700000,975.6,2 +6800000,967.9,2 +6900000,964.0,2 +7000000,974.0,2 +7100000,981.5,2 +7200000,976.7,2 +7300000,984.9,2 +7400000,961.4,2 +7500000,969.0,2 +7600000,979.5,2 +7700000,971.1,2 +7800000,958.1,2 +7900000,977.1,2 +8000000,965.5,2 +8100000,984.5,2 +8200000,736.2,2 +8300000,965.3,2 +8400000,981.3,2 +8500000,983.4,2 +8600000,983.6,2 +8700000,976.0,2 +8800000,981.0,2 +8900000,965.3,2 +9000000,972.5,2 +9100000,980.6,2 +9200000,942.9,2 +9300000,932.5,2 +9400000,972.7,2 +9500000,979.1,2 
+9600000,947.6,2 +9700000,960.6,2 +9800000,982.0,2 +9900000,933.9,2 +10000000,897.3,2 +10100000,912.8,2 +10200000,940.1,2 +10300000,980.0,2 +10400000,945.2,2 +10500000,976.0,2 +10600000,974.2,2 +10700000,980.6,2 +10800000,985.3,2 +10900000,983.5,2 +11000000,982.8,2 +11100000,974.9,2 +11200000,952.8,2 +11300000,965.2,2 +11400000,980.5,2 +11500000,966.1,2 +11600000,954.6,2 +11700000,965.1,2 +11800000,959.9,2 +11900000,979.4,2 +12000000,920.8,2 +12100000,965.9,2 +12200000,974.6,2 +12300000,972.2,2 +12400000,968.1,2 +12500000,974.3,2 +12600000,968.8,2 +12700000,976.7,2 +12800000,978.7,2 +12900000,982.1,2 +13000000,982.6,2 +13100000,955.4,2 +13200000,954.6,2 +13300000,958.6,2 +13400000,928.9,2 +13500000,972.6,2 +13600000,951.2,2 +13700000,980.5,2 +13800000,948.8,2 +13900000,925.8,2 +14000000,966.9,2 +0,27.7,1 +100000,274.0,1 +200000,562.1,1 +300000,782.0,1 +400000,787.1,1 +500000,765.0,1 +600000,830.9,1 +700000,796.2,1 +800000,757.5,1 +900000,796.4,1 +1000000,775.6,1 +1100000,816.6,1 +1200000,925.3,1 +1300000,902.6,1 +1400000,934.1,1 +1500000,783.5,1 +1600000,932.5,1 +1700000,897.7,1 +1800000,942.3,1 +1900000,944.0,1 +2000000,944.6,1 +2100000,944.3,1 +2200000,957.1,1 +2300000,907.3,1 +2400000,938.2,1 +2500000,906.3,1 +2600000,964.2,1 +2700000,957.2,1 +2800000,888.0,1 +2900000,967.0,1 +3000000,967.6,1 +3100000,912.4,1 +3200000,960.0,1 +3300000,926.8,1 +3400000,953.2,1 +3500000,963.0,1 +3600000,937.9,1 +3700000,934.8,1 +3800000,952.7,1 +3900000,965.1,1 +4000000,952.7,1 +4100000,980.4,1 +4200000,934.2,1 +4300000,962.5,1 +4400000,950.8,1 +4500000,970.8,1 +4600000,952.9,1 +4700000,967.1,1 +4800000,925.7,1 +4900000,983.3,1 +5000000,956.3,1 +5100000,936.2,1 +5200000,960.6,1 +5300000,973.5,1 +5400000,983.5,1 +5500000,940.4,1 +5600000,979.4,1 +5700000,981.5,1 +5800000,985.1,1 +5900000,981.6,1 +6000000,957.9,1 +6100000,957.1,1 +6200000,976.3,1 +6300000,897.7,1 +6400000,976.6,1 +6500000,975.6,1 +6600000,982.9,1 +6700000,984.1,1 +6800000,976.3,1 +6900000,986.8,1 +7000000,944.0,1 
+7100000,975.7,1 +7200000,935.4,1 +7300000,885.8,1 +7400000,935.2,1 +7500000,975.5,1 +7600000,985.8,1 +7700000,922.1,1 +7800000,983.4,1 +7900000,989.7,1 +8000000,972.3,1 +8100000,950.2,1 +8200000,928.0,1 +8300000,963.0,1 +8400000,987.7,1 +8500000,961.3,1 +8600000,975.4,1 +8700000,954.5,1 +8800000,962.6,1 +8900000,964.4,1 +9000000,830.2,1 +9100000,986.4,1 +9200000,804.8,1 +9300000,771.5,1 +9400000,892.2,1 +9500000,962.1,1 +9600000,966.7,1 +9700000,367.7,1 +9800000,961.0,1 +9900000,977.0,1 +10000000,891.7,1 +10100000,841.9,1 +10200000,941.9,1 +10300000,955.1,1 +10400000,908.7,1 +10500000,771.3,1 +10600000,848.8,1 +10700000,955.3,1 +10800000,799.4,1 +10900000,965.5,1 +11000000,963.1,1 +11100000,973.7,1 +11200000,952.2,1 +11300000,960.4,1 +11400000,924.0,1 +11500000,932.3,1 +11600000,969.0,1 +11700000,935.9,1 +11800000,946.5,1 +11900000,948.7,1 +12000000,983.0,1 +12100000,978.4,1 +12200000,983.5,1 +12300000,915.5,1 +12400000,755.0,1 +12500000,877.3,1 +12600000,934.9,1 +12700000,938.3,1 +12800000,883.1,1 +12900000,938.6,1 +13000000,922.0,1 +13100000,955.2,1 +13200000,941.8,1 +13300000,964.1,1 +13400000,964.1,1 +13500000,970.4,1 +13600000,958.9,1 +13700000,937.8,1 +13800000,992.2,1 +13900000,983.4,1 +14000000,992.7,1 +0,29.2,3 +100000,117.3,3 +200000,472.4,3 +300000,719.6,3 +400000,586.0,3 +500000,661.4,3 +600000,601.4,3 +700000,632.4,3 +800000,745.8,3 +900000,570.8,3 +1000000,790.4,3 +1100000,773.1,3 +1200000,843.9,3 +1300000,866.8,3 +1400000,938.4,3 +1500000,887.3,3 +1600000,952.6,3 +1700000,948.8,3 +1800000,941.2,3 +1900000,937.0,3 +2000000,956.4,3 +2100000,922.2,3 +2200000,959.1,3 +2300000,967.1,3 +2400000,950.9,3 +2500000,941.1,3 +2600000,963.7,3 +2700000,963.3,3 +2800000,968.9,3 +2900000,973.4,3 +3000000,968.3,3 +3100000,954.7,3 +3200000,960.8,3 +3300000,962.9,3 +3400000,973.3,3 +3500000,969.2,3 +3600000,975.2,3 +3700000,956.6,3 +3800000,880.4,3 +3900000,967.5,3 +4000000,930.1,3 +4100000,980.6,3 +4200000,964.5,3 +4300000,440.5,3 +4400000,966.0,3 +4500000,977.7,3 
+4600000,973.5,3 +4700000,968.6,3 +4800000,979.6,3 +4900000,972.8,3 +5000000,982.3,3 +5100000,960.2,3 +5200000,980.1,3 +5300000,980.0,3 +5400000,971.3,3 +5500000,712.5,3 +5600000,977.1,3 +5700000,975.0,3 +5800000,807.6,3 +5900000,958.3,3 +6000000,972.3,3 +6100000,961.6,3 +6200000,977.1,3 +6300000,844.1,3 +6400000,969.0,3 +6500000,952.7,3 +6600000,956.7,3 +6700000,976.4,3 +6800000,978.8,3 +6900000,449.8,3 +7000000,976.9,3 +7100000,980.3,3 +7200000,962.5,3 +7300000,973.1,3 +7400000,964.9,3 +7500000,977.2,3 +7600000,977.5,3 +7700000,977.7,3 +7800000,967.6,3 +7900000,977.3,3 +8000000,962.6,3 +8100000,980.8,3 +8200000,948.2,3 +8300000,963.2,3 +8400000,823.4,3 +8500000,971.8,3 +8600000,972.2,3 +8700000,974.2,3 +8800000,973.9,3 +8900000,975.3,3 +9000000,979.0,3 +9100000,960.7,3 +9200000,972.4,3 +9300000,982.8,3 +9400000,957.0,3 +9500000,484.3,3 +9600000,923.4,3 +9700000,968.2,3 +9800000,980.6,3 +9900000,978.3,3 +10000000,982.6,3 +10100000,978.6,3 +10200000,978.3,3 +10300000,964.2,3 +10400000,980.7,3 +10500000,984.1,3 +10600000,976.0,3 +10700000,968.6,3 +10800000,778.3,3 +10900000,975.8,3 +11000000,940.0,3 +11100000,584.9,3 +11200000,965.2,3 +11300000,964.9,3 +11400000,981.9,3 +11500000,976.5,3 +11600000,952.8,3 +11700000,979.5,3 +11800000,975.6,3 +11900000,962.7,3 +12000000,976.1,3 +12100000,813.0,3 +12200000,965.7,3 +12300000,970.2,3 +12400000,980.4,3 +12500000,983.0,3 +12600000,972.9,3 +12700000,973.5,3 +12800000,984.9,3 +12900000,981.7,3 +13000000,981.0,3 +13100000,971.7,3 +13200000,982.1,3 +13300000,985.2,3 +13400000,976.2,3 +13500000,984.8,3 +13600000,966.7,3 +13700000,979.6,3 +13800000,910.8,3 +13900000,984.9,3 diff --git a/results/dog-trot.csv b/results/dog-trot.csv new file mode 100644 index 0000000..4539d99 --- /dev/null +++ b/results/dog-trot.csv @@ -0,0 +1,423 @@ +step,reward,seed +0,6.2,2 +100000,5.0,2 +200000,89.2,2 +300000,205.5,2 +400000,237.0,2 +500000,66.2,2 +600000,283.3,2 +700000,319.7,2 +800000,400.3,2 +900000,342.8,2 +1000000,481.2,2 +1100000,552.0,2 
+1200000,567.0,2 +1300000,698.5,2 +1400000,730.2,2 +1500000,712.6,2 +1600000,858.0,2 +1700000,886.4,2 +1800000,893.2,2 +1900000,889.4,2 +2000000,906.8,2 +2100000,905.4,2 +2200000,876.7,2 +2300000,380.4,2 +2400000,914.4,2 +2500000,915.5,2 +2600000,910.4,2 +2700000,901.0,2 +2800000,896.8,2 +2900000,917.4,2 +3000000,909.6,2 +3100000,917.9,2 +3200000,741.7,2 +3300000,926.8,2 +3400000,912.5,2 +3500000,924.6,2 +3600000,899.1,2 +3700000,908.7,2 +3800000,923.2,2 +3900000,933.3,2 +4000000,949.3,2 +4100000,902.4,2 +4200000,940.7,2 +4300000,935.4,2 +4400000,938.5,2 +4500000,932.6,2 +4600000,931.3,2 +4700000,904.1,2 +4800000,926.7,2 +4900000,931.0,2 +5000000,942.1,2 +5100000,941.5,2 +5200000,935.4,2 +5300000,940.0,2 +5400000,951.6,2 +5500000,940.0,2 +5600000,939.7,2 +5700000,930.3,2 +5800000,925.7,2 +5900000,946.4,2 +6000000,954.2,2 +6100000,936.6,2 +6200000,919.9,2 +6300000,956.2,2 +6400000,932.2,2 +6500000,945.2,2 +6600000,953.3,2 +6700000,940.0,2 +6800000,930.0,2 +6900000,946.0,2 +7000000,947.6,2 +7100000,954.0,2 +7200000,956.9,2 +7300000,951.3,2 +7400000,942.5,2 +7500000,949.4,2 +7600000,928.2,2 +7700000,939.3,2 +7800000,957.4,2 +7900000,955.5,2 +8000000,935.6,2 +8100000,948.1,2 +8200000,937.6,2 +8300000,944.0,2 +8400000,922.9,2 +8500000,952.7,2 +8600000,953.5,2 +8700000,937.5,2 +8800000,937.0,2 +8900000,898.2,2 +9000000,953.8,2 +9100000,941.4,2 +9200000,918.1,2 +9300000,956.3,2 +9400000,947.5,2 +9500000,958.3,2 +9600000,949.1,2 +9700000,959.5,2 +9800000,961.4,2 +9900000,948.2,2 +10000000,952.1,2 +10100000,957.1,2 +10200000,960.9,2 +10300000,945.0,2 +10400000,965.7,2 +10500000,952.5,2 +10600000,948.6,2 +10700000,967.1,2 +10800000,955.0,2 +10900000,955.2,2 +11000000,961.3,2 +11100000,927.4,2 +11200000,958.4,2 +11300000,952.8,2 +11400000,950.7,2 +11500000,950.9,2 +11600000,957.5,2 +11700000,934.0,2 +11800000,969.7,2 +11900000,953.5,2 +12000000,936.9,2 +12100000,956.4,2 +12200000,952.2,2 +12300000,952.5,2 +12400000,959.4,2 +12500000,969.5,2 +12600000,944.9,2 +12700000,962.5,2 
+12800000,962.3,2 +12900000,957.5,2 +13000000,962.9,2 +13100000,954.2,2 +13200000,946.5,2 +13300000,955.7,2 +13400000,962.0,2 +13500000,965.3,2 +13600000,962.2,2 +13700000,951.1,2 +13800000,960.5,2 +13900000,946.2,2 +14000000,923.5,2 +0,7.9,1 +100000,10.4,1 +200000,97.2,1 +300000,100.9,1 +400000,180.2,1 +500000,319.2,1 +600000,330.5,1 +700000,385.9,1 +800000,55.9,1 +900000,364.9,1 +1000000,512.7,1 +1100000,618.9,1 +1200000,597.0,1 +1300000,577.6,1 +1400000,616.0,1 +1500000,775.3,1 +1600000,712.2,1 +1700000,800.4,1 +1800000,861.5,1 +1900000,860.2,1 +2000000,854.1,1 +2100000,844.9,1 +2200000,857.0,1 +2300000,886.9,1 +2400000,871.7,1 +2500000,883.1,1 +2600000,857.4,1 +2700000,901.9,1 +2800000,903.4,1 +2900000,919.1,1 +3000000,879.8,1 +3100000,933.1,1 +3200000,927.0,1 +3300000,941.1,1 +3400000,936.4,1 +3500000,946.8,1 +3600000,931.9,1 +3700000,964.8,1 +3800000,945.9,1 +3900000,957.7,1 +4000000,955.6,1 +4100000,972.4,1 +4200000,955.1,1 +4300000,944.0,1 +4400000,965.0,1 +4500000,949.7,1 +4600000,962.7,1 +4700000,967.8,1 +4800000,969.2,1 +4900000,951.2,1 +5000000,956.1,1 +5100000,950.2,1 +5200000,969.7,1 +5300000,963.2,1 +5400000,954.2,1 +5500000,952.0,1 +5600000,963.0,1 +5700000,949.1,1 +5800000,958.3,1 +5900000,967.1,1 +6000000,948.7,1 +6100000,965.8,1 +6200000,962.0,1 +6300000,950.3,1 +6400000,934.8,1 +6500000,945.3,1 +6600000,959.1,1 +6700000,963.5,1 +6800000,963.2,1 +6900000,955.0,1 +7000000,963.9,1 +7100000,981.1,1 +7200000,959.5,1 +7300000,970.3,1 +7400000,962.3,1 +7500000,952.0,1 +7600000,970.2,1 +7700000,961.3,1 +7800000,950.9,1 +7900000,962.9,1 +8000000,970.2,1 +8100000,957.1,1 +8200000,968.6,1 +8300000,957.3,1 +8400000,970.4,1 +8500000,967.5,1 +8600000,946.5,1 +8700000,985.0,1 +8800000,962.9,1 +8900000,963.0,1 +9000000,979.4,1 +9100000,974.5,1 +9200000,967.1,1 +9300000,962.8,1 +9400000,953.4,1 +9500000,962.3,1 +9600000,964.7,1 +9700000,978.4,1 +9800000,972.7,1 +9900000,978.8,1 +10000000,980.2,1 +10100000,967.4,1 +10200000,956.2,1 +10300000,973.3,1 
+10400000,967.1,1 +10500000,946.9,1 +10600000,965.0,1 +10700000,977.9,1 +10800000,965.2,1 +10900000,962.8,1 +11000000,969.8,1 +11100000,967.3,1 +11200000,961.1,1 +11300000,976.8,1 +11400000,971.7,1 +11500000,980.8,1 +11600000,977.5,1 +11700000,970.3,1 +11800000,976.3,1 +11900000,984.2,1 +12000000,956.0,1 +12100000,968.9,1 +12200000,972.5,1 +12300000,970.6,1 +12400000,978.7,1 +12500000,970.5,1 +12600000,969.9,1 +12700000,976.4,1 +12800000,985.7,1 +12900000,974.6,1 +13000000,953.3,1 +13100000,971.4,1 +13200000,979.2,1 +13300000,979.0,1 +13400000,963.3,1 +13500000,942.2,1 +13600000,976.3,1 +13700000,972.2,1 +13800000,967.1,1 +13900000,978.7,1 +14000000,977.7,1 +0,7.8,3 +100000,21.3,3 +200000,26.6,3 +300000,110.6,3 +400000,27.2,3 +500000,11.3,3 +600000,18.4,3 +700000,307.3,3 +800000,348.9,3 +900000,483.2,3 +1000000,506.2,3 +1100000,560.3,3 +1200000,658.6,3 +1300000,620.5,3 +1400000,646.9,3 +1500000,829.0,3 +1600000,840.5,3 +1700000,859.7,3 +1800000,857.1,3 +1900000,855.7,3 +2000000,892.3,3 +2100000,873.9,3 +2200000,886.7,3 +2300000,860.6,3 +2400000,875.2,3 +2500000,908.1,3 +2600000,912.8,3 +2700000,918.9,3 +2800000,912.7,3 +2900000,839.0,3 +3000000,909.8,3 +3100000,901.0,3 +3200000,917.7,3 +3300000,835.5,3 +3400000,930.9,3 +3500000,573.2,3 +3600000,822.6,3 +3700000,914.4,3 +3800000,922.1,3 +3900000,913.6,3 +4000000,935.8,3 +4100000,932.7,3 +4200000,863.2,3 +4300000,659.9,3 +4400000,943.4,3 +4500000,904.9,3 +4600000,915.3,3 +4700000,936.5,3 +4800000,920.8,3 +4900000,946.6,3 +5000000,951.0,3 +5100000,950.7,3 +5200000,935.5,3 +5300000,865.6,3 +5400000,839.9,3 +5500000,880.2,3 +5600000,861.9,3 +5700000,947.2,3 +5800000,933.2,3 +5900000,956.0,3 +6000000,854.7,3 +6100000,587.7,3 +6200000,966.4,3 +6300000,927.8,3 +6400000,966.6,3 +6500000,959.3,3 +6600000,940.5,3 +6700000,960.0,3 +6800000,879.2,3 +6900000,941.0,3 +7000000,856.5,3 +7100000,965.0,3 +7200000,813.5,3 +7300000,851.1,3 +7400000,674.0,3 +7500000,952.5,3 +7600000,859.0,3 +7700000,808.6,3 +7800000,914.9,3 
+7900000,944.1,3 +8000000,957.2,3 +8100000,669.0,3 +8200000,954.2,3 +8300000,860.4,3 +8400000,958.5,3 +8500000,768.2,3 +8600000,933.6,3 +8700000,951.7,3 +8800000,920.4,3 +8900000,854.2,3 +9000000,958.2,3 +9100000,961.0,3 +9200000,950.7,3 +9300000,956.1,3 +9400000,954.5,3 +9500000,955.9,3 +9600000,954.4,3 +9700000,913.0,3 +9800000,961.0,3 +9900000,923.8,3 +10000000,946.0,3 +10100000,932.3,3 +10200000,930.7,3 +10300000,941.4,3 +10400000,950.2,3 +10500000,862.1,3 +10600000,964.4,3 +10700000,866.4,3 +10800000,931.8,3 +10900000,913.5,3 +11000000,946.8,3 +11100000,955.4,3 +11200000,964.0,3 +11300000,953.0,3 +11400000,950.3,3 +11500000,960.0,3 +11600000,963.3,3 +11700000,938.7,3 +11800000,953.4,3 +11900000,952.2,3 +12000000,937.9,3 +12100000,932.7,3 +12200000,915.5,3 +12300000,949.4,3 +12400000,941.5,3 +12500000,920.2,3 +12600000,943.7,3 +12700000,954.1,3 +12800000,960.1,3 +12900000,964.4,3 +13000000,915.6,3 +13100000,962.2,3 +13200000,940.1,3 +13300000,953.0,3 +13400000,955.3,3 +13500000,954.0,3 +13600000,945.5,3 +13700000,944.8,3 +13800000,954.9,3 +13900000,957.2,3 diff --git a/results/dog-walk.csv b/results/dog-walk.csv new file mode 100644 index 0000000..8835a29 --- /dev/null +++ b/results/dog-walk.csv @@ -0,0 +1,423 @@ +step,reward,seed +0,6.7,2 +100000,36.2,2 +200000,110.3,2 +300000,349.0,2 +400000,453.6,2 +500000,554.0,2 +600000,568.2,2 +700000,692.7,2 +800000,714.1,2 +900000,793.3,2 +1000000,787.0,2 +1100000,851.3,2 +1200000,908.4,2 +1300000,926.6,2 +1400000,930.1,2 +1500000,943.3,2 +1600000,942.4,2 +1700000,943.1,2 +1800000,946.8,2 +1900000,944.2,2 +2000000,929.0,2 +2100000,946.9,2 +2200000,952.9,2 +2300000,958.0,2 +2400000,959.7,2 +2500000,953.5,2 +2600000,952.3,2 +2700000,962.8,2 +2800000,967.8,2 +2900000,953.7,2 +3000000,961.7,2 +3100000,955.1,2 +3200000,957.8,2 +3300000,963.7,2 +3400000,959.6,2 +3500000,952.4,2 +3600000,950.5,2 +3700000,959.2,2 +3800000,938.9,2 +3900000,925.2,2 +4000000,964.3,2 +4100000,960.8,2 +4200000,947.3,2 +4300000,942.3,2 
+4400000,945.0,2 +4500000,940.8,2 +4600000,953.9,2 +4700000,941.2,2 +4800000,949.9,2 +4900000,971.4,2 +5000000,967.5,2 +5100000,958.8,2 +5200000,965.3,2 +5300000,915.7,2 +5400000,950.1,2 +5500000,959.4,2 +5600000,962.4,2 +5700000,967.3,2 +5800000,938.5,2 +5900000,949.7,2 +6000000,960.9,2 +6100000,955.5,2 +6200000,950.7,2 +6300000,948.2,2 +6400000,943.8,2 +6500000,967.4,2 +6600000,948.3,2 +6700000,953.7,2 +6800000,962.2,2 +6900000,952.9,2 +7000000,964.3,2 +7100000,956.8,2 +7200000,963.3,2 +7300000,967.1,2 +7400000,956.6,2 +7500000,949.5,2 +7600000,965.8,2 +7700000,944.2,2 +7800000,956.7,2 +7900000,946.9,2 +8000000,961.6,2 +8100000,957.2,2 +8200000,965.6,2 +8300000,951.5,2 +8400000,958.6,2 +8500000,957.8,2 +8600000,960.7,2 +8700000,961.7,2 +8800000,957.6,2 +8900000,957.9,2 +9000000,959.1,2 +9100000,962.0,2 +9200000,963.9,2 +9300000,962.9,2 +9400000,963.2,2 +9500000,936.8,2 +9600000,940.8,2 +9700000,972.9,2 +9800000,971.8,2 +9900000,963.8,2 +10000000,967.0,2 +10100000,965.1,2 +10200000,960.4,2 +10300000,957.5,2 +10400000,968.4,2 +10500000,953.7,2 +10600000,952.9,2 +10700000,966.9,2 +10800000,937.2,2 +10900000,958.9,2 +11000000,966.1,2 +11100000,947.8,2 +11200000,971.9,2 +11300000,966.9,2 +11400000,967.4,2 +11500000,948.2,2 +11600000,967.8,2 +11700000,973.5,2 +11800000,959.3,2 +11900000,963.5,2 +12000000,975.1,2 +12100000,959.8,2 +12200000,978.4,2 +12300000,978.5,2 +12400000,962.4,2 +12500000,971.9,2 +12600000,955.3,2 +12700000,853.4,2 +12800000,980.0,2 +12900000,973.4,2 +13000000,972.9,2 +13100000,968.9,2 +13200000,954.2,2 +13300000,967.7,2 +13400000,973.5,2 +13500000,967.2,2 +13600000,962.9,2 +13700000,964.6,2 +13800000,975.4,2 +13900000,967.5,2 +14000000,976.8,2 +0,8.8,1 +100000,48.6,1 +200000,232.3,1 +300000,334.4,1 +400000,405.7,1 +500000,439.7,1 +600000,658.5,1 +700000,628.2,1 +800000,638.0,1 +900000,558.6,1 +1000000,632.0,1 +1100000,831.4,1 +1200000,908.1,1 +1300000,918.1,1 +1400000,916.3,1 +1500000,933.3,1 +1600000,939.8,1 +1700000,949.8,1 +1800000,949.4,1 
+1900000,944.2,1 +2000000,946.3,1 +2100000,942.1,1 +2200000,949.7,1 +2300000,941.6,1 +2400000,940.1,1 +2500000,935.3,1 +2600000,948.7,1 +2700000,946.6,1 +2800000,942.8,1 +2900000,945.0,1 +3000000,955.1,1 +3100000,953.6,1 +3200000,944.2,1 +3300000,938.2,1 +3400000,958.8,1 +3500000,947.3,1 +3600000,965.5,1 +3700000,963.9,1 +3800000,946.6,1 +3900000,962.9,1 +4000000,919.9,1 +4100000,941.4,1 +4200000,931.4,1 +4300000,943.9,1 +4400000,913.2,1 +4500000,946.0,1 +4600000,940.2,1 +4700000,954.5,1 +4800000,954.9,1 +4900000,951.3,1 +5000000,899.5,1 +5100000,958.9,1 +5200000,847.1,1 +5300000,960.7,1 +5400000,957.8,1 +5500000,952.6,1 +5600000,888.1,1 +5700000,960.3,1 +5800000,943.0,1 +5900000,950.2,1 +6000000,972.5,1 +6100000,961.5,1 +6200000,952.3,1 +6300000,955.0,1 +6400000,956.6,1 +6500000,892.8,1 +6600000,940.3,1 +6700000,959.9,1 +6800000,967.9,1 +6900000,942.4,1 +7000000,865.7,1 +7100000,962.5,1 +7200000,969.7,1 +7300000,966.4,1 +7400000,958.6,1 +7500000,970.3,1 +7600000,962.0,1 +7700000,962.8,1 +7800000,965.4,1 +7900000,973.4,1 +8000000,956.3,1 +8100000,964.1,1 +8200000,958.2,1 +8300000,975.6,1 +8400000,967.2,1 +8500000,970.5,1 +8600000,914.2,1 +8700000,967.2,1 +8800000,969.6,1 +8900000,968.1,1 +9000000,968.7,1 +9100000,970.6,1 +9200000,950.9,1 +9300000,968.2,1 +9400000,969.6,1 +9500000,967.4,1 +9600000,963.4,1 +9700000,975.4,1 +9800000,969.0,1 +9900000,975.9,1 +10000000,961.9,1 +10100000,969.5,1 +10200000,969.4,1 +10300000,944.1,1 +10400000,969.7,1 +10500000,973.0,1 +10600000,962.8,1 +10700000,977.7,1 +10800000,938.5,1 +10900000,967.4,1 +11000000,971.8,1 +11100000,964.1,1 +11200000,969.4,1 +11300000,962.1,1 +11400000,968.8,1 +11500000,975.6,1 +11600000,971.4,1 +11700000,968.2,1 +11800000,973.3,1 +11900000,913.7,1 +12000000,970.3,1 +12100000,910.0,1 +12200000,972.4,1 +12300000,975.6,1 +12400000,959.0,1 +12500000,955.0,1 +12600000,948.2,1 +12700000,965.9,1 +12800000,953.1,1 +12900000,967.3,1 +13000000,960.5,1 +13100000,921.0,1 +13200000,963.8,1 +13300000,950.3,1 
+13400000,965.3,1 +13500000,958.4,1 +13600000,966.4,1 +13700000,966.6,1 +13800000,939.8,1 +13900000,953.3,1 +14000000,974.5,1 +0,8.9,3 +100000,19.1,3 +200000,280.5,3 +300000,427.0,3 +400000,436.3,3 +500000,454.2,3 +600000,707.7,3 +700000,43.9,3 +800000,31.1,3 +900000,139.8,3 +1000000,62.8,3 +1100000,95.6,3 +1200000,98.9,3 +1300000,54.6,3 +1400000,28.9,3 +1500000,284.0,3 +1600000,93.2,3 +1700000,32.3,3 +1800000,165.3,3 +1900000,732.5,3 +2000000,779.7,3 +2100000,373.8,3 +2200000,604.4,3 +2300000,49.0,3 +2400000,633.5,3 +2500000,600.5,3 +2600000,290.0,3 +2700000,673.8,3 +2800000,403.4,3 +2900000,529.8,3 +3000000,642.2,3 +3100000,937.0,3 +3200000,941.1,3 +3300000,957.6,3 +3400000,960.2,3 +3500000,941.5,3 +3600000,960.7,3 +3700000,946.1,3 +3800000,957.0,3 +3900000,884.3,3 +4000000,945.3,3 +4100000,951.3,3 +4200000,894.8,3 +4300000,942.8,3 +4400000,927.1,3 +4500000,939.2,3 +4600000,850.0,3 +4700000,963.8,3 +4800000,869.1,3 +4900000,843.2,3 +5000000,951.1,3 +5100000,943.7,3 +5200000,868.1,3 +5300000,950.3,3 +5400000,939.0,3 +5500000,948.4,3 +5600000,949.9,3 +5700000,966.0,3 +5800000,961.5,3 +5900000,957.4,3 +6000000,962.6,3 +6100000,955.3,3 +6200000,973.1,3 +6300000,953.4,3 +6400000,974.1,3 +6500000,956.1,3 +6600000,936.1,3 +6700000,950.3,3 +6800000,959.8,3 +6900000,966.7,3 +7000000,956.4,3 +7100000,915.2,3 +7200000,967.0,3 +7300000,957.0,3 +7400000,950.9,3 +7500000,847.8,3 +7600000,935.4,3 +7700000,957.6,3 +7800000,957.3,3 +7900000,950.7,3 +8000000,956.6,3 +8100000,967.3,3 +8200000,960.6,3 +8300000,944.6,3 +8400000,969.1,3 +8500000,962.0,3 +8600000,925.1,3 +8700000,937.0,3 +8800000,955.5,3 +8900000,949.2,3 +9000000,947.5,3 +9100000,957.1,3 +9200000,953.5,3 +9300000,960.5,3 +9400000,938.6,3 +9500000,941.7,3 +9600000,972.4,3 +9700000,960.8,3 +9800000,954.9,3 +9900000,958.4,3 +10000000,974.8,3 +10100000,942.1,3 +10200000,957.4,3 +10300000,925.6,3 +10400000,948.5,3 +10500000,971.0,3 +10600000,953.8,3 +10700000,966.3,3 +10800000,934.9,3 +10900000,927.7,3 +11000000,945.7,3 
+11100000,943.2,3 +11200000,954.8,3 +11300000,932.0,3 +11400000,948.3,3 +11500000,953.2,3 +11600000,938.5,3 +11700000,972.1,3 +11800000,934.8,3 +11900000,957.9,3 +12000000,943.5,3 +12100000,964.1,3 +12200000,958.2,3 +12300000,946.5,3 +12400000,949.8,3 +12500000,964.8,3 +12600000,954.4,3 +12700000,968.5,3 +12800000,974.0,3 +12900000,967.8,3 +13000000,951.8,3 +13100000,977.3,3 +13200000,952.3,3 +13300000,946.5,3 +13400000,964.6,3 +13500000,968.7,3 +13600000,953.3,3 +13700000,916.5,3 +13800000,954.1,3 +13900000,925.6,3 diff --git a/results/finger-spin.csv b/results/finger-spin.csv new file mode 100644 index 0000000..88d427f --- /dev/null +++ b/results/finger-spin.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,0.0,3 +100000,985.6,3 +200000,978.6,3 +300000,980.3,3 +400000,987.5,3 +500000,982.7,3 +600000,982.5,3 +700000,977.8,3 +800000,985.1,3 +900000,983.7,3 +1000000,984.8,3 +1100000,987.0,3 +1200000,987.4,3 +1300000,986.2,3 +1400000,984.7,3 +1500000,987.5,3 +1600000,981.9,3 +1700000,987.0,3 +1800000,983.2,3 +1900000,986.8,3 +2000000,988.2,3 +2100000,988.3,3 +2200000,985.7,3 +2300000,985.6,3 +2400000,985.3,3 +2500000,988.1,3 +2600000,988.0,3 +2700000,989.8,3 +2800000,983.4,3 +2900000,983.7,3 +3000000,990.1,3 +3100000,981.7,3 +3200000,985.9,3 +3300000,988.5,3 +3400000,988.3,3 +3500000,988.7,3 +3600000,983.8,3 +3700000,986.5,3 +3800000,986.8,3 +3900000,987.9,3 +4000000,991.0,3 +0,0.0,2 +100000,937.3,2 +200000,980.7,2 +300000,982.4,2 +400000,987.0,2 +500000,982.1,2 +600000,983.1,2 +700000,984.1,2 +800000,985.1,2 +900000,989.9,2 +1000000,987.0,2 +1100000,990.3,2 +1200000,986.5,2 +1300000,987.4,2 +1400000,986.9,2 +1500000,990.9,2 +1600000,986.0,2 +1700000,987.3,2 +1800000,988.5,2 +1900000,987.5,2 +2000000,989.7,2 +2100000,989.1,2 +2200000,987.0,2 +2300000,985.8,2 +2400000,988.1,2 +2500000,989.9,2 +2600000,989.4,2 +2700000,989.5,2 +2800000,989.7,2 +2900000,991.4,2 +3000000,991.3,2 +3100000,982.8,2 +3200000,987.5,2 +3300000,990.3,2 +3400000,987.9,2 +3500000,991.0,2 
+3600000,991.4,2 +3700000,985.3,2 +3800000,993.2,2 +3900000,989.5,2 +4000000,989.5,2 +0,0.0,1 +100000,919.6,1 +200000,963.9,1 +300000,979.2,1 +400000,987.5,1 +500000,989.1,1 +600000,985.3,1 +700000,985.9,1 +800000,987.1,1 +900000,990.3,1 +1000000,985.4,1 +1100000,987.5,1 +1200000,889.4,1 +1300000,988.7,1 +1400000,989.3,1 +1500000,989.0,1 +1600000,989.1,1 +1700000,989.5,1 +1800000,986.4,1 +1900000,989.5,1 +2000000,989.2,1 +2100000,904.6,1 +2200000,988.5,1 +2300000,987.3,1 +2400000,986.2,1 +2500000,987.8,1 +2600000,990.4,1 +2700000,989.6,1 +2800000,992.7,1 +2900000,989.1,1 +3000000,989.7,1 +3100000,989.6,1 +3200000,989.5,1 +3300000,988.9,1 +3400000,989.3,1 +3500000,989.7,1 +3600000,989.0,1 +3700000,990.8,1 +3800000,987.7,1 +3900000,984.5,1 +4000000,991.1,1 diff --git a/results/finger-turn-easy.csv b/results/finger-turn-easy.csv new file mode 100644 index 0000000..cec2700 --- /dev/null +++ b/results/finger-turn-easy.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,204.2,3 +100000,485.7,3 +200000,684.7,3 +300000,883.2,3 +400000,823.5,3 +500000,802.0,3 +600000,936.5,3 +700000,883.6,3 +800000,885.3,3 +900000,888.5,3 +1000000,982.1,3 +1100000,984.1,3 +1200000,976.8,3 +1300000,982.3,3 +1400000,978.5,3 +1500000,984.9,3 +1600000,978.1,3 +1700000,981.6,3 +1800000,990.8,3 +1900000,983.6,3 +2000000,980.7,3 +2100000,986.6,3 +2200000,978.9,3 +2300000,979.0,3 +2400000,984.7,3 +2500000,989.3,3 +2600000,980.6,3 +2700000,989.8,3 +2800000,980.7,3 +2900000,986.1,3 +3000000,979.8,3 +3100000,988.5,3 +3200000,986.1,3 +3300000,975.6,3 +3400000,982.3,3 +3500000,986.8,3 +3600000,987.9,3 +3700000,988.8,3 +3800000,892.9,3 +3900000,982.9,3 +4000000,983.4,3 +0,213.4,2 +100000,496.2,2 +200000,743.6,2 +300000,881.2,2 +400000,985.9,2 +500000,885.1,2 +600000,874.6,2 +700000,967.1,2 +800000,958.3,2 +900000,884.9,2 +1000000,977.2,2 +1100000,967.1,2 +1200000,876.8,2 +1300000,979.6,2 +1400000,982.2,2 +1500000,973.1,2 +1600000,983.1,2 +1700000,988.1,2 +1800000,986.3,2 +1900000,892.7,2 +2000000,984.6,2 
+2100000,988.6,2 +2200000,981.3,2 +2300000,983.8,2 +2400000,987.5,2 +2500000,980.2,2 +2600000,978.2,2 +2700000,984.1,2 +2800000,982.7,2 +2900000,981.7,2 +3000000,985.1,2 +3100000,983.7,2 +3200000,983.1,2 +3300000,987.2,2 +3400000,992.5,2 +3500000,983.7,2 +3600000,984.5,2 +3700000,984.9,2 +3800000,979.6,2 +3900000,986.1,2 +4000000,977.3,2 +0,100.0,1 +100000,482.0,1 +200000,875.7,1 +300000,937.6,1 +400000,878.5,1 +500000,876.9,1 +600000,974.3,1 +700000,878.9,1 +800000,975.9,1 +900000,977.1,1 +1000000,796.2,1 +1100000,840.3,1 +1200000,978.2,1 +1300000,869.6,1 +1400000,985.5,1 +1500000,978.6,1 +1600000,977.0,1 +1700000,984.4,1 +1800000,795.2,1 +1900000,984.3,1 +2000000,977.1,1 +2100000,887.9,1 +2200000,977.8,1 +2300000,989.3,1 +2400000,979.1,1 +2500000,984.7,1 +2600000,986.4,1 +2700000,979.1,1 +2800000,979.5,1 +2900000,983.9,1 +3000000,881.8,1 +3100000,937.7,1 +3200000,898.9,1 +3300000,981.1,1 +3400000,978.7,1 +3500000,976.3,1 +3600000,980.7,1 +3700000,885.5,1 +3800000,961.8,1 +3900000,991.0,1 +4000000,986.7,1 diff --git a/results/finger-turn-hard.csv b/results/finger-turn-hard.csv new file mode 100644 index 0000000..42faa61 --- /dev/null +++ b/results/finger-turn-hard.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,0.0,3 +100000,386.5,3 +200000,775.1,3 +300000,378.8,3 +400000,560.8,3 +500000,871.4,3 +600000,833.3,3 +700000,882.6,3 +800000,971.9,3 +900000,970.9,3 +1000000,979.0,3 +1100000,966.7,3 +1200000,974.2,3 +1300000,982.5,3 +1400000,977.9,3 +1500000,981.5,3 +1600000,971.0,3 +1700000,978.5,3 +1800000,968.7,3 +1900000,978.4,3 +2000000,979.1,3 +2100000,982.5,3 +2200000,887.0,3 +2300000,976.3,3 +2400000,984.3,3 +2500000,968.1,3 +2600000,976.5,3 +2700000,985.5,3 +2800000,984.8,3 +2900000,960.4,3 +3000000,976.5,3 +3100000,884.7,3 +3200000,983.9,3 +3300000,973.1,3 +3400000,989.6,3 +3500000,983.1,3 +3600000,983.3,3 +3700000,984.5,3 +3800000,887.5,3 +3900000,978.3,3 +4000000,979.8,3 +0,124.3,2 +100000,580.4,2 +200000,742.9,2 +300000,775.5,2 +400000,963.0,2 +500000,882.1,2 
+600000,790.5,2 +700000,863.3,2 +800000,874.7,2 +900000,966.0,2 +1000000,959.5,2 +1100000,867.4,2 +1200000,971.7,2 +1300000,890.1,2 +1400000,882.1,2 +1500000,973.5,2 +1600000,978.5,2 +1700000,982.5,2 +1800000,977.5,2 +1900000,888.3,2 +2000000,791.3,2 +2100000,987.0,2 +2200000,976.6,2 +2300000,988.3,2 +2400000,980.4,2 +2500000,977.5,2 +2600000,983.8,2 +2700000,883.3,2 +2800000,954.4,2 +2900000,968.3,2 +3000000,981.5,2 +3100000,979.5,2 +3200000,973.2,2 +3300000,981.1,2 +3400000,989.2,2 +3500000,982.4,2 +3600000,984.7,2 +3700000,982.8,2 +3800000,777.8,2 +3900000,982.1,2 +4000000,974.8,2 +0,100.0,1 +100000,484.0,1 +200000,873.3,1 +300000,960.9,1 +400000,877.6,1 +500000,875.3,1 +600000,904.3,1 +700000,934.2,1 +800000,972.2,1 +900000,956.4,1 +1000000,985.3,1 +1100000,978.4,1 +1200000,979.2,1 +1300000,957.6,1 +1400000,932.7,1 +1500000,978.6,1 +1600000,901.3,1 +1700000,978.9,1 +1800000,980.7,1 +1900000,805.1,1 +2000000,983.2,1 +2100000,969.9,1 +2200000,970.1,1 +2300000,985.9,1 +2400000,983.3,1 +2500000,969.7,1 +2600000,983.6,1 +2700000,979.4,1 +2800000,964.6,1 +2900000,878.7,1 +3000000,979.9,1 +3100000,979.5,1 +3200000,984.6,1 +3300000,964.9,1 +3400000,882.0,1 +3500000,884.1,1 +3600000,962.2,1 +3700000,984.7,1 +3800000,976.8,1 +3900000,983.2,1 +4000000,976.8,1 diff --git a/results/fish-swim.csv b/results/fish-swim.csv new file mode 100644 index 0000000..bb3a0e3 --- /dev/null +++ b/results/fish-swim.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,69.9,3 +100000,176.3,3 +200000,218.2,3 +300000,442.7,3 +400000,519.2,3 +500000,579.9,3 +600000,587.9,3 +700000,593.8,3 +800000,667.7,3 +900000,692.8,3 +1000000,606.0,3 +1100000,248.5,3 +1200000,654.8,3 +1300000,583.5,3 +1400000,801.5,3 +1500000,729.2,3 +1600000,819.3,3 +1700000,746.4,3 +1800000,807.1,3 +1900000,649.5,3 +2000000,698.5,3 +2100000,786.0,3 +2200000,808.3,3 +2300000,799.8,3 +2400000,672.1,3 +2500000,825.8,3 +2600000,809.9,3 +2700000,772.8,3 +2800000,859.2,3 +2900000,800.4,3 +3000000,801.7,3 +3100000,751.2,3 +3200000,809.2,3 
+3300000,801.0,3 +3400000,818.1,3 +3500000,798.8,3 +3600000,833.9,3 +3700000,768.6,3 +3800000,767.4,3 +3900000,814.8,3 +4000000,847.5,3 +0,64.3,2 +100000,265.6,2 +200000,599.4,2 +300000,737.3,2 +400000,689.9,2 +500000,632.1,2 +600000,675.2,2 +700000,682.6,2 +800000,754.6,2 +900000,685.2,2 +1000000,695.1,2 +1100000,711.1,2 +1200000,674.7,2 +1300000,757.2,2 +1400000,807.4,2 +1500000,768.0,2 +1600000,809.5,2 +1700000,774.2,2 +1800000,796.1,2 +1900000,790.1,2 +2000000,680.9,2 +2100000,800.5,2 +2200000,757.3,2 +2300000,748.3,2 +2400000,761.8,2 +2500000,738.6,2 +2600000,784.0,2 +2700000,799.0,2 +2800000,714.5,2 +2900000,790.1,2 +3000000,666.9,2 +3100000,719.4,2 +3200000,782.5,2 +3300000,633.4,2 +3400000,832.3,2 +3500000,806.9,2 +3600000,807.3,2 +3700000,795.0,2 +3800000,758.3,2 +3900000,791.5,2 +4000000,780.3,2 +0,69.4,1 +100000,173.9,1 +200000,319.0,1 +300000,633.3,1 +400000,594.6,1 +500000,618.7,1 +600000,693.6,1 +700000,622.7,1 +800000,679.1,1 +900000,771.3,1 +1000000,770.1,1 +1100000,747.8,1 +1200000,762.2,1 +1300000,735.6,1 +1400000,791.9,1 +1500000,655.1,1 +1600000,757.4,1 +1700000,717.6,1 +1800000,731.8,1 +1900000,799.8,1 +2000000,739.2,1 +2100000,775.2,1 +2200000,685.6,1 +2300000,804.8,1 +2400000,796.0,1 +2500000,828.5,1 +2600000,761.6,1 +2700000,731.9,1 +2800000,790.6,1 +2900000,776.7,1 +3000000,801.7,1 +3100000,701.2,1 +3200000,829.3,1 +3300000,739.2,1 +3400000,761.5,1 +3500000,784.7,1 +3600000,842.1,1 +3700000,697.2,1 +3800000,740.1,1 +3900000,833.6,1 +4000000,655.1,1 diff --git a/results/hopper-hop-backwards.csv b/results/hopper-hop-backwards.csv new file mode 100644 index 0000000..0b50cc3 --- /dev/null +++ b/results/hopper-hop-backwards.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,0.0,2 +100000,151.7,2 +200000,883.7,2 +300000,863.9,2 +400000,937.2,2 +500000,943.5,2 +600000,950.6,2 +700000,952.6,2 +800000,938.9,2 +900000,951.9,2 +1000000,914.9,2 +1100000,852.5,2 +1200000,953.9,2 +1300000,958.1,2 +1400000,957.0,2 +1500000,954.4,2 +1600000,939.3,2 
+1700000,958.9,2 +1800000,944.4,2 +1900000,961.2,2 +2000000,959.5,2 +2100000,858.9,2 +2200000,965.4,2 +2300000,861.8,2 +2400000,965.5,2 +2500000,960.5,2 +2600000,948.7,2 +2700000,955.6,2 +2800000,960.6,2 +2900000,958.3,2 +3000000,959.5,2 +3100000,959.3,2 +3200000,946.0,2 +3300000,957.8,2 +3400000,959.9,2 +3500000,960.0,2 +3600000,955.2,2 +3700000,962.5,2 +3800000,858.1,2 +3900000,960.6,2 +4000000,954.3,2 +0,0.0,1 +100000,2.6,1 +200000,657.0,1 +300000,749.7,1 +400000,942.7,1 +500000,936.7,1 +600000,955.2,1 +700000,959.7,1 +800000,856.8,1 +900000,954.4,1 +1000000,963.2,1 +1100000,962.8,1 +1200000,956.4,1 +1300000,956.7,1 +1400000,857.0,1 +1500000,958.5,1 +1600000,960.3,1 +1700000,965.0,1 +1800000,956.5,1 +1900000,959.0,1 +2000000,960.2,1 +2100000,962.1,1 +2200000,917.3,1 +2300000,960.2,1 +2400000,957.0,1 +2500000,955.8,1 +2600000,960.0,1 +2700000,962.2,1 +2800000,955.6,1 +2900000,954.1,1 +3000000,962.5,1 +3100000,958.8,1 +3200000,963.3,1 +3300000,863.4,1 +3400000,954.2,1 +3500000,957.9,1 +3600000,964.4,1 +3700000,963.7,1 +3800000,958.1,1 +3900000,962.8,1 +4000000,956.8,1 +0,0.0,3 +100000,664.5,3 +200000,734.0,3 +300000,759.2,3 +400000,630.0,3 +500000,792.2,3 +600000,810.2,3 +700000,818.8,3 +800000,813.8,3 +900000,826.4,3 +1000000,827.6,3 +1100000,837.7,3 +1200000,838.0,3 +1300000,844.1,3 +1400000,818.1,3 +1500000,836.3,3 +1600000,835.1,3 +1700000,849.2,3 +1800000,850.5,3 +1900000,894.8,3 +2000000,818.8,3 +2100000,887.3,3 +2200000,902.4,3 +2300000,898.9,3 +2400000,896.8,3 +2500000,909.2,3 +2600000,875.9,3 +2700000,898.9,3 +2800000,865.8,3 +2900000,868.3,3 +3000000,888.5,3 +3100000,873.7,3 +3200000,832.7,3 +3300000,885.5,3 +3400000,797.8,3 +3500000,885.0,3 +3600000,895.2,3 +3700000,873.5,3 +3800000,832.2,3 +3900000,790.9,3 +4000000,885.6,3 diff --git a/results/hopper-hop.csv b/results/hopper-hop.csv new file mode 100644 index 0000000..7ea281f --- /dev/null +++ b/results/hopper-hop.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,0.0,3 +100000,0.7,3 +200000,145.7,3 
+300000,191.9,3 +400000,210.3,3 +500000,267.9,3 +600000,354.9,3 +700000,361.6,3 +800000,353.3,3 +900000,372.0,3 +1000000,373.1,3 +1100000,357.7,3 +1200000,371.6,3 +1300000,362.0,3 +1400000,330.7,3 +1500000,358.0,3 +1600000,372.9,3 +1700000,376.9,3 +1800000,377.9,3 +1900000,372.6,3 +2000000,382.8,3 +2100000,369.7,3 +2200000,333.8,3 +2300000,366.2,3 +2400000,350.4,3 +2500000,370.0,3 +2600000,376.9,3 +2700000,386.5,3 +2800000,363.4,3 +2900000,432.9,3 +3000000,439.6,3 +3100000,454.1,3 +3200000,452.0,3 +3300000,563.0,3 +3400000,521.1,3 +3500000,532.9,3 +3600000,600.8,3 +3700000,589.0,3 +3800000,553.7,3 +3900000,603.3,3 +4000000,594.2,3 +0,0.0,2 +100000,14.0,2 +200000,215.8,2 +300000,316.3,2 +400000,346.3,2 +500000,306.3,2 +600000,364.2,2 +700000,280.9,2 +800000,365.3,2 +900000,376.3,2 +1000000,271.2,2 +1100000,330.9,2 +1200000,369.7,2 +1300000,376.4,2 +1400000,364.1,2 +1500000,379.7,2 +1600000,373.7,2 +1700000,377.3,2 +1800000,377.5,2 +1900000,383.7,2 +2000000,374.7,2 +2100000,328.6,2 +2200000,377.2,2 +2300000,317.7,2 +2400000,346.7,2 +2500000,383.1,2 +2600000,375.3,2 +2700000,383.4,2 +2800000,377.1,2 +2900000,385.2,2 +3000000,379.7,2 +3100000,377.7,2 +3200000,371.5,2 +3300000,303.0,2 +3400000,380.1,2 +3500000,307.2,2 +3600000,385.1,2 +3700000,380.1,2 +3800000,336.9,2 +3900000,318.2,2 +4000000,373.2,2 +0,0.0,1 +100000,26.2,1 +200000,138.4,1 +300000,271.0,1 +400000,299.7,1 +500000,335.6,1 +600000,354.2,1 +700000,362.2,1 +800000,328.4,1 +900000,247.7,1 +1000000,368.8,1 +1100000,373.6,1 +1200000,276.5,1 +1300000,364.7,1 +1400000,334.9,1 +1500000,302.1,1 +1600000,372.0,1 +1700000,357.4,1 +1800000,366.1,1 +1900000,370.0,1 +2000000,367.5,1 +2100000,375.8,1 +2200000,349.5,1 +2300000,378.7,1 +2400000,369.5,1 +2500000,370.0,1 +2600000,369.9,1 +2700000,372.7,1 +2800000,381.0,1 +2900000,379.5,1 +3000000,370.5,1 +3100000,362.6,1 +3200000,374.0,1 +3300000,343.5,1 +3400000,316.1,1 +3500000,365.2,1 +3600000,373.8,1 +3700000,365.0,1 +3800000,379.5,1 +3900000,378.0,1 +4000000,380.1,1 
diff --git a/results/hopper-stand.csv b/results/hopper-stand.csv new file mode 100644 index 0000000..ea8f49d --- /dev/null +++ b/results/hopper-stand.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,1.2,2 +100000,6.4,2 +200000,326.4,2 +300000,904.0,2 +400000,916.9,2 +500000,932.9,2 +600000,948.9,2 +700000,952.2,2 +800000,948.5,2 +900000,961.6,2 +1000000,950.6,2 +1100000,857.8,2 +1200000,956.3,2 +1300000,960.5,2 +1400000,963.9,2 +1500000,958.0,2 +1600000,961.1,2 +1700000,959.9,2 +1800000,957.6,2 +1900000,963.3,2 +2000000,960.3,2 +2100000,858.5,2 +2200000,966.3,2 +2300000,866.4,2 +2400000,961.3,2 +2500000,956.7,2 +2600000,955.7,2 +2700000,948.3,2 +2800000,958.1,2 +2900000,953.6,2 +3000000,959.7,2 +3100000,954.0,2 +3200000,944.0,2 +3300000,962.9,2 +3400000,961.5,2 +3500000,960.9,2 +3600000,960.7,2 +3700000,971.8,2 +3800000,865.2,2 +3900000,957.4,2 +4000000,951.4,2 +0,1.2,1 +100000,10.1,1 +200000,19.6,1 +300000,534.1,1 +400000,914.2,1 +500000,933.1,1 +600000,941.3,1 +700000,943.9,1 +800000,842.9,1 +900000,940.0,1 +1000000,954.1,1 +1100000,955.1,1 +1200000,947.4,1 +1300000,947.0,1 +1400000,754.6,1 +1500000,955.7,1 +1600000,946.6,1 +1700000,939.2,1 +1800000,954.9,1 +1900000,961.6,1 +2000000,950.7,1 +2100000,940.3,1 +2200000,859.4,1 +2300000,954.5,1 +2400000,954.9,1 +2500000,957.4,1 +2600000,945.0,1 +2700000,961.9,1 +2800000,962.4,1 +2900000,955.5,1 +3000000,959.0,1 +3100000,959.1,1 +3200000,964.6,1 +3300000,856.4,1 +3400000,964.4,1 +3500000,953.2,1 +3600000,962.7,1 +3700000,963.9,1 +3800000,958.6,1 +3900000,916.0,1 +4000000,954.1,1 +0,2.5,3 +100000,68.0,3 +200000,438.5,3 +300000,927.2,3 +400000,750.1,3 +500000,943.4,3 +600000,944.9,3 +700000,954.8,3 +800000,931.8,3 +900000,957.9,3 +1000000,958.3,3 +1100000,959.5,3 +1200000,962.2,3 +1300000,910.7,3 +1400000,940.7,3 +1500000,953.0,3 +1600000,963.7,3 +1700000,965.7,3 +1800000,957.0,3 +1900000,960.9,3 +2000000,951.3,3 +2100000,961.6,3 +2200000,957.2,3 +2300000,964.6,3 +2400000,968.2,3 +2500000,963.7,3 +2600000,955.2,3 
+2700000,962.8,3 +2800000,961.6,3 +2900000,966.1,3 +3000000,962.7,3 +3100000,950.4,3 +3200000,959.6,3 +3300000,960.0,3 +3400000,866.6,3 +3500000,953.1,3 +3600000,955.5,3 +3700000,957.0,3 +3800000,961.6,3 +3900000,946.0,3 +4000000,958.9,3 diff --git a/results/humanoid-run.csv b/results/humanoid-run.csv new file mode 100644 index 0000000..b7f149d --- /dev/null +++ b/results/humanoid-run.csv @@ -0,0 +1,424 @@ +step,reward,seed +0,1.1,3 +100000,1.1,3 +200000,1.2,3 +300000,31.5,3 +400000,111.0,3 +500000,110.7,3 +600000,85.3,3 +700000,126.5,3 +800000,153.6,3 +900000,178.8,3 +1000000,201.0,3 +1100000,219.7,3 +1200000,247.5,3 +1300000,254.9,3 +1400000,254.0,3 +1500000,261.1,3 +1600000,309.5,3 +1700000,306.0,3 +1800000,311.8,3 +1900000,322.5,3 +2000000,312.0,3 +2100000,331.3,3 +2200000,357.4,3 +2300000,359.0,3 +2400000,391.5,3 +2500000,382.9,3 +2600000,400.1,3 +2700000,445.6,3 +2800000,419.5,3 +2900000,450.8,3 +3000000,460.6,3 +3100000,450.0,3 +3200000,438.5,3 +3300000,468.8,3 +3400000,473.2,3 +3500000,501.0,3 +3600000,488.5,3 +3700000,521.3,3 +3800000,549.1,3 +3900000,529.4,3 +4000000,533.0,3 +4100000,521.6,3 +4200000,490.1,3 +4300000,548.1,3 +4400000,525.5,3 +4500000,573.4,3 +4600000,525.4,3 +4700000,527.2,3 +4800000,570.3,3 +4900000,560.0,3 +5000000,571.9,3 +5100000,593.0,3 +5200000,573.7,3 +5300000,577.4,3 +5400000,551.7,3 +5500000,566.1,3 +5600000,568.9,3 +5700000,590.3,3 +5800000,593.0,3 +5900000,599.8,3 +6000000,598.4,3 +6100000,570.4,3 +6200000,612.0,3 +6300000,625.8,3 +6400000,605.6,3 +6500000,579.7,3 +6600000,597.7,3 +6700000,610.7,3 +6800000,603.8,3 +6900000,618.5,3 +7000000,605.8,3 +7100000,554.1,3 +7200000,598.5,3 +7300000,603.2,3 +7400000,594.9,3 +7500000,623.1,3 +7600000,621.4,3 +7700000,640.7,3 +7800000,624.2,3 +7900000,605.8,3 +8000000,617.3,3 +8100000,653.7,3 +8200000,617.3,3 +8300000,637.3,3 +8400000,626.6,3 +8500000,648.1,3 +8600000,645.3,3 +8700000,654.9,3 +8800000,636.2,3 +8900000,620.5,3 +9000000,647.9,3 +9100000,593.9,3 +9200000,657.1,3 
+9300000,573.6,3 +9400000,642.3,3 +9500000,610.5,3 +9600000,657.1,3 +9700000,656.1,3 +9800000,638.2,3 +9900000,611.1,3 +10000000,654.0,3 +10100000,632.8,3 +10200000,635.8,3 +10300000,668.0,3 +10400000,607.4,3 +10500000,642.1,3 +10600000,607.1,3 +10700000,670.4,3 +10800000,678.4,3 +10900000,657.4,3 +11000000,650.6,3 +11100000,661.7,3 +11200000,655.4,3 +11300000,685.4,3 +11400000,656.2,3 +11500000,627.6,3 +11600000,638.4,3 +11700000,672.5,3 +11800000,654.9,3 +11900000,654.4,3 +12000000,646.3,3 +12100000,646.7,3 +12200000,632.5,3 +12300000,653.8,3 +12400000,645.7,3 +12500000,625.3,3 +12600000,603.7,3 +12700000,620.8,3 +12800000,622.0,3 +12900000,654.2,3 +13000000,635.0,3 +13100000,648.9,3 +13200000,666.7,3 +13300000,695.9,3 +13400000,674.7,3 +13500000,683.6,3 +13600000,668.1,3 +13700000,653.0,3 +13800000,637.9,3 +13900000,663.6,3 +14000000,664.6,3 +0,0.9,2 +100000,1.6,2 +200000,1.2,2 +300000,80.8,2 +400000,93.4,2 +500000,118.1,2 +600000,131.0,2 +700000,117.6,2 +800000,163.8,2 +900000,174.3,2 +1000000,157.4,2 +1100000,193.0,2 +1200000,215.1,2 +1300000,226.2,2 +1400000,252.2,2 +1500000,264.0,2 +1600000,252.1,2 +1700000,246.5,2 +1800000,290.7,2 +1900000,295.9,2 +2000000,328.9,2 +2100000,348.3,2 +2200000,326.6,2 +2300000,375.4,2 +2400000,401.8,2 +2500000,393.3,2 +2600000,414.3,2 +2700000,443.0,2 +2800000,446.9,2 +2900000,423.2,2 +3000000,444.6,2 +3100000,434.1,2 +3200000,444.3,2 +3300000,439.9,2 +3400000,440.2,2 +3500000,441.5,2 +3600000,445.8,2 +3700000,472.0,2 +3800000,469.2,2 +3900000,457.1,2 +4000000,459.8,2 +4100000,453.8,2 +4200000,463.7,2 +4300000,398.5,2 +4400000,494.6,2 +4500000,512.3,2 +4600000,501.4,2 +4700000,472.5,2 +4800000,499.7,2 +4900000,512.3,2 +5000000,497.6,2 +5100000,477.5,2 +5200000,570.6,2 +5300000,554.1,2 +5400000,586.9,2 +5500000,545.3,2 +5600000,552.7,2 +5700000,585.6,2 +5800000,578.5,2 +5900000,589.6,2 +6000000,536.1,2 +6100000,549.1,2 +6200000,559.6,2 +6300000,565.0,2 +6400000,563.2,2 +6500000,442.3,2 +6600000,543.4,2 +6700000,541.9,2 
+6800000,544.0,2 +6900000,531.9,2 +7000000,588.9,2 +7100000,609.4,2 +7200000,549.6,2 +7300000,605.3,2 +7400000,546.1,2 +7500000,558.8,2 +7600000,541.5,2 +7700000,557.5,2 +7800000,555.1,2 +7900000,548.0,2 +8000000,601.0,2 +8100000,552.9,2 +8200000,593.2,2 +8300000,603.1,2 +8400000,587.5,2 +8500000,556.5,2 +8600000,583.7,2 +8700000,580.9,2 +8800000,635.6,2 +8900000,551.0,2 +9000000,597.9,2 +9100000,585.3,2 +9200000,514.0,2 +9300000,588.6,2 +9400000,577.4,2 +9500000,579.0,2 +9600000,542.4,2 +9700000,560.2,2 +9800000,562.6,2 +9900000,584.4,2 +10000000,562.4,2 +10100000,572.2,2 +10200000,571.3,2 +10300000,592.9,2 +10400000,627.5,2 +10500000,571.0,2 +10600000,540.4,2 +10700000,574.4,2 +10800000,541.0,2 +10900000,593.8,2 +11000000,627.7,2 +11100000,609.6,2 +11200000,613.7,2 +11300000,573.7,2 +11400000,591.7,2 +11500000,615.3,2 +11600000,572.2,2 +11700000,626.3,2 +11800000,621.4,2 +11900000,584.9,2 +12000000,595.6,2 +12100000,625.8,2 +12200000,591.1,2 +12300000,594.1,2 +12400000,523.3,2 +12500000,613.2,2 +12600000,646.0,2 +12700000,535.3,2 +12800000,610.7,2 +12900000,594.8,2 +13000000,592.1,2 +13100000,620.3,2 +13200000,632.3,2 +13300000,607.1,2 +13400000,620.6,2 +13500000,585.0,2 +13600000,618.0,2 +13700000,490.2,2 +13800000,599.3,2 +13900000,605.7,2 +14000000,603.5,2 +0,0.8,1 +100000,1.0,1 +200000,0.9,1 +300000,57.1,1 +400000,77.6,1 +500000,96.7,1 +600000,144.2,1 +700000,155.5,1 +800000,155.8,1 +900000,181.4,1 +1000000,195.3,1 +1100000,191.3,1 +1200000,185.3,1 +1300000,237.6,1 +1400000,228.1,1 +1500000,239.1,1 +1600000,260.8,1 +1700000,295.3,1 +1800000,271.1,1 +1900000,290.9,1 +2000000,307.7,1 +2100000,305.5,1 +2200000,332.4,1 +2300000,316.3,1 +2400000,361.8,1 +2500000,342.0,1 +2600000,370.6,1 +2700000,370.1,1 +2800000,369.3,1 +2900000,382.8,1 +3000000,411.2,1 +3100000,392.7,1 +3200000,394.9,1 +3300000,375.4,1 +3400000,395.4,1 +3500000,385.1,1 +3600000,366.5,1 +3700000,400.5,1 +3800000,387.0,1 +3900000,351.3,1 +4000000,390.6,1 +4100000,396.1,1 +4200000,427.9,1 
+4300000,386.5,1 +4400000,461.5,1 +4500000,431.7,1 +4600000,434.8,1 +4700000,462.8,1 +4800000,492.8,1 +4900000,416.2,1 +5000000,433.9,1 +5100000,495.6,1 +5200000,445.0,1 +5300000,446.1,1 +5400000,493.0,1 +5500000,445.8,1 +5600000,437.8,1 +5700000,392.6,1 +5800000,469.4,1 +5900000,484.3,1 +6000000,497.8,1 +6100000,445.0,1 +6200000,497.1,1 +6300000,533.3,1 +6400000,480.6,1 +6500000,531.1,1 +6600000,496.3,1 +6700000,475.3,1 +6800000,511.9,1 +6900000,474.0,1 +7000000,513.5,1 +7100000,505.5,1 +7200000,484.0,1 +7300000,521.8,1 +7400000,487.6,1 +7500000,511.6,1 +7600000,522.5,1 +7700000,483.2,1 +7800000,473.1,1 +7900000,462.8,1 +8000000,481.1,1 +8100000,528.6,1 +8200000,504.6,1 +8300000,514.6,1 +8400000,510.4,1 +8500000,527.3,1 +8600000,510.5,1 +8700000,524.8,1 +8800000,520.9,1 +8900000,442.8,1 +9000000,532.7,1 +9100000,466.1,1 +9200000,544.4,1 +9300000,529.0,1 +9400000,549.6,1 +9500000,535.8,1 +9600000,539.7,1 +9700000,504.3,1 +9800000,550.8,1 +9900000,533.0,1 +10000000,561.8,1 +10100000,522.6,1 +10200000,542.6,1 +10300000,526.9,1 +10400000,551.1,1 +10500000,545.3,1 +10600000,482.8,1 +10700000,541.0,1 +10800000,537.9,1 +10900000,541.5,1 +11000000,533.3,1 +11100000,512.0,1 +11200000,518.5,1 +11300000,524.6,1 +11400000,555.7,1 +11500000,540.3,1 +11600000,459.5,1 +11700000,540.9,1 +11800000,540.4,1 +11900000,534.2,1 +12000000,542.8,1 +12100000,511.3,1 +12200000,554.0,1 +12300000,561.8,1 +12400000,544.7,1 +12500000,548.9,1 +12600000,555.9,1 +12700000,511.2,1 +12800000,553.9,1 +12900000,548.4,1 +13000000,542.5,1 +13100000,559.0,1 +13200000,558.0,1 +13300000,555.5,1 +13400000,518.3,1 +13500000,549.5,1 +13600000,545.8,1 +13700000,543.8,1 +13800000,521.8,1 +13900000,542.4,1 +14000000,542.2,1 diff --git a/results/humanoid-stand.csv b/results/humanoid-stand.csv new file mode 100644 index 0000000..2137842 --- /dev/null +++ b/results/humanoid-stand.csv @@ -0,0 +1,424 @@ +step,reward,seed +0,6.3,3 +100000,6.1,3 +200000,7.3,3 +300000,39.4,3 +400000,344.6,3 +500000,357.2,3 
+600000,491.0,3 +700000,483.6,3 +800000,522.8,3 +900000,564.9,3 +1000000,628.3,3 +1100000,699.3,3 +1200000,730.6,3 +1300000,700.1,3 +1400000,777.6,3 +1500000,785.0,3 +1600000,788.7,3 +1700000,862.8,3 +1800000,874.1,3 +1900000,895.2,3 +2000000,892.6,3 +2100000,910.1,3 +2200000,910.9,3 +2300000,898.7,3 +2400000,914.0,3 +2500000,919.6,3 +2600000,918.9,3 +2700000,915.6,3 +2800000,921.8,3 +2900000,927.4,3 +3000000,902.0,3 +3100000,925.7,3 +3200000,932.0,3 +3300000,929.7,3 +3400000,908.9,3 +3500000,903.6,3 +3600000,910.5,3 +3700000,919.3,3 +3800000,903.9,3 +3900000,909.3,3 +4000000,910.5,3 +4100000,903.3,3 +4200000,895.9,3 +4300000,896.4,3 +4400000,917.9,3 +4500000,902.1,3 +4600000,902.4,3 +4700000,921.4,3 +4800000,901.1,3 +4900000,821.7,3 +5000000,896.5,3 +5100000,896.3,3 +5200000,907.2,3 +5300000,885.3,3 +5400000,875.5,3 +5500000,898.7,3 +5600000,871.2,3 +5700000,861.5,3 +5800000,897.4,3 +5900000,910.6,3 +6000000,905.4,3 +6100000,874.9,3 +6200000,898.6,3 +6300000,894.9,3 +6400000,868.3,3 +6500000,853.2,3 +6600000,846.6,3 +6700000,820.3,3 +6800000,821.7,3 +6900000,815.7,3 +7000000,810.9,3 +7100000,831.9,3 +7200000,880.3,3 +7300000,851.9,3 +7400000,892.4,3 +7500000,907.5,3 +7600000,900.7,3 +7700000,891.5,3 +7800000,844.5,3 +7900000,893.0,3 +8000000,877.1,3 +8100000,879.1,3 +8200000,880.3,3 +8300000,902.6,3 +8400000,889.1,3 +8500000,912.4,3 +8600000,904.1,3 +8700000,900.3,3 +8800000,882.0,3 +8900000,893.7,3 +9000000,905.0,3 +9100000,909.7,3 +9200000,890.7,3 +9300000,887.4,3 +9400000,910.1,3 +9500000,923.4,3 +9600000,917.3,3 +9700000,906.2,3 +9800000,920.3,3 +9900000,901.6,3 +10000000,904.4,3 +10100000,914.0,3 +10200000,894.3,3 +10300000,914.2,3 +10400000,909.3,3 +10500000,898.8,3 +10600000,857.9,3 +10700000,835.8,3 +10800000,862.8,3 +10900000,818.2,3 +11000000,785.3,3 +11100000,796.7,3 +11200000,795.4,3 +11300000,820.2,3 +11400000,805.2,3 +11500000,804.4,3 +11600000,892.7,3 +11700000,841.3,3 +11800000,754.7,3 +11900000,842.3,3 +12000000,830.6,3 +12100000,803.2,3 
+12200000,833.2,3 +12300000,820.4,3 +12400000,870.6,3 +12500000,850.4,3 +12600000,856.3,3 +12700000,870.8,3 +12800000,836.0,3 +12900000,863.8,3 +13000000,847.0,3 +13100000,885.2,3 +13200000,881.4,3 +13300000,912.4,3 +13400000,905.1,3 +13500000,892.6,3 +13600000,889.1,3 +13700000,888.1,3 +13800000,888.4,3 +13900000,885.4,3 +14000000,891.5,3 +0,4.5,2 +100000,6.2,2 +200000,6.5,2 +300000,160.2,2 +400000,423.9,2 +500000,428.8,2 +600000,417.8,2 +700000,588.7,2 +800000,644.2,2 +900000,634.1,2 +1000000,670.7,2 +1100000,805.5,2 +1200000,800.6,2 +1300000,781.4,2 +1400000,845.5,2 +1500000,854.6,2 +1600000,861.4,2 +1700000,848.3,2 +1800000,887.0,2 +1900000,899.3,2 +2000000,921.5,2 +2100000,909.5,2 +2200000,929.1,2 +2300000,930.8,2 +2400000,908.1,2 +2500000,927.5,2 +2600000,936.1,2 +2700000,938.6,2 +2800000,935.7,2 +2900000,928.2,2 +3000000,932.9,2 +3100000,919.0,2 +3200000,927.6,2 +3300000,927.7,2 +3400000,926.8,2 +3500000,921.9,2 +3600000,909.6,2 +3700000,901.9,2 +3800000,905.6,2 +3900000,918.6,2 +4000000,918.8,2 +4100000,910.1,2 +4200000,896.0,2 +4300000,895.5,2 +4400000,920.3,2 +4500000,852.1,2 +4600000,888.4,2 +4700000,895.2,2 +4800000,884.8,2 +4900000,870.8,2 +5000000,882.2,2 +5100000,872.7,2 +5200000,860.1,2 +5300000,858.4,2 +5400000,859.7,2 +5500000,858.8,2 +5600000,873.3,2 +5700000,830.0,2 +5800000,912.0,2 +5900000,925.8,2 +6000000,930.3,2 +6100000,890.7,2 +6200000,900.8,2 +6300000,917.6,2 +6400000,923.0,2 +6500000,915.2,2 +6600000,903.0,2 +6700000,881.0,2 +6800000,910.1,2 +6900000,909.5,2 +7000000,901.5,2 +7100000,919.7,2 +7200000,908.3,2 +7300000,890.3,2 +7400000,896.9,2 +7500000,913.7,2 +7600000,910.5,2 +7700000,922.2,2 +7800000,916.3,2 +7900000,903.3,2 +8000000,901.3,2 +8100000,912.2,2 +8200000,897.6,2 +8300000,919.4,2 +8400000,879.5,2 +8500000,896.3,2 +8600000,874.7,2 +8700000,874.8,2 +8800000,906.1,2 +8900000,913.6,2 +9000000,899.7,2 +9100000,896.1,2 +9200000,865.3,2 +9300000,908.9,2 +9400000,886.2,2 +9500000,889.1,2 +9600000,844.5,2 +9700000,899.4,2 
+9800000,899.8,2 +9900000,900.4,2 +10000000,904.5,2 +10100000,887.1,2 +10200000,885.7,2 +10300000,900.0,2 +10400000,894.1,2 +10500000,909.9,2 +10600000,902.6,2 +10700000,894.3,2 +10800000,899.8,2 +10900000,899.1,2 +11000000,901.3,2 +11100000,875.5,2 +11200000,920.3,2 +11300000,912.6,2 +11400000,918.5,2 +11500000,915.4,2 +11600000,907.4,2 +11700000,906.1,2 +11800000,902.2,2 +11900000,913.3,2 +12000000,912.1,2 +12100000,918.6,2 +12200000,912.6,2 +12300000,909.3,2 +12400000,899.3,2 +12500000,916.2,2 +12600000,905.7,2 +12700000,900.5,2 +12800000,896.2,2 +12900000,906.7,2 +13000000,882.3,2 +13100000,863.1,2 +13200000,850.5,2 +13300000,829.7,2 +13400000,802.4,2 +13500000,820.3,2 +13600000,817.1,2 +13700000,812.9,2 +13800000,871.0,2 +13900000,919.3,2 +14000000,912.7,2 +0,4.5,1 +100000,5.3,1 +200000,96.9,1 +300000,281.8,1 +400000,437.1,1 +500000,480.7,1 +600000,518.7,1 +700000,554.6,1 +800000,561.9,1 +900000,518.7,1 +1000000,692.2,1 +1100000,719.7,1 +1200000,779.4,1 +1300000,765.5,1 +1400000,873.5,1 +1500000,877.2,1 +1600000,871.0,1 +1700000,892.9,1 +1800000,899.3,1 +1900000,918.7,1 +2000000,925.8,1 +2100000,893.1,1 +2200000,910.6,1 +2300000,920.5,1 +2400000,940.3,1 +2500000,937.4,1 +2600000,931.6,1 +2700000,925.7,1 +2800000,936.0,1 +2900000,940.3,1 +3000000,923.0,1 +3100000,919.8,1 +3200000,941.3,1 +3300000,927.3,1 +3400000,919.6,1 +3500000,914.5,1 +3600000,919.9,1 +3700000,923.9,1 +3800000,929.8,1 +3900000,918.1,1 +4000000,924.1,1 +4100000,925.1,1 +4200000,899.7,1 +4300000,923.0,1 +4400000,928.6,1 +4500000,927.9,1 +4600000,923.4,1 +4700000,920.7,1 +4800000,915.4,1 +4900000,857.6,1 +5000000,873.3,1 +5100000,921.3,1 +5200000,897.3,1 +5300000,888.5,1 +5400000,899.2,1 +5500000,913.4,1 +5600000,913.0,1 +5700000,895.3,1 +5800000,917.2,1 +5900000,917.1,1 +6000000,914.2,1 +6100000,912.5,1 +6200000,910.7,1 +6300000,911.4,1 +6400000,933.6,1 +6500000,882.6,1 +6600000,893.0,1 +6700000,883.8,1 +6800000,910.7,1 +6900000,892.1,1 +7000000,909.6,1 +7100000,916.2,1 +7200000,898.6,1 
+7300000,882.4,1 +7400000,884.7,1 +7500000,881.1,1 +7600000,884.6,1 +7700000,851.8,1 +7800000,888.4,1 +7900000,891.4,1 +8000000,894.2,1 +8100000,906.2,1 +8200000,889.4,1 +8300000,859.5,1 +8400000,912.0,1 +8500000,873.0,1 +8600000,897.2,1 +8700000,880.7,1 +8800000,877.8,1 +8900000,921.9,1 +9000000,912.4,1 +9100000,894.7,1 +9200000,929.7,1 +9300000,909.6,1 +9400000,917.0,1 +9500000,897.6,1 +9600000,901.7,1 +9700000,898.8,1 +9800000,897.6,1 +9900000,913.2,1 +10000000,894.6,1 +10100000,884.5,1 +10200000,884.8,1 +10300000,884.0,1 +10400000,843.6,1 +10500000,904.8,1 +10600000,908.8,1 +10700000,888.2,1 +10800000,877.3,1 +10900000,892.6,1 +11000000,902.7,1 +11100000,918.5,1 +11200000,899.0,1 +11300000,872.0,1 +11400000,909.0,1 +11500000,889.8,1 +11600000,899.4,1 +11700000,918.9,1 +11800000,891.1,1 +11900000,913.4,1 +12000000,916.2,1 +12100000,898.7,1 +12200000,908.2,1 +12300000,862.1,1 +12400000,876.6,1 +12500000,909.9,1 +12600000,919.1,1 +12700000,922.8,1 +12800000,897.6,1 +12900000,923.7,1 +13000000,928.3,1 +13100000,912.7,1 +13200000,906.0,1 +13300000,922.2,1 +13400000,914.6,1 +13500000,932.0,1 +13600000,918.8,1 +13700000,881.2,1 +13800000,901.1,1 +13900000,916.1,1 +14000000,914.1,1 diff --git a/results/humanoid-walk.csv b/results/humanoid-walk.csv new file mode 100644 index 0000000..ca6fba1 --- /dev/null +++ b/results/humanoid-walk.csv @@ -0,0 +1,424 @@ +step,reward,seed +0,1.3,3 +100000,1.4,3 +200000,1.7,3 +300000,1.4,3 +400000,188.7,3 +500000,389.8,3 +600000,459.7,3 +700000,493.2,3 +800000,520.0,3 +900000,555.7,3 +1000000,610.4,3 +1100000,666.0,3 +1200000,709.2,3 +1300000,767.1,3 +1400000,781.4,3 +1500000,749.4,3 +1600000,776.6,3 +1700000,797.7,3 +1800000,820.0,3 +1900000,874.0,3 +2000000,809.9,3 +2100000,893.2,3 +2200000,877.4,3 +2300000,886.9,3 +2400000,897.8,3 +2500000,890.5,3 +2600000,899.1,3 +2700000,894.1,3 +2800000,912.6,3 +2900000,915.9,3 +3000000,909.3,3 +3100000,925.0,3 +3200000,888.8,3 +3300000,918.4,3 +3400000,921.6,3 +3500000,919.6,3 +3600000,923.8,3 
+3700000,919.4,3 +3800000,924.7,3 +3900000,927.0,3 +4000000,915.7,3 +4100000,918.2,3 +4200000,930.1,3 +4300000,911.5,3 +4400000,919.6,3 +4500000,917.9,3 +4600000,912.1,3 +4700000,921.3,3 +4800000,864.6,3 +4900000,911.2,3 +5000000,904.1,3 +5100000,920.7,3 +5200000,919.2,3 +5300000,921.3,3 +5400000,924.1,3 +5500000,901.1,3 +5600000,909.6,3 +5700000,911.2,3 +5800000,904.3,3 +5900000,919.6,3 +6000000,901.5,3 +6100000,901.6,3 +6200000,924.1,3 +6300000,920.2,3 +6400000,901.9,3 +6500000,907.7,3 +6600000,891.1,3 +6700000,915.6,3 +6800000,918.2,3 +6900000,912.6,3 +7000000,915.2,3 +7100000,904.6,3 +7200000,911.4,3 +7300000,899.5,3 +7400000,918.5,3 +7500000,910.4,3 +7600000,901.9,3 +7700000,918.4,3 +7800000,918.2,3 +7900000,881.9,3 +8000000,905.4,3 +8100000,894.8,3 +8200000,897.3,3 +8300000,847.6,3 +8400000,886.8,3 +8500000,896.0,3 +8600000,902.3,3 +8700000,895.9,3 +8800000,905.8,3 +8900000,900.2,3 +9000000,897.3,3 +9100000,909.1,3 +9200000,906.0,3 +9300000,905.2,3 +9400000,877.9,3 +9500000,886.1,3 +9600000,900.8,3 +9700000,905.1,3 +9800000,901.8,3 +9900000,896.6,3 +10000000,896.2,3 +10100000,894.3,3 +10200000,901.8,3 +10300000,888.4,3 +10400000,895.6,3 +10500000,897.8,3 +10600000,874.4,3 +10700000,892.2,3 +10800000,896.0,3 +10900000,886.8,3 +11000000,888.1,3 +11100000,886.8,3 +11200000,867.7,3 +11300000,885.6,3 +11400000,887.6,3 +11500000,875.5,3 +11600000,884.8,3 +11700000,890.9,3 +11800000,896.9,3 +11900000,882.2,3 +12000000,888.7,3 +12100000,867.7,3 +12200000,891.1,3 +12300000,881.6,3 +12400000,879.1,3 +12500000,881.6,3 +12600000,879.3,3 +12700000,878.3,3 +12800000,874.8,3 +12900000,898.4,3 +13000000,890.7,3 +13100000,873.8,3 +13200000,891.1,3 +13300000,889.1,3 +13400000,897.3,3 +13500000,860.8,3 +13600000,893.6,3 +13700000,898.7,3 +13800000,892.5,3 +13900000,894.4,3 +14000000,892.6,3 +0,0.9,1 +100000,1.4,1 +200000,1.2,1 +300000,3.6,1 +400000,203.0,1 +500000,26.6,1 +600000,391.5,1 +700000,461.6,1 +800000,577.9,1 +900000,590.6,1 +1000000,642.3,1 +1100000,695.9,1 
+1200000,702.9,1 +1300000,753.7,1 +1400000,799.2,1 +1500000,822.6,1 +1600000,841.6,1 +1700000,871.8,1 +1800000,897.8,1 +1900000,886.1,1 +2000000,889.6,1 +2100000,893.9,1 +2200000,900.9,1 +2300000,894.1,1 +2400000,914.0,1 +2500000,913.4,1 +2600000,911.1,1 +2700000,900.8,1 +2800000,899.2,1 +2900000,911.6,1 +3000000,922.6,1 +3100000,925.9,1 +3200000,930.8,1 +3300000,928.9,1 +3400000,931.6,1 +3500000,921.7,1 +3600000,918.8,1 +3700000,926.5,1 +3800000,921.3,1 +3900000,912.4,1 +4000000,917.2,1 +4100000,921.8,1 +4200000,916.3,1 +4300000,895.7,1 +4400000,911.7,1 +4500000,906.1,1 +4600000,905.4,1 +4700000,902.9,1 +4800000,906.4,1 +4900000,894.4,1 +5000000,900.7,1 +5100000,885.9,1 +5200000,884.6,1 +5300000,897.0,1 +5400000,897.8,1 +5500000,901.5,1 +5600000,884.2,1 +5700000,875.0,1 +5800000,849.5,1 +5900000,881.5,1 +6000000,889.6,1 +6100000,846.9,1 +6200000,873.2,1 +6300000,882.9,1 +6400000,878.6,1 +6500000,865.4,1 +6600000,873.5,1 +6700000,875.0,1 +6800000,850.3,1 +6900000,883.9,1 +7000000,858.9,1 +7100000,877.0,1 +7200000,864.8,1 +7300000,867.6,1 +7400000,880.9,1 +7500000,898.6,1 +7600000,913.9,1 +7700000,893.2,1 +7800000,878.9,1 +7900000,916.3,1 +8000000,924.0,1 +8100000,916.7,1 +8200000,903.9,1 +8300000,875.3,1 +8400000,915.5,1 +8500000,904.3,1 +8600000,900.5,1 +8700000,897.3,1 +8800000,912.0,1 +8900000,906.5,1 +9000000,893.3,1 +9100000,908.3,1 +9200000,894.7,1 +9300000,912.0,1 +9400000,893.4,1 +9500000,893.6,1 +9600000,887.9,1 +9700000,888.9,1 +9800000,900.0,1 +9900000,887.4,1 +10000000,901.3,1 +10100000,917.1,1 +10200000,910.6,1 +10300000,903.7,1 +10400000,908.8,1 +10500000,912.0,1 +10600000,913.2,1 +10700000,906.6,1 +10800000,898.6,1 +10900000,898.5,1 +11000000,907.5,1 +11100000,905.0,1 +11200000,895.6,1 +11300000,890.7,1 +11400000,890.6,1 +11500000,893.5,1 +11600000,900.0,1 +11700000,895.0,1 +11800000,903.8,1 +11900000,878.0,1 +12000000,907.9,1 +12100000,904.4,1 +12200000,908.8,1 +12300000,907.3,1 +12400000,884.9,1 +12500000,899.6,1 +12600000,909.8,1 +12700000,917.3,1 
+12800000,875.0,1 +12900000,906.3,1 +13000000,908.1,1 +13100000,898.3,1 +13200000,912.4,1 +13300000,882.0,1 +13400000,903.0,1 +13500000,905.9,1 +13600000,900.3,1 +13700000,914.2,1 +13800000,908.2,1 +13900000,900.0,1 +14000000,901.8,1 +0,0.9,2 +100000,1.6,2 +200000,1.9,2 +300000,4.0,2 +400000,174.7,2 +500000,417.8,2 +600000,459.4,2 +700000,497.4,2 +800000,522.2,2 +900000,570.0,2 +1000000,632.0,2 +1100000,678.6,2 +1200000,734.7,2 +1300000,835.6,2 +1400000,851.6,2 +1500000,871.1,2 +1600000,872.1,2 +1700000,890.1,2 +1800000,901.5,2 +1900000,894.3,2 +2000000,879.5,2 +2100000,892.8,2 +2200000,899.8,2 +2300000,888.5,2 +2400000,885.9,2 +2500000,915.9,2 +2600000,913.8,2 +2700000,915.7,2 +2800000,916.9,2 +2900000,919.6,2 +3000000,921.4,2 +3100000,911.4,2 +3200000,922.8,2 +3300000,925.1,2 +3400000,927.0,2 +3500000,928.2,2 +3600000,913.7,2 +3700000,911.9,2 +3800000,923.2,2 +3900000,930.5,2 +4000000,904.3,2 +4100000,917.4,2 +4200000,910.5,2 +4300000,915.6,2 +4400000,916.0,2 +4500000,901.0,2 +4600000,920.5,2 +4700000,908.3,2 +4800000,897.4,2 +4900000,899.3,2 +5000000,887.3,2 +5100000,909.6,2 +5200000,916.2,2 +5300000,885.1,2 +5400000,889.7,2 +5500000,883.8,2 +5600000,867.0,2 +5700000,879.3,2 +5800000,875.6,2 +5900000,871.8,2 +6000000,865.6,2 +6100000,876.3,2 +6200000,869.0,2 +6300000,868.2,2 +6400000,882.7,2 +6500000,893.9,2 +6600000,831.6,2 +6700000,861.2,2 +6800000,877.0,2 +6900000,877.6,2 +7000000,919.1,2 +7100000,902.7,2 +7200000,883.8,2 +7300000,922.8,2 +7400000,917.7,2 +7500000,904.8,2 +7600000,892.9,2 +7700000,904.9,2 +7800000,882.7,2 +7900000,899.9,2 +8000000,923.5,2 +8100000,909.7,2 +8200000,909.8,2 +8300000,905.5,2 +8400000,891.7,2 +8500000,894.1,2 +8600000,880.1,2 +8700000,911.9,2 +8800000,897.6,2 +8900000,914.7,2 +9000000,906.9,2 +9100000,901.5,2 +9200000,896.9,2 +9300000,899.5,2 +9400000,886.8,2 +9500000,896.7,2 +9600000,886.0,2 +9700000,903.7,2 +9800000,906.4,2 +9900000,898.0,2 +10000000,889.3,2 +10100000,907.9,2 +10200000,911.9,2 +10300000,889.2,2 
+10400000,908.4,2 +10500000,907.9,2 +10600000,904.9,2 +10700000,883.3,2 +10800000,889.2,2 +10900000,886.2,2 +11000000,867.3,2 +11100000,854.2,2 +11200000,862.8,2 +11300000,861.3,2 +11400000,852.1,2 +11500000,857.3,2 +11600000,849.2,2 +11700000,854.6,2 +11800000,854.0,2 +11900000,863.6,2 +12000000,864.4,2 +12100000,815.6,2 +12200000,859.7,2 +12300000,860.0,2 +12400000,845.5,2 +12500000,863.5,2 +12600000,844.4,2 +12700000,861.1,2 +12800000,860.9,2 +12900000,864.5,2 +13000000,915.4,2 +13100000,904.3,2 +13200000,896.1,2 +13300000,906.4,2 +13400000,911.4,2 +13500000,886.9,2 +13600000,890.4,2 +13700000,905.9,2 +13800000,892.8,2 +13900000,911.8,2 +14000000,906.2,2 diff --git a/results/lift-cube.csv b/results/lift-cube.csv new file mode 100644 index 0000000..973599e --- /dev/null +++ b/results/lift-cube.csv @@ -0,0 +1,124 @@ +step,success,seed +0,0.0,2 +100000,1.0,2 +200000,0.9,2 +300000,1.0,2 +400000,0.8,2 +500000,1.0,2 +600000,1.0,2 +700000,0.9,2 +800000,1.0,2 +900000,0.9,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,0.9,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +2100000,1.0,2 +2200000,1.0,2 +2300000,0.9,2 +2400000,1.0,2 +2500000,1.0,2 +2600000,1.0,2 +2700000,1.0,2 +2800000,1.0,2 +2900000,1.0,2 +3000000,1.0,2 +3100000,1.0,2 +3200000,0.9,2 +3300000,0.9,2 +3400000,1.0,2 +3500000,1.0,2 +3600000,0.7,2 +3700000,1.0,2 +3800000,1.0,2 +3900000,1.0,2 +4000000,1.0,2 +0,0.0,1 +100000,0.9,1 +200000,0.8,1 +300000,0.9,1 +400000,1.0,1 +500000,0.9,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +2100000,1.0,1 +2200000,1.0,1 +2300000,0.9,1 +2400000,1.0,1 +2500000,1.0,1 +2600000,1.0,1 +2700000,1.0,1 +2800000,1.0,1 +2900000,0.9,1 +3000000,1.0,1 +3100000,1.0,1 +3200000,1.0,1 +3300000,1.0,1 +3400000,1.0,1 +3500000,1.0,1 +3600000,1.0,1 
+3700000,1.0,1 +3800000,0.8,1 +3900000,0.9,1 +4000000,1.0,1 +0,0.0,3 +100000,0.8,3 +200000,1.0,3 +300000,0.9,3 +400000,1.0,3 +500000,0.9,3 +600000,1.0,3 +700000,1.0,3 +800000,0.9,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,0.9,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,0.9,3 +2000000,1.0,3 +2100000,1.0,3 +2200000,1.0,3 +2300000,1.0,3 +2400000,1.0,3 +2500000,1.0,3 +2600000,1.0,3 +2700000,1.0,3 +2800000,1.0,3 +2900000,1.0,3 +3000000,1.0,3 +3100000,1.0,3 +3200000,1.0,3 +3300000,1.0,3 +3400000,1.0,3 +3500000,1.0,3 +3600000,1.0,3 +3700000,0.9,3 +3800000,1.0,3 +3900000,1.0,3 +4000000,1.0,3 diff --git a/results/mw-assembly.csv b/results/mw-assembly.csv new file mode 100644 index 0000000..ffb3af1 --- /dev/null +++ b/results/mw-assembly.csv @@ -0,0 +1,64 @@ +step,success,seed +0,0.0,3 +100000,0.1,3 +200000,0.8,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +2000000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.0,1 +200000,0.0,1 +300000,0.0,1 +400000,0.0,1 +500000,0.0,1 +600000,0.0,1 +700000,0.0,1 +800000,0.0,1 +900000,0.0,1 +1000000,0.0,1 +1100000,0.0,1 +1200000,0.2,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-basketball.csv b/results/mw-basketball.csv new file mode 100644 index 0000000..2777a24 --- /dev/null +++ b/results/mw-basketball.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.2,3 +200000,0.5,3 +300000,0.9,3 
+400000,1.0,3 +500000,0.6,3 +600000,1.0,3 +700000,1.0,3 +800000,0.8,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,0.9,3 +1300000,0.9,3 +1400000,0.7,3 +1500000,1.0,3 +1600000,0.9,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,0.0,2 +300000,0.5,2 +400000,0.7,2 +500000,1.0,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,0.9,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,0.9,2 +1800000,0.9,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.4,1 +200000,0.8,1 +300000,0.6,1 +400000,1.0,1 +500000,0.9,1 +600000,0.9,1 +700000,0.9,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.9,1 +1300000,0.8,1 +1400000,0.9,1 +1500000,0.9,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-bin-picking.csv b/results/mw-bin-picking.csv new file mode 100644 index 0000000..1a9fc1f --- /dev/null +++ b/results/mw-bin-picking.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.6,3 +200000,1.0,3 +300000,0.8,3 +400000,0.9,3 +500000,1.0,3 +600000,0.9,3 +700000,0.9,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,0.9,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,0.9,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,0.9,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,0.9,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,0.9,2 +0,0.0,1 +100000,0.0,1 +200000,1.0,1 +300000,0.9,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,0.9,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-box-close.csv b/results/mw-box-close.csv new file mode 
100644 index 0000000..8a4d362 --- /dev/null +++ b/results/mw-box-close.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.7,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,0.9,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.8,2 +200000,1.0,2 +300000,0.9,2 +400000,1.0,2 +500000,1.0,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,0.9,2 +2000000,0.9,2 +0,0.0,1 +100000,1.0,1 +200000,0.8,1 +300000,0.9,1 +400000,0.9,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,0.9,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,0.9,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-button-press-topdown-wall.csv b/results/mw-button-press-topdown-wall.csv new file mode 100644 index 0000000..5c0c3a7 --- /dev/null +++ b/results/mw-button-press-topdown-wall.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.8,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,0.7,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 
+1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-button-press-topdown.csv b/results/mw-button-press-topdown.csv new file mode 100644 index 0000000..cde7e65 --- /dev/null +++ b/results/mw-button-press-topdown.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-button-press-wall.csv b/results/mw-button-press-wall.csv new file mode 100644 index 0000000..11b5931 --- /dev/null +++ b/results/mw-button-press-wall.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.6,3 +200000,0.7,3 +300000,0.9,3 +400000,0.9,3 +500000,1.0,3 +600000,0.7,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,0.9,3 +1900000,1.0,3 +0,0.0,2 +100000,0.7,2 +200000,1.0,2 +300000,1.0,2 +400000,0.9,2 +500000,0.8,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,0.9,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 
+1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.7,1 +200000,1.0,1 +300000,0.7,1 +400000,1.0,1 +500000,1.0,1 +600000,0.9,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.5,1 +1300000,1.0,1 +1400000,0.7,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-button-press.csv b/results/mw-button-press.csv new file mode 100644 index 0000000..f7a4f18 --- /dev/null +++ b/results/mw-button-press.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.5,3 +300000,0.5,3 +400000,0.6,3 +500000,1.0,3 +600000,0.5,3 +700000,1.0,3 +800000,0.3,3 +900000,0.6,3 +1000000,0.8,3 +1100000,0.7,3 +1200000,0.9,3 +1300000,0.7,3 +1400000,0.7,3 +1500000,0.2,3 +1600000,0.6,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,0.1,2 +300000,0.4,2 +400000,0.6,2 +500000,0.9,2 +600000,0.4,2 +700000,0.3,2 +800000,1.0,2 +900000,0.5,2 +1000000,0.7,2 +1100000,0.8,2 +1200000,1.0,2 +1300000,0.7,2 +1400000,1.0,2 +1500000,0.9,2 +1600000,0.9,2 +1700000,0.9,2 +1800000,1.0,2 +1900000,0.6,2 +2000000,0.8,2 +0,0.0,1 +100000,0.8,1 +200000,0.0,1 +300000,0.5,1 +400000,1.0,1 +500000,0.8,1 +600000,0.9,1 +700000,0.7,1 +800000,1.0,1 +900000,0.5,1 +1000000,0.8,1 +1100000,0.7,1 +1200000,0.7,1 +1300000,1.0,1 +1400000,0.8,1 +1500000,0.9,1 +1600000,1.0,1 +1700000,0.9,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-coffee-button.csv b/results/mw-coffee-button.csv new file mode 100644 index 0000000..001f8e4 --- /dev/null +++ b/results/mw-coffee-button.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.4,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 
+700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-coffee-pull.csv b/results/mw-coffee-pull.csv new file mode 100644 index 0000000..7d594c9 --- /dev/null +++ b/results/mw-coffee-pull.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.8,3 +200000,0.9,3 +300000,0.7,3 +400000,1.0,3 +500000,0.9,3 +600000,0.9,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,0.9,3 +1200000,0.9,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.3,2 +200000,0.9,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,0.9,2 +700000,0.8,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,0.9,2 +1300000,0.8,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.0,1 +200000,0.8,1 +300000,1.0,1 +400000,1.0,1 +500000,0.8,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,0.8,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,0.9,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,0.9,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-coffee-push.csv b/results/mw-coffee-push.csv new file mode 100644 index 0000000..ac53242 --- /dev/null +++ b/results/mw-coffee-push.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.6,3 +200000,0.8,3 +300000,0.8,3 +400000,0.8,3 +500000,1.0,3 +600000,0.8,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,0.9,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 
+1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.6,2 +200000,0.8,2 +300000,0.9,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,0.8,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,0.9,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.2,1 +200000,0.4,1 +300000,0.7,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,0.9,1 +900000,0.9,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.9,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,0.9,1 diff --git a/results/mw-dial-turn.csv b/results/mw-dial-turn.csv new file mode 100644 index 0000000..fc62674 --- /dev/null +++ b/results/mw-dial-turn.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.3,3 +200000,0.5,3 +300000,0.4,3 +400000,0.4,3 +500000,0.4,3 +600000,0.5,3 +700000,0.9,3 +800000,0.7,3 +900000,0.8,3 +1000000,0.9,3 +1100000,0.9,3 +1200000,0.8,3 +1300000,0.9,3 +1400000,0.8,3 +1500000,0.8,3 +1600000,0.7,3 +1700000,1.0,3 +1800000,0.6,3 +1900000,0.8,3 +0,0.0,2 +100000,0.3,2 +200000,0.9,2 +300000,0.9,2 +400000,0.7,2 +500000,0.9,2 +600000,0.7,2 +700000,0.9,2 +800000,0.7,2 +900000,0.8,2 +1000000,0.8,2 +1100000,0.6,2 +1200000,0.8,2 +1300000,0.9,2 +1400000,0.5,2 +1500000,0.6,2 +1600000,0.8,2 +1700000,0.7,2 +1800000,0.9,2 +1900000,0.8,2 +2000000,0.7,2 +0,0.0,1 +100000,0.8,1 +200000,0.7,1 +300000,0.8,1 +400000,0.5,1 +500000,0.8,1 +600000,0.8,1 +700000,0.4,1 +800000,0.9,1 +900000,0.8,1 +1000000,1.0,1 +1100000,0.9,1 +1200000,0.8,1 +1300000,0.9,1 +1400000,0.7,1 +1500000,0.8,1 +1600000,0.9,1 +1700000,0.7,1 +1800000,0.9,1 +1900000,1.0,1 +2000000,0.9,1 diff --git a/results/mw-disassemble.csv b/results/mw-disassemble.csv new file mode 100644 index 0000000..1b8bcad --- /dev/null +++ b/results/mw-disassemble.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,1.0,3 +300000,1.0,3 
+400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,0.9,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.7,2 +200000,1.0,2 +300000,0.8,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,0.9,2 +900000,1.0,2 +1000000,0.9,2 +1100000,0.9,2 +1200000,0.9,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.0,1 +200000,0.0,1 +300000,0.0,1 +400000,0.0,1 +500000,0.0,1 +600000,0.0,1 +700000,0.0,1 +800000,0.0,1 +900000,0.0,1 +1000000,0.0,1 +1100000,0.0,1 +1200000,0.0,1 +1300000,0.0,1 +1400000,0.0,1 +1500000,0.0,1 +1600000,0.0,1 +1700000,0.0,1 +1800000,0.0,1 +1900000,0.0,1 +2000000,0.0,1 diff --git a/results/mw-door-close.csv b/results/mw-door-close.csv new file mode 100644 index 0000000..2a9802f --- /dev/null +++ b/results/mw-door-close.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0,3 +100000,1,3 +200000,1,3 +300000,1,3 +400000,1,3 +500000,1,3 +600000,1,3 +700000,1,3 +800000,1,3 +900000,1,3 +1000000,1,3 +1100000,1,3 +1200000,1,3 +1300000,1,3 +1400000,1,3 +1500000,1,3 +1600000,1,3 +1700000,1,3 +1800000,1,3 +1900000,1,3 +0,0,1 +100000,1,1 +200000,1,1 +300000,1,1 +400000,1,1 +500000,1,1 +600000,1,1 +700000,1,1 +800000,1,1 +900000,1,1 +1000000,1,1 +1100000,1,1 +1200000,1,1 +1300000,1,1 +1400000,1,1 +1500000,1,1 +1600000,1,1 +1700000,1,1 +1800000,1,1 +1900000,1,1 +2000000,1,1 +0,0,2 +100000,1,2 +200000,1,2 +300000,1,2 +400000,1,2 +500000,1,2 +600000,1,2 +700000,1,2 +800000,1,2 +900000,1,2 +1000000,1,2 +1100000,1,2 +1200000,1,2 +1300000,1,2 +1400000,1,2 +1500000,1,2 +1600000,1,2 +1700000,1,2 +1800000,1,2 +1900000,1,2 +2000000,1,2 diff --git a/results/mw-door-lock.csv b/results/mw-door-lock.csv new file mode 100644 index 0000000..37e1dd1 --- /dev/null +++ b/results/mw-door-lock.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 
+100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.8,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.5,2 +200000,1.0,2 +300000,0.9,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,0.9,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-door-open.csv b/results/mw-door-open.csv new file mode 100644 index 0000000..33147bf --- /dev/null +++ b/results/mw-door-open.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.7,3 +200000,0.7,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,0.9,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,0.9,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,0.7,3 +1700000,1.0,3 +1800000,0.9,3 +1900000,1.0,3 +0,0.0,2 +100000,0.6,2 +200000,1.0,2 +300000,0.6,2 +400000,1.0,2 +500000,0.6,2 +600000,0.8,2 +700000,0.9,2 +800000,1.0,2 +900000,0.9,2 +1000000,0.9,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,0.7,2 +1500000,0.8,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,0.9,2 +2000000,0.9,2 +0,0.0,1 +100000,0.7,1 +200000,0.5,1 +300000,1.0,1 +400000,1.0,1 +500000,0.8,1 +600000,0.4,1 +700000,1.0,1 +800000,0.7,1 +900000,0.9,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.7,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,0.8,1 +1600000,0.7,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,0.8,1 diff --git a/results/mw-door-unlock.csv 
b/results/mw-door-unlock.csv new file mode 100644 index 0000000..9e28368 --- /dev/null +++ b/results/mw-door-unlock.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.9,2 +200000,0.7,2 +300000,0.9,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-drawer-close.csv b/results/mw-drawer-close.csv new file mode 100644 index 0000000..d16a1dc --- /dev/null +++ b/results/mw-drawer-close.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.1,3 +100000,0.9,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.1,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,0.9,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 
+1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,0.9,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-drawer-open.csv b/results/mw-drawer-open.csv new file mode 100644 index 0000000..5f5138a --- /dev/null +++ b/results/mw-drawer-open.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,0.8,3 +600000,0.9,3 +700000,1.0,3 +800000,0.9,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,0.8,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,0.6,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,0.9,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,0.9,1 +700000,1.0,1 +800000,0.8,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-faucet-close.csv b/results/mw-faucet-close.csv new file mode 100644 index 0000000..1404e20 --- /dev/null +++ b/results/mw-faucet-close.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,0.9,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 
+0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-faucet-open.csv b/results/mw-faucet-open.csv new file mode 100644 index 0000000..e89699f --- /dev/null +++ b/results/mw-faucet-open.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,0.8,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,0.9,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,0.9,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.9,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,0.9,2 +1000000,1.0,2 +1100000,0.9,2 +1200000,0.9,2 +1300000,0.9,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.9,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,0.9,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-hammer.csv b/results/mw-hammer.csv new file mode 100644 index 0000000..c66aa4c --- /dev/null +++ b/results/mw-hammer.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.8,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.9,2 +200000,1.0,2 +300000,0.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,0.3,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 
+1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,0.9,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-hand-insert.csv b/results/mw-hand-insert.csv new file mode 100644 index 0000000..a31ea68 --- /dev/null +++ b/results/mw-hand-insert.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.8,3 +200000,1.0,3 +300000,1.0,3 +400000,0.9,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,0.9,2 +300000,0.7,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,0.9,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.9,1 +200000,1.0,1 +300000,0.9,1 +400000,0.9,1 +500000,0.8,1 +600000,0.9,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-handle-press-side.csv b/results/mw-handle-press-side.csv new file mode 100644 index 0000000..cde7e65 --- /dev/null +++ b/results/mw-handle-press-side.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 
+1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-handle-press.csv b/results/mw-handle-press.csv new file mode 100644 index 0000000..85a742e --- /dev/null +++ b/results/mw-handle-press.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,0.9,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,0.9,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,0.9,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-handle-pull-side.csv b/results/mw-handle-pull-side.csv new file mode 100644 index 0000000..a66cb0f --- /dev/null +++ b/results/mw-handle-pull-side.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,0.9,3 +300000,1.0,3 +400000,1.0,3 
+500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.9,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,0.9,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-handle-pull.csv b/results/mw-handle-pull.csv new file mode 100644 index 0000000..9d9ca47 --- /dev/null +++ b/results/mw-handle-pull.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.0,3 +300000,0.8,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,0.0,2 +300000,0.9,2 +400000,0.8,2 +500000,1.0,2 +600000,1.0,2 +700000,0.8,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,0.9,2 +1200000,0.9,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,0.9,1 +400000,0.9,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-lever-pull.csv b/results/mw-lever-pull.csv new file mode 100644 index 
0000000..61015e8 --- /dev/null +++ b/results/mw-lever-pull.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.4,3 +200000,0.8,3 +300000,0.8,3 +400000,0.7,3 +500000,0.3,3 +600000,1.0,3 +700000,0.2,3 +800000,0.8,3 +900000,0.8,3 +1000000,0.8,3 +1100000,0.7,3 +1200000,1.0,3 +1300000,0.9,3 +1400000,0.6,3 +1500000,0.8,3 +1600000,0.8,3 +1700000,1.0,3 +1800000,0.1,3 +1900000,0.9,3 +0,0.0,2 +100000,0.9,2 +200000,0.6,2 +300000,0.7,2 +400000,0.8,2 +500000,0.3,2 +600000,0.9,2 +700000,0.6,2 +800000,0.8,2 +900000,0.9,2 +1000000,0.9,2 +1100000,0.7,2 +1200000,0.9,2 +1300000,0.9,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,0.9,2 +1700000,0.7,2 +1800000,1.0,2 +1900000,0.6,2 +2000000,0.8,2 +0,0.0,1 +100000,0.7,1 +200000,0.2,1 +300000,0.8,1 +400000,0.9,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,0.8,1 +1100000,0.5,1 +1200000,0.8,1 +1300000,0.8,1 +1400000,0.7,1 +1500000,0.9,1 +1600000,0.8,1 +1700000,0.7,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,0.7,1 diff --git a/results/mw-peg-insert-side.csv b/results/mw-peg-insert-side.csv new file mode 100644 index 0000000..47364de --- /dev/null +++ b/results/mw-peg-insert-side.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.9,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,0.9,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,0.8,1 +300000,0.9,1 +400000,1.0,1 +500000,1.0,1 +600000,0.9,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,0.9,1 +1600000,1.0,1 +1700000,0.9,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.8,2 +200000,0.7,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 
+1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-peg-unplug-side.csv b/results/mw-peg-unplug-side.csv new file mode 100644 index 0000000..269a6a7 --- /dev/null +++ b/results/mw-peg-unplug-side.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.7,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,0.9,3 +1100000,0.9,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,0.9,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.6,2 +200000,1.0,2 +300000,0.8,2 +400000,0.9,2 +500000,0.9,2 +600000,1.0,2 +700000,0.9,2 +800000,0.9,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,0.9,2 +1500000,0.9,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.8,1 +200000,0.9,1 +300000,0.8,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,0.9,1 +1100000,0.8,1 +1200000,1.0,1 +1300000,0.9,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-pick-out-of-hole.csv b/results/mw-pick-out-of-hole.csv new file mode 100644 index 0000000..17fa040 --- /dev/null +++ b/results/mw-pick-out-of-hole.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.5,3 +200000,0.9,3 +300000,0.8,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,0.9,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,0.9,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.5,2 +200000,1.0,2 +300000,0.7,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,0.9,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,0.8,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,0.8,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,0.9,2 +2000000,1.0,2 +0,0.0,1 
+100000,0.6,1 +200000,0.7,1 +300000,0.6,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,0.9,1 +800000,1.0,1 +900000,0.9,1 +1000000,0.9,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,0.8,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-pick-place-wall.csv b/results/mw-pick-place-wall.csv new file mode 100644 index 0000000..db7e0d9 --- /dev/null +++ b/results/mw-pick-place-wall.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.0,3 +300000,0.1,3 +400000,0.3,3 +500000,0.7,3 +600000,0.8,3 +700000,0.8,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,0.6,3 +1300000,0.5,3 +1400000,1.0,3 +1500000,0.8,3 +1600000,0.8,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,0.5,3 +0,0.0,2 +100000,0.0,2 +200000,0.0,2 +300000,0.0,2 +400000,0.1,2 +500000,0.6,2 +600000,0.8,2 +700000,0.8,2 +800000,1.0,2 +900000,0.9,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,0.9,2 +1300000,0.3,2 +1400000,0.9,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,0.9,2 +1900000,0.9,2 +2000000,1.0,2 +0,0.0,1 +100000,0.0,1 +200000,0.0,1 +300000,0.5,1 +400000,0.6,1 +500000,0.7,1 +600000,1.0,1 +700000,0.9,1 +800000,0.9,1 +900000,1.0,1 +1000000,1.0,1 +1100000,0.4,1 +1200000,0.9,1 +1300000,0.9,1 +1400000,0.8,1 +1500000,0.8,1 +1600000,1.0,1 +1700000,0.9,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,0.7,1 diff --git a/results/mw-pick-place.csv b/results/mw-pick-place.csv new file mode 100644 index 0000000..87d4d8b --- /dev/null +++ b/results/mw-pick-place.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.6,3 +300000,0.6,3 +400000,0.9,3 +500000,0.9,3 +600000,1.0,3 +700000,0.9,3 +800000,0.7,3 +900000,0.9,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,0.9,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,0.9,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,0.1,1 +200000,0.7,1 +300000,0.8,1 +400000,0.8,1 +500000,0.5,1 +600000,1.0,1 +700000,1.0,1 +800000,0.7,1 +900000,1.0,1 
+1000000,0.5,1 +1100000,0.9,1 +1200000,0.9,1 +1300000,1.0,1 +1400000,0.4,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.2,2 +200000,0.6,2 +300000,0.8,2 +400000,0.8,2 +500000,0.9,2 +600000,1.0,2 +700000,0.9,2 +800000,0.8,2 +900000,0.9,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,0.9,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,0.9,2 +1600000,1.0,2 +1700000,0.9,2 +1800000,0.8,2 +1900000,1.0,2 +2000000,0.6,2 diff --git a/results/mw-plate-slide-back-side.csv b/results/mw-plate-slide-back-side.csv new file mode 100644 index 0000000..88c5a18 --- /dev/null +++ b/results/mw-plate-slide-back-side.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,0.3,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-plate-slide-back.csv b/results/mw-plate-slide-back.csv new file mode 100644 index 0000000..2a9802f --- /dev/null +++ b/results/mw-plate-slide-back.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0,3 +100000,1,3 +200000,1,3 +300000,1,3 +400000,1,3 +500000,1,3 +600000,1,3 +700000,1,3 +800000,1,3 +900000,1,3 +1000000,1,3 +1100000,1,3 +1200000,1,3 +1300000,1,3 +1400000,1,3 +1500000,1,3 +1600000,1,3 
+1700000,1,3 +1800000,1,3 +1900000,1,3 +0,0,1 +100000,1,1 +200000,1,1 +300000,1,1 +400000,1,1 +500000,1,1 +600000,1,1 +700000,1,1 +800000,1,1 +900000,1,1 +1000000,1,1 +1100000,1,1 +1200000,1,1 +1300000,1,1 +1400000,1,1 +1500000,1,1 +1600000,1,1 +1700000,1,1 +1800000,1,1 +1900000,1,1 +2000000,1,1 +0,0,2 +100000,1,2 +200000,1,2 +300000,1,2 +400000,1,2 +500000,1,2 +600000,1,2 +700000,1,2 +800000,1,2 +900000,1,2 +1000000,1,2 +1100000,1,2 +1200000,1,2 +1300000,1,2 +1400000,1,2 +1500000,1,2 +1600000,1,2 +1700000,1,2 +1800000,1,2 +1900000,1,2 +2000000,1,2 diff --git a/results/mw-plate-slide-side.csv b/results/mw-plate-slide-side.csv new file mode 100644 index 0000000..b537d57 --- /dev/null +++ b/results/mw-plate-slide-side.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.9,3 +200000,0.7,3 +300000,0.9,3 +400000,0.8,3 +500000,0.3,3 +600000,0.8,3 +700000,0.8,3 +800000,0.6,3 +900000,1.0,3 +1000000,0.2,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,0.9,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,0.8,2 +500000,0.9,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,0.9,2 +1700000,0.9,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.4,1 +200000,0.2,1 +300000,0.6,1 +400000,0.8,1 +500000,0.7,1 +600000,0.9,1 +700000,0.9,1 +800000,0.8,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,0.8,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,0.8,1 diff --git a/results/mw-plate-slide.csv b/results/mw-plate-slide.csv new file mode 100644 index 0000000..062a9c0 --- /dev/null +++ b/results/mw-plate-slide.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 
+1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-push-back.csv b/results/mw-push-back.csv new file mode 100644 index 0000000..84a31aa --- /dev/null +++ b/results/mw-push-back.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.0,3 +300000,0.0,3 +400000,0.0,3 +500000,0.0,3 +600000,0.0,3 +700000,0.0,3 +800000,0.0,3 +900000,0.0,3 +1000000,0.0,3 +1100000,0.0,3 +1200000,0.0,3 +1300000,0.0,3 +1400000,0.0,3 +1500000,0.0,3 +1600000,0.0,3 +1700000,0.0,3 +1800000,0.0,3 +1900000,0.0,3 +0,0.0,1 +100000,0.1,1 +200000,0.7,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,0.9,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-push-wall.csv b/results/mw-push-wall.csv new file mode 100644 index 0000000..a31840b --- /dev/null +++ b/results/mw-push-wall.csv @@ -0,0 +1,63 @@ +step,success,seed 
+0,0.0,3 +100000,0.9,3 +200000,1.0,3 +300000,1.0,3 +400000,0.7,3 +500000,1.0,3 +600000,1.0,3 +700000,0.5,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,0.6,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.9,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.5,2 +200000,0.9,2 +300000,0.9,2 +400000,0.8,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-push.csv b/results/mw-push.csv new file mode 100644 index 0000000..13bbe48 --- /dev/null +++ b/results/mw-push.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.6,3 +200000,0.9,3 +300000,0.9,3 +400000,0.4,3 +500000,0.7,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,0.7,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,0.9,1 +200000,1.0,1 +300000,0.5,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,0.9,1 +1200000,1.0,1 +1300000,0.9,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.4,2 +200000,1.0,2 +300000,0.9,2 +400000,0.8,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-reach-wall.csv 
b/results/mw-reach-wall.csv new file mode 100644 index 0000000..edf23a7 --- /dev/null +++ b/results/mw-reach-wall.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,0.8,3 +300000,1.0,3 +400000,1.0,3 +500000,0.8,3 +600000,0.9,3 +700000,0.9,3 +800000,1.0,3 +900000,0.9,3 +1000000,1.0,3 +1100000,0.9,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.6,2 +200000,0.9,2 +300000,0.8,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,0.7,2 +1700000,1.0,2 +1800000,0.9,2 +1900000,1.0,2 +2000000,0.9,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,0.9,1 +400000,0.1,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,0.8,1 +1000000,0.9,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,0.9,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,0.7,1 +1900000,0.9,1 +2000000,0.9,1 diff --git a/results/mw-reach.csv b/results/mw-reach.csv new file mode 100644 index 0000000..9045bfb --- /dev/null +++ b/results/mw-reach.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,0.2,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,0.8,3 +1700000,0.9,3 +1800000,1.0,3 +1900000,0.6,3 +0,0.0,1 +100000,0.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,0.8,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.6,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,0.9,1 +1600000,0.9,1 +1700000,1.0,1 +1800000,0.9,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.9,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,0.9,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,0.6,2 
+1400000,1.0,2 +1500000,0.8,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,0.9,2 diff --git a/results/mw-shelf-place.csv b/results/mw-shelf-place.csv new file mode 100644 index 0000000..7db31a7 --- /dev/null +++ b/results/mw-shelf-place.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,0.9,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,0.6,2 +300000,1.0,2 +400000,0.9,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.0,1 +200000,0.9,1 +300000,0.6,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,0.9,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-soccer.csv b/results/mw-soccer.csv new file mode 100644 index 0000000..6562fc2 --- /dev/null +++ b/results/mw-soccer.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.3,3 +200000,0.9,3 +300000,0.6,3 +400000,0.6,3 +500000,0.8,3 +600000,0.9,3 +700000,0.9,3 +800000,0.9,3 +900000,1.0,3 +1000000,0.8,3 +1100000,1.0,3 +1200000,0.7,3 +1300000,0.9,3 +1400000,1.0,3 +1500000,0.9,3 +1600000,0.7,3 +1700000,1.0,3 +1800000,0.9,3 +1900000,0.8,3 +0,0.0,2 +100000,0.3,2 +200000,0.5,2 +300000,0.8,2 +400000,1.0,2 +500000,1.0,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,0.1,1 +200000,0.7,1 
+300000,0.6,1 +400000,0.8,1 +500000,0.8,1 +600000,0.7,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,0.8,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,0.7,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-stick-pull.csv b/results/mw-stick-pull.csv new file mode 100644 index 0000000..7184c28 --- /dev/null +++ b/results/mw-stick-pull.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.8,3 +300000,1.0,3 +400000,0.6,3 +500000,1.0,3 +600000,0.8,3 +700000,1.0,3 +800000,0.8,3 +900000,0.8,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,0.7,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,0.2,2 +200000,0.9,2 +300000,0.9,2 +400000,0.8,2 +500000,0.8,2 +600000,0.8,2 +700000,0.9,2 +800000,0.9,2 +900000,1.0,2 +1000000,0.7,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,0.9,2 +1400000,0.9,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,0.9,2 +2000000,0.9,2 +0,0.0,1 +100000,0.8,1 +200000,0.8,1 +300000,0.9,1 +400000,0.8,1 +500000,0.9,1 +600000,0.7,1 +700000,0.5,1 +800000,0.9,1 +900000,0.9,1 +1000000,0.9,1 +1100000,1.0,1 +1200000,0.9,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,0.8,1 +1600000,1.0,1 +1700000,0.9,1 +1800000,0.9,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-stick-push.csv b/results/mw-stick-push.csv new file mode 100644 index 0000000..82d975f --- /dev/null +++ b/results/mw-stick-push.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.4,3 +200000,0.9,3 +300000,0.9,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,0.9,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,0.7,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 
+1300000,0.9,2 +1400000,1.0,2 +1500000,0.9,2 +1600000,0.8,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,0.7,2 +2000000,1.0,2 +0,0.0,1 +100000,0.8,1 +200000,1.0,1 +300000,0.9,1 +400000,1.0,1 +500000,0.9,1 +600000,1.0,1 +700000,0.7,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,0.8,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,0.9,1 diff --git a/results/mw-sweep-into.csv b/results/mw-sweep-into.csv new file mode 100644 index 0000000..3582f5b --- /dev/null +++ b/results/mw-sweep-into.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.9,3 +200000,0.7,3 +300000,0.8,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,0.9,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,0.9,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,0.8,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.9,2 +200000,0.2,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,0.9,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-sweep.csv b/results/mw-sweep.csv new file mode 100644 index 0000000..9550d59 --- /dev/null +++ b/results/mw-sweep.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,0.8,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 
+300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,0.7,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.9,2 +200000,1.0,2 +300000,0.9,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,0.9,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/mw-window-close.csv b/results/mw-window-close.csv new file mode 100644 index 0000000..4b4e898 --- /dev/null +++ b/results/mw-window-close.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,1.0,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,2 +100000,1.0,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,1.0,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +0,0.0,1 +100000,1.0,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,0.8,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 diff --git a/results/mw-window-open.csv b/results/mw-window-open.csv new file mode 100644 index 0000000..d3ee1f0 --- /dev/null +++ b/results/mw-window-open.csv @@ -0,0 +1,63 @@ +step,success,seed +0,0.0,3 +100000,0.8,3 +200000,1.0,3 +300000,1.0,3 +400000,1.0,3 +500000,1.0,3 +600000,1.0,3 +700000,1.0,3 +800000,1.0,3 +900000,1.0,3 +1000000,1.0,3 
+1100000,1.0,3 +1200000,1.0,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,1.0,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +0,0.0,1 +100000,0.3,1 +200000,1.0,1 +300000,1.0,1 +400000,1.0,1 +500000,1.0,1 +600000,1.0,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,1.0,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +0,0.0,2 +100000,0.9,2 +200000,1.0,2 +300000,1.0,2 +400000,1.0,2 +500000,1.0,2 +600000,1.0,2 +700000,1.0,2 +800000,1.0,2 +900000,1.0,2 +1000000,0.9,2 +1100000,1.0,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 diff --git a/results/myo-hand-key-turn-hard.csv b/results/myo-hand-key-turn-hard.csv new file mode 100644 index 0000000..ab84721 --- /dev/null +++ b/results/myo-hand-key-turn-hard.csv @@ -0,0 +1,122 @@ +step,success,seed +0,0.0,1 +50000,0.0,1 +100000,0.0,1 +150000,0.0,1 +200000,0.0,1 +250000,0.0,1 +300000,0.0,1 +350000,0.0,1 +400000,0.0,1 +450000,0.0,1 +500000,0.0,1 +550000,0.0,1 +600000,0.0,1 +650000,0.0,1 +700000,0.0,1 +750000,0.0,1 +800000,0.0,1 +850000,0.0,1 +900000,0.0,1 +950000,0.0,1 +1000000,0.0,1 +1050000,0.0,1 +1100000,0.0,1 +1150000,0.0,1 +1200000,0.0,1 +1250000,0.0,1 +1300000,0.0,1 +1350000,0.0,1 +1400000,0.0,1 +1450000,0.0,1 +1500000,0.0,1 +1550000,0.0,1 +1600000,0.0,1 +1650000,0.0,1 +1700000,0.1,1 +1750000,0.0,1 +1800000,0.0,1 +1850000,0.0,1 +1900000,0.0,1 +1950000,0.0,1 +2000000,0.0,1 +0,0.0,3 +50000,0.0,3 +100000,0.0,3 +150000,0.0,3 +200000,0.0,3 +250000,0.0,3 +300000,0.0,3 +350000,0.0,3 +400000,0.0,3 +450000,0.0,3 +500000,0.0,3 +550000,0.0,3 +600000,0.0,3 +650000,0.0,3 +700000,0.0,3 +750000,0.0,3 +800000,0.0,3 +850000,0.0,3 +900000,0.0,3 +950000,0.0,3 +1000000,0.0,3 +1050000,0.0,3 +1100000,0.0,3 +1150000,0.0,3 +1200000,0.0,3 +1250000,0.0,3 +1300000,0.0,3 +1350000,0.0,3 +1400000,0.0,3 +1450000,0.0,3 +1500000,0.0,3 +1550000,0.0,3 
+1600000,0.0,3 +1650000,0.0,3 +1700000,0.0,3 +1750000,0.0,3 +1800000,0.0,3 +1850000,0.0,3 +1900000,0.0,3 +1950000,0.0,3 +0,0.0,2 +50000,0.0,2 +100000,0.0,2 +150000,0.0,2 +200000,0.0,2 +250000,0.0,2 +300000,0.0,2 +350000,0.0,2 +400000,0.0,2 +450000,0.0,2 +500000,0.0,2 +550000,0.0,2 +600000,0.0,2 +650000,0.0,2 +700000,0.0,2 +750000,0.0,2 +800000,0.0,2 +850000,0.0,2 +900000,0.0,2 +950000,0.0,2 +1000000,0.0,2 +1050000,0.0,2 +1100000,0.0,2 +1150000,0.0,2 +1200000,0.0,2 +1250000,0.0,2 +1300000,0.0,2 +1350000,0.0,2 +1400000,0.0,2 +1450000,0.0,2 +1500000,0.0,2 +1550000,0.1,2 +1600000,0.1,2 +1650000,0.0,2 +1700000,0.1,2 +1750000,0.1,2 +1800000,0.1,2 +1850000,0.2,2 +1900000,0.0,2 +1950000,0.1,2 diff --git a/results/myo-hand-key-turn.csv b/results/myo-hand-key-turn.csv new file mode 100644 index 0000000..1748894 --- /dev/null +++ b/results/myo-hand-key-turn.csv @@ -0,0 +1,122 @@ +step,success,seed +0,0.0,1 +50000,0.0,1 +100000,1.0,1 +150000,1.0,1 +200000,1.0,1 +250000,1.0,1 +300000,1.0,1 +350000,1.0,1 +400000,1.0,1 +450000,1.0,1 +500000,1.0,1 +550000,1.0,1 +600000,1.0,1 +650000,1.0,1 +700000,1.0,1 +750000,1.0,1 +800000,1.0,1 +850000,1.0,1 +900000,1.0,1 +950000,1.0,1 +1000000,1.0,1 +1050000,1.0,1 +1100000,1.0,1 +1150000,1.0,1 +1200000,1.0,1 +1250000,1.0,1 +1300000,1.0,1 +1350000,1.0,1 +1400000,1.0,1 +1450000,1.0,1 +1500000,1.0,1 +1550000,1.0,1 +1600000,1.0,1 +1650000,1.0,1 +1700000,1.0,1 +1750000,0.9,1 +1800000,1.0,1 +1850000,1.0,1 +1900000,1.0,1 +1950000,1.0,1 +2000000,1.0,1 +0,0.0,2 +50000,1.0,2 +100000,1.0,2 +150000,1.0,2 +200000,1.0,2 +250000,1.0,2 +300000,1.0,2 +350000,1.0,2 +400000,1.0,2 +450000,1.0,2 +500000,1.0,2 +550000,1.0,2 +600000,1.0,2 +650000,1.0,2 +700000,1.0,2 +750000,1.0,2 +800000,1.0,2 +850000,1.0,2 +900000,1.0,2 +950000,1.0,2 +1000000,1.0,2 +1050000,1.0,2 +1100000,1.0,2 +1150000,1.0,2 +1200000,1.0,2 +1250000,1.0,2 +1300000,1.0,2 +1350000,1.0,2 +1400000,1.0,2 +1450000,1.0,2 +1500000,1.0,2 +1550000,1.0,2 +1600000,1.0,2 +1650000,1.0,2 +1700000,1.0,2 
+1750000,1.0,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,1.0,2 +1950000,1.0,2 +0,0.0,3 +50000,1.0,3 +100000,0.9,3 +150000,0.5,3 +200000,1.0,3 +250000,1.0,3 +300000,1.0,3 +350000,1.0,3 +400000,1.0,3 +450000,1.0,3 +500000,1.0,3 +550000,1.0,3 +600000,1.0,3 +650000,1.0,3 +700000,1.0,3 +750000,1.0,3 +800000,1.0,3 +850000,1.0,3 +900000,1.0,3 +950000,1.0,3 +1000000,1.0,3 +1050000,1.0,3 +1100000,1.0,3 +1150000,1.0,3 +1200000,1.0,3 +1250000,1.0,3 +1300000,1.0,3 +1350000,1.0,3 +1400000,1.0,3 +1450000,1.0,3 +1500000,1.0,3 +1550000,1.0,3 +1600000,1.0,3 +1650000,1.0,3 +1700000,1.0,3 +1750000,1.0,3 +1800000,1.0,3 +1850000,1.0,3 +1900000,1.0,3 +1950000,1.0,3 diff --git a/results/myo-hand-obj-hold-hard.csv b/results/myo-hand-obj-hold-hard.csv new file mode 100644 index 0000000..6263c99 --- /dev/null +++ b/results/myo-hand-obj-hold-hard.csv @@ -0,0 +1,122 @@ +step,success,seed +0,0.0,1 +50000,0.0,1 +100000,0.1,1 +150000,0.2,1 +200000,0.0,1 +250000,0.1,1 +300000,0.3,1 +350000,0.3,1 +400000,0.3,1 +450000,0.5,1 +500000,0.3,1 +550000,0.4,1 +600000,0.4,1 +650000,0.4,1 +700000,0.8,1 +750000,0.5,1 +800000,0.5,1 +850000,0.5,1 +900000,0.8,1 +950000,0.7,1 +1000000,0.7,1 +1050000,0.9,1 +1100000,0.6,1 +1150000,0.5,1 +1200000,0.7,1 +1250000,0.7,1 +1300000,0.7,1 +1350000,0.6,1 +1400000,0.9,1 +1450000,0.7,1 +1500000,0.7,1 +1550000,0.8,1 +1600000,0.8,1 +1650000,0.4,1 +1700000,0.9,1 +1750000,0.6,1 +1800000,0.5,1 +1850000,0.8,1 +1900000,0.4,1 +1950000,0.9,1 +2000000,0.8,1 +0,0.0,3 +50000,0.0,3 +100000,0.1,3 +150000,0.0,3 +200000,0.1,3 +250000,0.2,3 +300000,0.2,3 +350000,0.0,3 +400000,0.3,3 +450000,0.2,3 +500000,0.4,3 +550000,0.2,3 +600000,0.4,3 +650000,0.5,3 +700000,0.5,3 +750000,0.4,3 +800000,0.7,3 +850000,0.8,3 +900000,0.5,3 +950000,0.6,3 +1000000,0.4,3 +1050000,0.8,3 +1100000,0.5,3 +1150000,0.7,3 +1200000,0.7,3 +1250000,0.7,3 +1300000,0.7,3 +1350000,0.7,3 +1400000,0.9,3 +1450000,0.8,3 +1500000,0.6,3 +1550000,0.6,3 +1600000,0.9,3 +1650000,0.7,3 +1700000,0.9,3 +1750000,0.8,3 +1800000,0.9,3 
+1850000,0.6,3 +1900000,1.0,3 +1950000,0.8,3 +0,0.0,2 +50000,0.0,2 +100000,0.1,2 +150000,0.2,2 +200000,0.4,2 +250000,0.0,2 +300000,0.1,2 +350000,0.3,2 +400000,0.6,2 +450000,0.4,2 +500000,0.3,2 +550000,0.6,2 +600000,0.5,2 +650000,0.8,2 +700000,0.5,2 +750000,0.6,2 +800000,0.5,2 +850000,0.7,2 +900000,0.6,2 +950000,0.4,2 +1000000,0.6,2 +1050000,1.0,2 +1100000,0.6,2 +1150000,0.9,2 +1200000,0.7,2 +1250000,0.8,2 +1300000,0.8,2 +1350000,1.0,2 +1400000,0.8,2 +1450000,0.9,2 +1500000,1.0,2 +1550000,0.9,2 +1600000,1.0,2 +1650000,0.9,2 +1700000,1.0,2 +1750000,1.0,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,0.9,2 +1950000,1.0,2 diff --git a/results/myo-hand-obj-hold.csv b/results/myo-hand-obj-hold.csv new file mode 100644 index 0000000..43af016 --- /dev/null +++ b/results/myo-hand-obj-hold.csv @@ -0,0 +1,122 @@ +step,success,seed +0,0.0,1 +50000,0.0,1 +100000,0.0,1 +150000,0.8,1 +200000,0.3,1 +250000,1.0,1 +300000,0.8,1 +350000,0.5,1 +400000,1.0,1 +450000,1.0,1 +500000,1.0,1 +550000,1.0,1 +600000,1.0,1 +650000,1.0,1 +700000,1.0,1 +750000,1.0,1 +800000,1.0,1 +850000,1.0,1 +900000,1.0,1 +950000,1.0,1 +1000000,1.0,1 +1050000,1.0,1 +1100000,1.0,1 +1150000,1.0,1 +1200000,1.0,1 +1250000,1.0,1 +1300000,1.0,1 +1350000,0.8,1 +1400000,0.9,1 +1450000,1.0,1 +1500000,1.0,1 +1550000,1.0,1 +1600000,0.2,1 +1650000,1.0,1 +1700000,1.0,1 +1750000,1.0,1 +1800000,1.0,1 +1850000,1.0,1 +1900000,1.0,1 +1950000,1.0,1 +2000000,1.0,1 +0,0.0,3 +50000,0.0,3 +100000,0.0,3 +150000,0.8,3 +200000,1.0,3 +250000,0.9,3 +300000,1.0,3 +350000,1.0,3 +400000,1.0,3 +450000,1.0,3 +500000,1.0,3 +550000,1.0,3 +600000,1.0,3 +650000,1.0,3 +700000,1.0,3 +750000,1.0,3 +800000,1.0,3 +850000,1.0,3 +900000,1.0,3 +950000,1.0,3 +1000000,1.0,3 +1050000,1.0,3 +1100000,1.0,3 +1150000,1.0,3 +1200000,1.0,3 +1250000,1.0,3 +1300000,1.0,3 +1350000,1.0,3 +1400000,1.0,3 +1450000,1.0,3 +1500000,1.0,3 +1550000,1.0,3 +1600000,1.0,3 +1650000,1.0,3 +1700000,1.0,3 +1750000,1.0,3 +1800000,1.0,3 +1850000,1.0,3 +1900000,1.0,3 +1950000,1.0,3 +0,0.0,2 
+50000,0.0,2 +100000,0.0,2 +150000,0.0,2 +200000,0.1,2 +250000,0.9,2 +300000,0.9,2 +350000,0.5,2 +400000,1.0,2 +450000,1.0,2 +500000,1.0,2 +550000,0.9,2 +600000,1.0,2 +650000,1.0,2 +700000,1.0,2 +750000,1.0,2 +800000,1.0,2 +850000,1.0,2 +900000,1.0,2 +950000,1.0,2 +1000000,1.0,2 +1050000,1.0,2 +1100000,1.0,2 +1150000,1.0,2 +1200000,1.0,2 +1250000,1.0,2 +1300000,1.0,2 +1350000,1.0,2 +1400000,1.0,2 +1450000,0.8,2 +1500000,0.5,2 +1550000,1.0,2 +1600000,1.0,2 +1650000,1.0,2 +1700000,1.0,2 +1750000,0.0,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,1.0,2 +1950000,1.0,2 diff --git a/results/myo-hand-pen-twirl-hard.csv b/results/myo-hand-pen-twirl-hard.csv new file mode 100644 index 0000000..1898f21 --- /dev/null +++ b/results/myo-hand-pen-twirl-hard.csv @@ -0,0 +1,121 @@ +step,success,seed +0,0.0,3 +50000,0.0,3 +100000,0.0,3 +150000,0.2,3 +200000,0.0,3 +250000,0.1,3 +300000,0.0,3 +350000,0.2,3 +400000,0.4,3 +450000,0.4,3 +500000,0.4,3 +550000,0.4,3 +600000,0.3,3 +650000,0.3,3 +700000,0.3,3 +750000,0.4,3 +800000,0.4,3 +850000,0.4,3 +900000,0.4,3 +950000,0.4,3 +1000000,0.4,3 +1050000,0.2,3 +1100000,0.4,3 +1150000,0.5,3 +1200000,0.4,3 +1250000,0.4,3 +1300000,0.4,3 +1350000,0.5,3 +1400000,0.5,3 +1450000,0.4,3 +1500000,0.4,3 +1550000,0.4,3 +1600000,0.5,3 +1650000,0.5,3 +1700000,0.4,3 +1750000,0.2,3 +1800000,0.6,3 +1850000,0.4,3 +1900000,0.5,3 +1950000,0.3,3 +0,0.0,2 +50000,0.0,2 +100000,0.0,2 +150000,0.0,2 +200000,0.0,2 +250000,0.1,2 +300000,0.1,2 +350000,0.3,2 +400000,0.5,2 +450000,0.1,2 +500000,0.3,2 +550000,0.3,2 +600000,0.2,2 +650000,0.3,2 +700000,0.3,2 +750000,0.4,2 +800000,0.2,2 +850000,0.5,2 +900000,0.4,2 +950000,0.6,2 +1000000,0.4,2 +1050000,0.3,2 +1100000,0.1,2 +1150000,0.1,2 +1200000,0.4,2 +1250000,0.4,2 +1300000,0.5,2 +1350000,0.2,2 +1400000,0.4,2 +1450000,0.3,2 +1500000,0.3,2 +1550000,0.4,2 +1600000,0.6,2 +1650000,0.4,2 +1700000,0.4,2 +1750000,0.5,2 +1800000,0.7,2 +1850000,0.6,2 +1900000,0.5,2 +1950000,0.6,2 +0,0.0,1 +50000,0.0,1 +100000,0.0,1 +150000,0.0,1 
+200000,0.0,1 +250000,0.0,1 +300000,0.0,1 +350000,0.2,1 +400000,0.1,1 +450000,0.2,1 +500000,0.4,1 +550000,0.2,1 +600000,0.6,1 +650000,0.1,1 +700000,0.2,1 +750000,0.4,1 +800000,0.6,1 +850000,0.6,1 +900000,0.3,1 +950000,0.7,1 +1000000,0.4,1 +1050000,0.5,1 +1100000,0.2,1 +1150000,0.5,1 +1200000,0.5,1 +1250000,0.5,1 +1300000,0.4,1 +1350000,0.5,1 +1400000,0.3,1 +1450000,0.6,1 +1500000,0.6,1 +1550000,0.6,1 +1600000,0.4,1 +1650000,0.5,1 +1700000,0.8,1 +1750000,0.5,1 +1800000,0.6,1 +1850000,0.5,1 +1900000,0.7,1 +1950000,0.3,1 diff --git a/results/myo-hand-pen-twirl.csv b/results/myo-hand-pen-twirl.csv new file mode 100644 index 0000000..b4faee1 --- /dev/null +++ b/results/myo-hand-pen-twirl.csv @@ -0,0 +1,121 @@ +step,success,seed +0,0.0,3 +50000,0.0,3 +100000,0.0,3 +150000,0.0,3 +200000,0.0,3 +250000,0.0,3 +300000,0.1,3 +350000,0.0,3 +400000,0.2,3 +450000,0.2,3 +500000,0.6,3 +550000,0.7,3 +600000,0.9,3 +650000,1.0,3 +700000,1.0,3 +750000,1.0,3 +800000,1.0,3 +850000,1.0,3 +900000,1.0,3 +950000,1.0,3 +1000000,1.0,3 +1050000,1.0,3 +1100000,1.0,3 +1150000,1.0,3 +1200000,1.0,3 +1250000,1.0,3 +1300000,1.0,3 +1350000,1.0,3 +1400000,0.8,3 +1450000,1.0,3 +1500000,1.0,3 +1550000,1.0,3 +1600000,1.0,3 +1650000,0.8,3 +1700000,1.0,3 +1750000,1.0,3 +1800000,1.0,3 +1850000,1.0,3 +1900000,1.0,3 +1950000,1.0,3 +0,0.0,2 +50000,0.0,2 +100000,0.0,2 +150000,0.0,2 +200000,0.0,2 +250000,0.0,2 +300000,0.0,2 +350000,0.0,2 +400000,0.0,2 +450000,0.0,2 +500000,0.0,2 +550000,0.0,2 +600000,0.0,2 +650000,0.1,2 +700000,0.0,2 +750000,0.0,2 +800000,0.0,2 +850000,0.0,2 +900000,0.0,2 +950000,0.0,2 +1000000,0.1,2 +1050000,0.4,2 +1100000,0.5,2 +1150000,0.9,2 +1200000,0.8,2 +1250000,1.0,2 +1300000,1.0,2 +1350000,1.0,2 +1400000,1.0,2 +1450000,1.0,2 +1500000,1.0,2 +1550000,1.0,2 +1600000,1.0,2 +1650000,1.0,2 +1700000,1.0,2 +1750000,1.0,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,1.0,2 +1950000,1.0,2 +0,0.0,1 +50000,0.0,1 +100000,0.0,1 +150000,0.1,1 +200000,0.3,1 +250000,0.8,1 +300000,1.0,1 +350000,1.0,1 
+400000,1.0,1 +450000,1.0,1 +500000,1.0,1 +550000,1.0,1 +600000,1.0,1 +650000,0.9,1 +700000,1.0,1 +750000,1.0,1 +800000,1.0,1 +850000,1.0,1 +900000,1.0,1 +950000,1.0,1 +1000000,1.0,1 +1050000,1.0,1 +1100000,1.0,1 +1150000,1.0,1 +1200000,0.4,1 +1250000,1.0,1 +1300000,1.0,1 +1350000,0.9,1 +1400000,1.0,1 +1450000,1.0,1 +1500000,1.0,1 +1550000,1.0,1 +1600000,0.2,1 +1650000,1.0,1 +1700000,1.0,1 +1750000,1.0,1 +1800000,0.9,1 +1850000,1.0,1 +1900000,1.0,1 +1950000,1.0,1 diff --git a/results/myo-hand-pose-hard.csv b/results/myo-hand-pose-hard.csv new file mode 100644 index 0000000..f66b5d2 --- /dev/null +++ b/results/myo-hand-pose-hard.csv @@ -0,0 +1,121 @@ +step,success,seed +0,0.0,3 +50000,0.0,3 +100000,0.0,3 +150000,0.0,3 +200000,0.0,3 +250000,0.0,3 +300000,0.0,3 +350000,0.0,3 +400000,0.0,3 +450000,0.0,3 +500000,0.0,3 +550000,0.0,3 +600000,0.0,3 +650000,0.0,3 +700000,0.0,3 +750000,0.0,3 +800000,0.0,3 +850000,0.0,3 +900000,0.0,3 +950000,0.0,3 +1000000,0.0,3 +1050000,0.0,3 +1100000,0.0,3 +1150000,0.1,3 +1200000,0.0,3 +1250000,0.0,3 +1300000,0.0,3 +1350000,0.0,3 +1400000,0.0,3 +1450000,0.0,3 +1500000,0.0,3 +1550000,0.0,3 +1600000,0.0,3 +1650000,0.0,3 +1700000,0.0,3 +1750000,0.2,3 +1800000,0.0,3 +1850000,0.2,3 +1900000,0.1,3 +1950000,0.2,3 +0,0.0,2 +50000,0.0,2 +100000,0.0,2 +150000,0.0,2 +200000,0.0,2 +250000,0.0,2 +300000,0.0,2 +350000,0.0,2 +400000,0.0,2 +450000,0.0,2 +500000,0.0,2 +550000,0.0,2 +600000,0.0,2 +650000,0.0,2 +700000,0.0,2 +750000,0.0,2 +800000,0.0,2 +850000,0.0,2 +900000,0.0,2 +950000,0.0,2 +1000000,0.0,2 +1050000,0.1,2 +1100000,0.0,2 +1150000,0.0,2 +1200000,0.0,2 +1250000,0.0,2 +1300000,0.0,2 +1350000,0.0,2 +1400000,0.0,2 +1450000,0.0,2 +1500000,0.0,2 +1550000,0.1,2 +1600000,0.0,2 +1650000,0.0,2 +1700000,0.0,2 +1750000,0.0,2 +1800000,0.1,2 +1850000,0.0,2 +1900000,0.2,2 +1950000,0.1,2 +0,0.0,1 +50000,0.0,1 +100000,0.0,1 +150000,0.0,1 +200000,0.0,1 +250000,0.0,1 +300000,0.0,1 +350000,0.0,1 +400000,0.0,1 +450000,0.0,1 +500000,0.0,1 +550000,0.0,1 
+600000,0.0,1 +650000,0.0,1 +700000,0.0,1 +750000,0.0,1 +800000,0.0,1 +850000,0.0,1 +900000,0.1,1 +950000,0.0,1 +1000000,0.0,1 +1050000,0.0,1 +1100000,0.1,1 +1150000,0.0,1 +1200000,0.0,1 +1250000,0.0,1 +1300000,0.0,1 +1350000,0.0,1 +1400000,0.1,1 +1450000,0.0,1 +1500000,0.1,1 +1550000,0.1,1 +1600000,0.0,1 +1650000,0.0,1 +1700000,0.2,1 +1750000,0.3,1 +1800000,0.0,1 +1850000,0.2,1 +1900000,0.2,1 +1950000,0.1,1 diff --git a/results/myo-hand-pose.csv b/results/myo-hand-pose.csv new file mode 100644 index 0000000..9cb2d26 --- /dev/null +++ b/results/myo-hand-pose.csv @@ -0,0 +1,123 @@ +step,success,seed +0,0.0,3 +50000,0.0,3 +100000,0.3,3 +150000,0.9,3 +200000,0.9,3 +250000,1.0,3 +300000,1.0,3 +350000,1.0,3 +400000,1.0,3 +450000,1.0,3 +500000,1.0,3 +550000,1.0,3 +600000,1.0,3 +650000,1.0,3 +700000,0.0,3 +750000,1.0,3 +800000,1.0,3 +850000,1.0,3 +900000,1.0,3 +950000,1.0,3 +1000000,1.0,3 +1050000,1.0,3 +1100000,1.0,3 +1150000,1.0,3 +1200000,1.0,3 +1250000,1.0,3 +1300000,1.0,3 +1350000,1.0,3 +1400000,1.0,3 +1450000,1.0,3 +1500000,1.0,3 +1550000,1.0,3 +1600000,1.0,3 +1650000,1.0,3 +1700000,1.0,3 +1750000,1.0,3 +1800000,1.0,3 +1850000,1.0,3 +1900000,1.0,3 +1950000,1.0,3 +2000000,1.0,3 +0,0.0,2 +50000,0.0,2 +100000,0.0,2 +150000,0.9,2 +200000,1.0,2 +250000,1.0,2 +300000,1.0,2 +350000,1.0,2 +400000,1.0,2 +450000,1.0,2 +500000,1.0,2 +550000,1.0,2 +600000,1.0,2 +650000,1.0,2 +700000,1.0,2 +750000,1.0,2 +800000,1.0,2 +850000,1.0,2 +900000,1.0,2 +950000,1.0,2 +1000000,1.0,2 +1050000,0.9,2 +1100000,1.0,2 +1150000,1.0,2 +1200000,1.0,2 +1250000,1.0,2 +1300000,1.0,2 +1350000,1.0,2 +1400000,1.0,2 +1450000,1.0,2 +1500000,1.0,2 +1550000,1.0,2 +1600000,1.0,2 +1650000,1.0,2 +1700000,1.0,2 +1750000,1.0,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,1.0,2 +1950000,1.0,2 +2000000,1.0,2 +0,0.0,1 +50000,0.0,1 +100000,0.4,1 +150000,1.0,1 +200000,1.0,1 +250000,1.0,1 +300000,1.0,1 +350000,1.0,1 +400000,1.0,1 +450000,1.0,1 +500000,1.0,1 +550000,1.0,1 +600000,1.0,1 +650000,1.0,1 +700000,1.0,1 
+750000,1.0,1 +800000,1.0,1 +850000,1.0,1 +900000,1.0,1 +950000,1.0,1 +1000000,1.0,1 +1050000,1.0,1 +1100000,1.0,1 +1150000,1.0,1 +1200000,1.0,1 +1250000,1.0,1 +1300000,1.0,1 +1350000,1.0,1 +1400000,1.0,1 +1450000,1.0,1 +1500000,1.0,1 +1550000,1.0,1 +1600000,1.0,1 +1650000,1.0,1 +1700000,1.0,1 +1750000,1.0,1 +1800000,1.0,1 +1850000,1.0,1 +1900000,1.0,1 +1950000,1.0,1 diff --git a/results/myo-hand-reach-hard.csv b/results/myo-hand-reach-hard.csv new file mode 100644 index 0000000..c5752e0 --- /dev/null +++ b/results/myo-hand-reach-hard.csv @@ -0,0 +1,121 @@ +step,success,seed +0,0.0,2 +50000,0.0,2 +100000,0.2,2 +150000,0.8,2 +200000,0.8,2 +250000,0.5,2 +300000,1.0,2 +350000,0.9,2 +400000,0.8,2 +450000,0.6,2 +500000,0.8,2 +550000,0.9,2 +600000,1.0,2 +650000,1.0,2 +700000,1.0,2 +750000,0.7,2 +800000,0.8,2 +850000,1.0,2 +900000,0.7,2 +950000,0.7,2 +1000000,0.8,2 +1050000,1.0,2 +1100000,0.8,2 +1150000,0.7,2 +1200000,1.0,2 +1250000,0.7,2 +1300000,0.9,2 +1350000,0.8,2 +1400000,0.8,2 +1450000,1.0,2 +1500000,1.0,2 +1550000,1.0,2 +1600000,1.0,2 +1650000,1.0,2 +1700000,0.8,2 +1750000,0.6,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,1.0,2 +1950000,1.0,2 +0,0.0,3 +50000,0.0,3 +100000,0.6,3 +150000,0.9,3 +200000,0.6,3 +250000,1.0,3 +300000,1.0,3 +350000,0.6,3 +400000,1.0,3 +450000,0.8,3 +500000,0.7,3 +550000,0.8,3 +600000,1.0,3 +650000,0.9,3 +700000,0.9,3 +750000,0.9,3 +800000,1.0,3 +850000,0.8,3 +900000,1.0,3 +950000,0.9,3 +1000000,0.7,3 +1050000,1.0,3 +1100000,0.9,3 +1150000,1.0,3 +1200000,0.8,3 +1250000,0.6,3 +1300000,0.9,3 +1350000,0.6,3 +1400000,1.0,3 +1450000,0.9,3 +1500000,0.8,3 +1550000,0.9,3 +1600000,0.7,3 +1650000,1.0,3 +1700000,1.0,3 +1750000,1.0,3 +1800000,0.9,3 +1850000,0.8,3 +1900000,0.9,3 +1950000,0.9,3 +0,0.0,1 +50000,0.0,1 +100000,0.2,1 +150000,0.1,1 +200000,0.8,1 +250000,0.7,1 +300000,0.8,1 +350000,0.8,1 +400000,0.9,1 +450000,0.9,1 +500000,0.9,1 +550000,1.0,1 +600000,0.9,1 +650000,1.0,1 +700000,1.0,1 +750000,0.9,1 +800000,1.0,1 +850000,1.0,1 +900000,1.0,1 
+950000,1.0,1 +1000000,1.0,1 +1050000,0.9,1 +1100000,1.0,1 +1150000,1.0,1 +1200000,1.0,1 +1250000,1.0,1 +1300000,0.9,1 +1350000,0.9,1 +1400000,1.0,1 +1450000,0.9,1 +1500000,1.0,1 +1550000,1.0,1 +1600000,0.8,1 +1650000,1.0,1 +1700000,1.0,1 +1750000,0.9,1 +1800000,1.0,1 +1850000,1.0,1 +1900000,1.0,1 +1950000,1.0,1 diff --git a/results/myo-hand-reach.csv b/results/myo-hand-reach.csv new file mode 100644 index 0000000..f98a248 --- /dev/null +++ b/results/myo-hand-reach.csv @@ -0,0 +1,123 @@ +step,success,seed +0,0.0,3 +50000,1.0,3 +100000,1.0,3 +150000,1.0,3 +200000,1.0,3 +250000,1.0,3 +300000,1.0,3 +350000,1.0,3 +400000,1.0,3 +450000,1.0,3 +500000,1.0,3 +550000,1.0,3 +600000,1.0,3 +650000,1.0,3 +700000,1.0,3 +750000,1.0,3 +800000,1.0,3 +850000,1.0,3 +900000,1.0,3 +950000,1.0,3 +1000000,1.0,3 +1050000,1.0,3 +1100000,1.0,3 +1150000,1.0,3 +1200000,1.0,3 +1250000,1.0,3 +1300000,1.0,3 +1350000,1.0,3 +1400000,1.0,3 +1450000,1.0,3 +1500000,1.0,3 +1550000,1.0,3 +1600000,1.0,3 +1650000,1.0,3 +1700000,1.0,3 +1750000,1.0,3 +1800000,1.0,3 +1850000,1.0,3 +1900000,1.0,3 +1950000,1.0,3 +2000000,1.0,3 +0,0.0,2 +50000,1.0,2 +100000,1.0,2 +150000,1.0,2 +200000,1.0,2 +250000,1.0,2 +300000,1.0,2 +350000,1.0,2 +400000,0.6,2 +450000,1.0,2 +500000,0.9,2 +550000,1.0,2 +600000,0.9,2 +650000,1.0,2 +700000,1.0,2 +750000,1.0,2 +800000,1.0,2 +850000,1.0,2 +900000,1.0,2 +950000,1.0,2 +1000000,1.0,2 +1050000,1.0,2 +1100000,1.0,2 +1150000,1.0,2 +1200000,1.0,2 +1250000,1.0,2 +1300000,1.0,2 +1350000,1.0,2 +1400000,1.0,2 +1450000,1.0,2 +1500000,1.0,2 +1550000,1.0,2 +1600000,1.0,2 +1650000,1.0,2 +1700000,1.0,2 +1750000,1.0,2 +1800000,1.0,2 +1850000,1.0,2 +1900000,1.0,2 +1950000,1.0,2 +2000000,1.0,2 +0,0.0,1 +50000,0.9,1 +100000,1.0,1 +150000,0.8,1 +200000,1.0,1 +250000,1.0,1 +300000,1.0,1 +350000,0.8,1 +400000,1.0,1 +450000,1.0,1 +500000,1.0,1 +550000,1.0,1 +600000,1.0,1 +650000,1.0,1 +700000,1.0,1 +750000,1.0,1 +800000,1.0,1 +850000,1.0,1 +900000,1.0,1 +950000,1.0,1 +1000000,1.0,1 +1050000,1.0,1 
+1100000,1.0,1 +1150000,1.0,1 +1200000,1.0,1 +1250000,1.0,1 +1300000,1.0,1 +1350000,1.0,1 +1400000,1.0,1 +1450000,1.0,1 +1500000,1.0,1 +1550000,1.0,1 +1600000,1.0,1 +1650000,1.0,1 +1700000,1.0,1 +1750000,1.0,1 +1800000,1.0,1 +1850000,1.0,1 +1900000,1.0,1 +1950000,1.0,1 diff --git a/results/pendulum-spin.csv b/results/pendulum-spin.csv new file mode 100644 index 0000000..eb38c7d --- /dev/null +++ b/results/pendulum-spin.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,183.8,3 +100000,834.5,3 +200000,873.1,3 +300000,834.7,3 +400000,823.1,3 +500000,828.5,3 +600000,848.2,3 +700000,854.4,3 +800000,825.5,3 +900000,823.2,3 +1000000,855.2,3 +1100000,859.6,3 +1200000,862.2,3 +1300000,860.5,3 +1400000,838.9,3 +1500000,853.4,3 +1600000,849.4,3 +1700000,813.8,3 +1800000,825.7,3 +1900000,832.9,3 +2000000,837.2,3 +2100000,822.0,3 +2200000,803.0,3 +2300000,846.4,3 +2400000,842.0,3 +2500000,843.7,3 +2600000,832.0,3 +2700000,830.2,3 +2800000,873.8,3 +2900000,877.5,3 +3000000,859.1,3 +3100000,842.4,3 +3200000,841.5,3 +3300000,829.2,3 +3400000,848.8,3 +3500000,829.8,3 +3600000,825.2,3 +3700000,864.3,3 +3800000,849.9,3 +3900000,861.8,3 +4000000,811.1,3 +0,103.4,2 +100000,869.3,2 +200000,829.8,2 +300000,835.5,2 +400000,893.6,2 +500000,837.5,2 +600000,884.5,2 +700000,852.2,2 +800000,832.2,2 +900000,847.7,2 +1000000,850.4,2 +1100000,850.1,2 +1200000,845.0,2 +1300000,829.3,2 +1400000,839.1,2 +1500000,861.4,2 +1600000,857.8,2 +1700000,865.7,2 +1800000,830.1,2 +1900000,823.0,2 +2000000,851.6,2 +2100000,853.9,2 +2200000,863.0,2 +2300000,821.6,2 +2400000,828.4,2 +2500000,844.1,2 +2600000,880.4,2 +2700000,844.9,2 +2800000,838.0,2 +2900000,851.7,2 +3000000,838.5,2 +3100000,843.5,2 +3200000,886.3,2 +3300000,866.4,2 +3400000,860.9,2 +3500000,876.8,2 +3600000,865.2,2 +3700000,860.6,2 +3800000,841.3,2 +3900000,860.4,2 +4000000,867.9,2 +0,73.6,1 +100000,810.8,1 +200000,826.8,1 +300000,840.6,1 +400000,806.2,1 +500000,869.9,1 +600000,842.6,1 +700000,865.2,1 +800000,867.9,1 +900000,866.0,1 +1000000,833.4,1 
+1100000,833.4,1 +1200000,814.8,1 +1300000,826.6,1 +1400000,816.8,1 +1500000,846.9,1 +1600000,847.3,1 +1700000,886.5,1 +1800000,840.8,1 +1900000,849.1,1 +2000000,846.6,1 +2100000,854.1,1 +2200000,834.2,1 +2300000,800.8,1 +2400000,814.1,1 +2500000,828.6,1 +2600000,856.3,1 +2700000,857.2,1 +2800000,852.3,1 +2900000,829.1,1 +3000000,835.5,1 +3100000,827.5,1 +3200000,823.9,1 +3300000,816.1,1 +3400000,844.7,1 +3500000,874.2,1 +3600000,807.4,1 +3700000,876.9,1 +3800000,879.8,1 +3900000,858.6,1 +4000000,850.9,1 diff --git a/results/pendulum-swingup.csv b/results/pendulum-swingup.csv new file mode 100644 index 0000000..959971d --- /dev/null +++ b/results/pendulum-swingup.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,1.4,3 +100000,756.0,3 +200000,876.9,3 +300000,814.8,3 +400000,801.8,3 +500000,816.3,3 +600000,853.2,3 +700000,862.0,3 +800000,803.2,3 +900000,806.4,3 +1000000,867.7,3 +1100000,848.9,3 +1200000,866.4,3 +1300000,864.5,3 +1400000,829.1,3 +1500000,855.6,3 +1600000,839.4,3 +1700000,791.5,3 +1800000,814.2,3 +1900000,821.0,3 +2000000,829.3,3 +2100000,804.0,3 +2200000,767.5,3 +2300000,828.0,3 +2400000,830.1,3 +2500000,836.2,3 +2600000,815.6,3 +2700000,812.1,3 +2800000,893.3,3 +2900000,887.6,3 +3000000,867.8,3 +3100000,837.1,3 +3200000,828.8,3 +3300000,816.9,3 +3400000,845.3,3 +3500000,819.7,3 +3600000,808.5,3 +3700000,860.1,3 +3800000,857.5,3 +3900000,862.2,3 +4000000,782.8,3 +0,0.0,1 +100000,563.7,1 +200000,801.6,1 +300000,830.2,1 +400000,772.6,1 +500000,884.6,1 +600000,835.7,1 +700000,866.1,1 +800000,891.9,1 +900000,877.9,1 +1000000,815.6,1 +1100000,827.0,1 +1200000,786.5,1 +1300000,803.8,1 +1400000,792.3,1 +1500000,837.8,1 +1600000,846.2,1 +1700000,904.9,1 +1800000,836.5,1 +1900000,843.7,1 +2000000,839.5,1 +2100000,853.5,1 +2200000,827.7,1 +2300000,771.1,1 +2400000,784.2,1 +2500000,805.8,1 +2600000,862.6,1 +2700000,855.7,1 +2800000,851.5,1 +2900000,818.5,1 +3000000,819.8,1 +3100000,804.5,1 +3200000,800.5,1 +3300000,789.5,1 +3400000,826.4,1 +3500000,882.3,1 
+3600000,780.6,1 +3700000,898.8,1 +3800000,891.4,1 +3900000,866.4,1 +4000000,848.3,1 +0,0.0,2 +100000,852.2,2 +200000,795.0,2 +300000,819.5,2 +400000,827.4,2 +500000,824.2,2 +600000,905.7,2 +700000,854.4,2 +800000,817.7,2 +900000,844.1,2 +1000000,774.8,2 +1100000,849.2,2 +1200000,837.7,2 +1300000,810.9,2 +1400000,826.2,2 +1500000,862.1,2 +1600000,855.6,2 +1700000,875.8,2 +1800000,886.0,2 +1900000,811.5,2 +2000000,841.1,2 +2100000,848.1,2 +2200000,864.3,2 +2300000,801.2,2 +2400000,828.0,2 +2500000,831.4,2 +2600000,894.1,2 +2700000,847.4,2 +2800000,831.1,2 +2900000,842.8,2 +3000000,824.5,2 +3100000,830.5,2 +3200000,906.2,2 +3300000,871.5,2 +3400000,866.2,2 +3500000,885.5,2 +3600000,870.3,2 +3700000,861.9,2 +3800000,838.4,2 +3900000,849.5,2 +4000000,883.0,2 diff --git a/results/pick-cube.csv b/results/pick-cube.csv new file mode 100644 index 0000000..f96df7a --- /dev/null +++ b/results/pick-cube.csv @@ -0,0 +1,124 @@ +step,success,seed +0,0.0,1 +100000,0.0,1 +200000,0.0,1 +300000,0.7,1 +400000,0.9,1 +500000,0.8,1 +600000,0.8,1 +700000,1.0,1 +800000,1.0,1 +900000,1.0,1 +1000000,1.0,1 +1100000,1.0,1 +1200000,1.0,1 +1300000,1.0,1 +1400000,1.0,1 +1500000,0.9,1 +1600000,0.9,1 +1700000,1.0,1 +1800000,1.0,1 +1900000,1.0,1 +2000000,1.0,1 +2100000,1.0,1 +2200000,1.0,1 +2300000,1.0,1 +2400000,0.9,1 +2500000,1.0,1 +2600000,1.0,1 +2700000,0.9,1 +2800000,1.0,1 +2900000,0.9,1 +3000000,1.0,1 +3100000,1.0,1 +3200000,1.0,1 +3300000,1.0,1 +3400000,0.9,1 +3500000,1.0,1 +3600000,1.0,1 +3700000,1.0,1 +3800000,0.9,1 +3900000,1.0,1 +4000000,1.0,1 +0,0.0,3 +100000,0.0,3 +200000,0.0,3 +300000,0.4,3 +400000,0.7,3 +500000,0.7,3 +600000,0.9,3 +700000,1.0,3 +800000,1.0,3 +900000,0.9,3 +1000000,1.0,3 +1100000,1.0,3 +1200000,0.9,3 +1300000,1.0,3 +1400000,1.0,3 +1500000,1.0,3 +1600000,0.9,3 +1700000,1.0,3 +1800000,1.0,3 +1900000,1.0,3 +2000000,1.0,3 +2100000,1.0,3 +2200000,1.0,3 +2300000,1.0,3 +2400000,1.0,3 +2500000,1.0,3 +2600000,1.0,3 +2700000,1.0,3 +2800000,1.0,3 +2900000,1.0,3 +3000000,1.0,3 
+3100000,1.0,3 +3200000,1.0,3 +3300000,1.0,3 +3400000,1.0,3 +3500000,1.0,3 +3600000,1.0,3 +3700000,1.0,3 +3800000,1.0,3 +3900000,1.0,3 +4000000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,0.0,2 +300000,0.2,2 +400000,0.7,2 +500000,0.4,2 +600000,0.9,2 +700000,0.7,2 +800000,0.9,2 +900000,0.9,2 +1000000,0.8,2 +1100000,0.9,2 +1200000,1.0,2 +1300000,1.0,2 +1400000,1.0,2 +1500000,1.0,2 +1600000,1.0,2 +1700000,0.8,2 +1800000,1.0,2 +1900000,1.0,2 +2000000,1.0,2 +2100000,0.9,2 +2200000,1.0,2 +2300000,1.0,2 +2400000,1.0,2 +2500000,1.0,2 +2600000,1.0,2 +2700000,1.0,2 +2800000,1.0,2 +2900000,0.9,2 +3000000,0.9,2 +3100000,1.0,2 +3200000,1.0,2 +3300000,0.9,2 +3400000,1.0,2 +3500000,1.0,2 +3600000,0.9,2 +3700000,1.0,2 +3800000,1.0,2 +3900000,1.0,2 +4000000,1.0,2 diff --git a/results/pick-ycb.csv b/results/pick-ycb.csv new file mode 100644 index 0000000..2e2a917 --- /dev/null +++ b/results/pick-ycb.csv @@ -0,0 +1,421 @@ +step,success,seed +0,0.0,2 +100000,0.0,2 +200000,0.0,2 +300000,0.0,2 +400000,0.0,2 +500000,0.0,2 +600000,0.0,2 +700000,0.0,2 +800000,0.0,2 +900000,0.0,2 +1000000,0.0,2 +1100000,0.0,2 +1200000,0.0,2 +1300000,0.0,2 +1400000,0.0,2 +1500000,0.1,2 +1600000,0.0,2 +1700000,0.1,2 +1800000,0.1,2 +1900000,0.1,2 +2000000,0.2,2 +2100000,0.0,2 +2200000,0.2,2 +2300000,0.3,2 +2400000,0.2,2 +2500000,0.6,2 +2600000,0.3,2 +2700000,0.1,2 +2800000,0.4,2 +2900000,0.3,2 +3000000,0.2,2 +3100000,0.5,2 +3200000,0.5,2 +3300000,0.5,2 +3400000,0.5,2 +3500000,0.3,2 +3600000,0.4,2 +3700000,0.2,2 +3800000,0.4,2 +3900000,0.5,2 +4000000,0.4,2 +4100000,0.2,2 +4200000,0.5,2 +4300000,0.8,2 +4400000,0.7,2 +4500000,0.3,2 +4600000,0.5,2 +4700000,0.7,2 +4800000,0.5,2 +4900000,0.7,2 +5000000,0.3,2 +5100000,0.6,2 +5200000,0.7,2 +5300000,0.4,2 +5400000,0.3,2 +5500000,0.2,2 +5600000,0.6,2 +5700000,0.7,2 +5800000,0.6,2 +5900000,0.5,2 +6000000,0.3,2 +6100000,0.4,2 +6200000,0.5,2 +6300000,0.8,2 +6400000,0.7,2 +6500000,0.6,2 +6600000,0.5,2 +6700000,0.6,2 +6800000,0.6,2 +6900000,0.9,2 +7000000,0.5,2 +7100000,0.6,2 
+7200000,0.7,2 +7300000,0.8,2 +7400000,0.7,2 +7500000,0.5,2 +7600000,0.5,2 +7700000,0.6,2 +7800000,0.5,2 +7900000,0.6,2 +8000000,0.5,2 +8100000,0.4,2 +8200000,0.7,2 +8300000,0.7,2 +8400000,0.6,2 +8500000,0.6,2 +8600000,0.5,2 +8700000,0.4,2 +8800000,0.6,2 +8900000,0.6,2 +9000000,0.6,2 +9100000,0.7,2 +9200000,0.9,2 +9300000,0.5,2 +9400000,0.7,2 +9500000,0.5,2 +9600000,0.6,2 +9700000,0.7,2 +9800000,0.4,2 +9900000,0.5,2 +10000000,0.6,2 +10100000,0.7,2 +10200000,0.7,2 +10300000,0.5,2 +10400000,0.3,2 +10500000,0.8,2 +10600000,0.9,2 +10700000,0.4,2 +10800000,0.4,2 +10900000,0.7,2 +11000000,0.6,2 +11100000,0.7,2 +11200000,0.7,2 +11300000,0.8,2 +11400000,0.8,2 +11500000,0.6,2 +11600000,0.8,2 +11700000,0.6,2 +11800000,0.6,2 +11900000,0.5,2 +12000000,0.6,2 +12100000,0.7,2 +12200000,0.6,2 +12300000,0.5,2 +12400000,0.7,2 +12500000,0.5,2 +12600000,0.4,2 +12700000,0.8,2 +12800000,0.5,2 +12900000,0.4,2 +13000000,0.8,2 +13100000,0.8,2 +13200000,0.6,2 +13300000,0.6,2 +13400000,0.3,2 +13500000,0.6,2 +13600000,0.6,2 +13700000,0.6,2 +13800000,0.6,2 +13900000,0.8,2 +0,0.0,3 +100000,0.0,3 +200000,0.0,3 +300000,0.0,3 +400000,0.0,3 +500000,0.0,3 +600000,0.0,3 +700000,0.0,3 +800000,0.0,3 +900000,0.0,3 +1000000,0.0,3 +1100000,0.0,3 +1200000,0.0,3 +1300000,0.0,3 +1400000,0.1,3 +1500000,0.0,3 +1600000,0.0,3 +1700000,0.0,3 +1800000,0.0,3 +1900000,0.0,3 +2000000,0.1,3 +2100000,0.2,3 +2200000,0.1,3 +2300000,0.2,3 +2400000,0.2,3 +2500000,0.0,3 +2600000,0.0,3 +2700000,0.1,3 +2800000,0.3,3 +2900000,0.3,3 +3000000,0.2,3 +3100000,0.2,3 +3200000,0.1,3 +3300000,0.3,3 +3400000,0.3,3 +3500000,0.4,3 +3600000,0.3,3 +3700000,0.3,3 +3800000,0.2,3 +3900000,0.2,3 +4000000,0.6,3 +4100000,0.3,3 +4200000,0.1,3 +4300000,0.3,3 +4400000,0.5,3 +4500000,0.2,3 +4600000,0.2,3 +4700000,0.5,3 +4800000,0.5,3 +4900000,0.3,3 +5000000,0.4,3 +5100000,0.4,3 +5200000,0.6,3 +5300000,0.5,3 +5400000,0.4,3 +5500000,0.3,3 +5600000,0.6,3 +5700000,0.5,3 +5800000,0.3,3 +5900000,0.7,3 +6000000,0.7,3 +6100000,0.7,3 +6200000,0.6,3 
+6300000,0.4,3 +6400000,0.4,3 +6500000,0.7,3 +6600000,0.4,3 +6700000,0.5,3 +6800000,0.6,3 +6900000,0.7,3 +7000000,0.2,3 +7100000,0.8,3 +7200000,0.6,3 +7300000,0.6,3 +7400000,0.5,3 +7500000,0.3,3 +7600000,0.6,3 +7700000,0.5,3 +7800000,0.5,3 +7900000,0.4,3 +8000000,0.5,3 +8100000,0.5,3 +8200000,0.5,3 +8300000,0.6,3 +8400000,0.4,3 +8500000,0.3,3 +8600000,0.7,3 +8700000,0.3,3 +8800000,0.7,3 +8900000,0.8,3 +9000000,0.6,3 +9100000,0.7,3 +9200000,0.8,3 +9300000,0.5,3 +9400000,0.4,3 +9500000,0.6,3 +9600000,0.2,3 +9700000,0.5,3 +9800000,0.2,3 +9900000,0.6,3 +10000000,0.3,3 +10100000,0.5,3 +10200000,0.5,3 +10300000,0.4,3 +10400000,0.4,3 +10500000,0.5,3 +10600000,0.6,3 +10700000,0.5,3 +10800000,0.4,3 +10900000,0.6,3 +11000000,0.5,3 +11100000,0.3,3 +11200000,0.7,3 +11300000,0.6,3 +11400000,0.8,3 +11500000,0.8,3 +11600000,0.9,3 +11700000,0.7,3 +11800000,0.5,3 +11900000,0.5,3 +12000000,0.2,3 +12100000,0.6,3 +12200000,0.5,3 +12300000,0.5,3 +12400000,0.6,3 +12500000,0.4,3 +12600000,0.2,3 +12700000,0.8,3 +12800000,0.4,3 +12900000,0.8,3 +13000000,0.6,3 +13100000,0.6,3 +13200000,0.3,3 +13300000,0.8,3 +13400000,0.4,3 +13500000,0.4,3 +13600000,0.4,3 +13700000,0.5,3 +13800000,0.6,3 +13900000,0.5,3 +0,0.0,1 +100000,0.0,1 +200000,0.0,1 +300000,0.0,1 +400000,0.0,1 +500000,0.0,1 +600000,0.0,1 +700000,0.0,1 +800000,0.0,1 +900000,0.0,1 +1000000,0.0,1 +1100000,0.0,1 +1200000,0.0,1 +1300000,0.0,1 +1400000,0.1,1 +1500000,0.1,1 +1600000,0.1,1 +1700000,0.0,1 +1800000,0.3,1 +1900000,0.2,1 +2000000,0.1,1 +2100000,0.1,1 +2200000,0.1,1 +2300000,0.0,1 +2400000,0.0,1 +2500000,0.1,1 +2600000,0.0,1 +2700000,0.2,1 +2800000,0.3,1 +2900000,0.4,1 +3000000,0.2,1 +3100000,0.0,1 +3200000,0.6,1 +3300000,0.5,1 +3400000,0.4,1 +3500000,0.3,1 +3600000,0.5,1 +3700000,0.6,1 +3800000,0.2,1 +3900000,0.2,1 +4000000,0.4,1 +4100000,0.3,1 +4200000,0.5,1 +4300000,0.5,1 +4400000,0.6,1 +4500000,0.2,1 +4600000,0.4,1 +4700000,0.7,1 +4800000,0.8,1 +4900000,0.2,1 +5000000,0.3,1 +5100000,0.4,1 +5200000,0.8,1 +5300000,0.5,1 
+5400000,0.3,1 +5500000,0.4,1 +5600000,0.6,1 +5700000,0.6,1 +5800000,0.5,1 +5900000,0.8,1 +6000000,0.3,1 +6100000,0.6,1 +6200000,0.5,1 +6300000,0.3,1 +6400000,0.7,1 +6500000,0.9,1 +6600000,0.5,1 +6700000,0.5,1 +6800000,0.4,1 +6900000,0.7,1 +7000000,0.4,1 +7100000,0.4,1 +7200000,0.5,1 +7300000,0.5,1 +7400000,0.6,1 +7500000,0.8,1 +7600000,0.7,1 +7700000,0.5,1 +7800000,0.4,1 +7900000,0.7,1 +8000000,0.5,1 +8100000,0.3,1 +8200000,0.6,1 +8300000,0.5,1 +8400000,0.4,1 +8500000,0.3,1 +8600000,0.7,1 +8700000,0.3,1 +8800000,0.7,1 +8900000,0.7,1 +9000000,0.6,1 +9100000,0.4,1 +9200000,0.6,1 +9300000,0.5,1 +9400000,0.8,1 +9500000,0.6,1 +9600000,0.4,1 +9700000,0.4,1 +9800000,0.4,1 +9900000,0.5,1 +10000000,0.7,1 +10100000,0.4,1 +10200000,0.4,1 +10300000,0.4,1 +10400000,0.8,1 +10500000,0.6,1 +10600000,0.4,1 +10700000,0.5,1 +10800000,0.4,1 +10900000,0.7,1 +11000000,0.5,1 +11100000,0.7,1 +11200000,0.5,1 +11300000,0.8,1 +11400000,0.4,1 +11500000,0.7,1 +11600000,0.6,1 +11700000,0.6,1 +11800000,0.5,1 +11900000,0.6,1 +12000000,0.7,1 +12100000,0.4,1 +12200000,0.5,1 +12300000,0.7,1 +12400000,0.5,1 +12500000,0.4,1 +12600000,0.6,1 +12700000,0.7,1 +12800000,0.4,1 +12900000,0.6,1 +13000000,0.9,1 +13100000,0.5,1 +13200000,0.6,1 +13300000,0.6,1 +13400000,0.3,1 +13500000,0.6,1 +13600000,0.7,1 +13700000,0.8,1 +13800000,0.6,1 +13900000,0.8,1 diff --git a/results/quadruped-run.csv b/results/quadruped-run.csv new file mode 100644 index 0000000..1240fe7 --- /dev/null +++ b/results/quadruped-run.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,278.5,2 +100000,385.7,2 +200000,801.8,2 +300000,888.2,2 +400000,925.2,2 +500000,933.5,2 +600000,928.9,2 +700000,935.4,2 +800000,943.7,2 +900000,952.0,2 +1000000,946.7,2 +1100000,943.0,2 +1200000,949.5,2 +1300000,942.5,2 +1400000,946.7,2 +1500000,949.6,2 +1600000,953.2,2 +1700000,946.8,2 +1800000,951.0,2 +1900000,954.2,2 +2000000,957.2,2 +2100000,948.5,2 +2200000,951.6,2 +2300000,954.2,2 +2400000,953.5,2 +2500000,953.8,2 +2600000,954.5,2 +2700000,952.3,2 
+2800000,939.9,2 +2900000,944.4,2 +3000000,953.3,2 +3100000,952.7,2 +3200000,946.5,2 +3300000,954.4,2 +3400000,951.4,2 +3500000,954.3,2 +3600000,954.1,2 +3700000,952.2,2 +3800000,951.6,2 +3900000,962.3,2 +4000000,955.8,2 +0,129.4,1 +100000,121.4,1 +200000,691.1,1 +300000,874.7,1 +400000,915.7,1 +500000,936.4,1 +600000,947.5,1 +700000,938.9,1 +800000,941.3,1 +900000,927.8,1 +1000000,940.6,1 +1100000,945.0,1 +1200000,941.2,1 +1300000,954.5,1 +1400000,946.1,1 +1500000,956.9,1 +1600000,923.9,1 +1700000,952.9,1 +1800000,957.1,1 +1900000,957.4,1 +2000000,940.9,1 +2100000,954.6,1 +2200000,947.0,1 +2300000,956.1,1 +2400000,962.5,1 +2500000,953.8,1 +2600000,949.2,1 +2700000,950.2,1 +2800000,955.4,1 +2900000,957.6,1 +3000000,957.3,1 +3100000,960.9,1 +3200000,963.9,1 +3300000,951.2,1 +3400000,960.2,1 +3500000,954.8,1 +3600000,955.1,1 +3700000,956.1,1 +3800000,955.3,1 +3900000,960.8,1 +4000000,951.5,1 +0,202.7,3 +100000,210.5,3 +200000,784.2,3 +300000,921.9,3 +400000,948.7,3 +500000,949.0,3 +600000,942.5,3 +700000,941.6,3 +800000,941.5,3 +900000,906.1,3 +1000000,925.7,3 +1100000,935.9,3 +1200000,956.0,3 +1300000,948.1,3 +1400000,939.6,3 +1500000,932.7,3 +1600000,955.4,3 +1700000,952.4,3 +1800000,955.4,3 +1900000,943.2,3 +2000000,944.7,3 +2100000,957.5,3 +2200000,936.0,3 +2300000,954.9,3 +2400000,953.9,3 +2500000,944.4,3 +2600000,961.5,3 +2700000,951.5,3 +2800000,958.8,3 +2900000,962.6,3 +3000000,955.6,3 +3100000,960.5,3 +3200000,957.6,3 +3300000,958.5,3 +3400000,963.6,3 +3500000,948.5,3 +3600000,957.8,3 +3700000,960.0,3 +3800000,947.1,3 +3900000,957.7,3 +4000000,954.2,3 diff --git a/results/quadruped-walk.csv b/results/quadruped-walk.csv new file mode 100644 index 0000000..234bfe6 --- /dev/null +++ b/results/quadruped-walk.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,283.2,2 +100000,420.2,2 +200000,950.8,2 +300000,959.1,2 +400000,961.7,2 +500000,957.5,2 +600000,955.0,2 +700000,968.8,2 +800000,965.0,2 +900000,966.0,2 +1000000,968.2,2 +1100000,968.1,2 +1200000,964.2,2 
+1300000,964.1,2 +1400000,962.9,2 +1500000,965.4,2 +1600000,974.5,2 +1700000,975.5,2 +1800000,966.6,2 +1900000,975.7,2 +2000000,974.5,2 +2100000,964.0,2 +2200000,970.6,2 +2300000,975.4,2 +2400000,969.2,2 +2500000,972.1,2 +2600000,973.5,2 +2700000,965.8,2 +2800000,976.8,2 +2900000,974.1,2 +3000000,968.4,2 +3100000,977.9,2 +3200000,973.0,2 +3300000,976.9,2 +3400000,979.0,2 +3500000,975.6,2 +3600000,973.6,2 +3700000,969.5,2 +3800000,967.3,2 +3900000,980.0,2 +4000000,969.8,2 +0,126.9,1 +100000,213.6,1 +200000,888.8,1 +300000,953.5,1 +400000,945.8,1 +500000,949.1,1 +600000,958.8,1 +700000,955.9,1 +800000,956.9,1 +900000,961.3,1 +1000000,965.3,1 +1100000,962.7,1 +1200000,971.4,1 +1300000,969.5,1 +1400000,967.2,1 +1500000,977.8,1 +1600000,968.9,1 +1700000,967.4,1 +1800000,974.9,1 +1900000,965.9,1 +2000000,969.5,1 +2100000,973.8,1 +2200000,973.3,1 +2300000,972.5,1 +2400000,982.0,1 +2500000,969.7,1 +2600000,964.6,1 +2700000,956.7,1 +2800000,972.3,1 +2900000,971.9,1 +3000000,967.9,1 +3100000,975.3,1 +3200000,977.8,1 +3300000,970.2,1 +3400000,976.3,1 +3500000,977.4,1 +3600000,970.1,1 +3700000,957.4,1 +3800000,973.9,1 +3900000,964.1,1 +4000000,972.1,1 +0,201.4,3 +100000,826.7,3 +200000,904.4,3 +300000,937.5,3 +400000,970.0,3 +500000,964.9,3 +600000,929.7,3 +700000,971.0,3 +800000,965.7,3 +900000,964.6,3 +1000000,935.5,3 +1100000,969.2,3 +1200000,971.6,3 +1300000,974.1,3 +1400000,968.9,3 +1500000,966.5,3 +1600000,968.5,3 +1700000,975.3,3 +1800000,969.3,3 +1900000,966.3,3 +2000000,964.6,3 +2100000,963.0,3 +2200000,966.3,3 +2300000,969.0,3 +2400000,976.8,3 +2500000,972.4,3 +2600000,968.8,3 +2700000,973.2,3 +2800000,977.8,3 +2900000,982.8,3 +3000000,969.3,3 +3100000,969.9,3 +3200000,971.8,3 +3300000,966.3,3 +3400000,980.8,3 +3500000,957.6,3 +3600000,975.5,3 +3700000,982.3,3 +3800000,968.4,3 +3900000,975.8,3 +4000000,971.2,3 diff --git a/results/reacher-easy.csv b/results/reacher-easy.csv new file mode 100644 index 0000000..32c5018 --- /dev/null +++ b/results/reacher-easy.csv @@ 
-0,0 +1,124 @@ +step,reward,seed +0,91.4,2 +100000,972.8,2 +200000,985.5,2 +300000,880.9,2 +400000,888.6,2 +500000,892.2,2 +600000,986.6,2 +700000,983.8,2 +800000,985.2,2 +900000,985.6,2 +1000000,981.8,2 +1100000,986.8,2 +1200000,982.8,2 +1300000,942.0,2 +1400000,980.0,2 +1500000,984.5,2 +1600000,978.5,2 +1700000,991.3,2 +1800000,979.5,2 +1900000,982.9,2 +2000000,979.3,2 +2100000,981.1,2 +2200000,983.6,2 +2300000,987.6,2 +2400000,988.0,2 +2500000,987.1,2 +2600000,981.6,2 +2700000,986.8,2 +2800000,983.4,2 +2900000,982.1,2 +3000000,982.5,2 +3100000,981.0,2 +3200000,986.7,2 +3300000,986.1,2 +3400000,981.4,2 +3500000,983.7,2 +3600000,988.4,2 +3700000,986.1,2 +3800000,983.7,2 +3900000,978.7,2 +4000000,989.4,2 +0,51.5,1 +100000,977.9,1 +200000,969.3,1 +300000,973.0,1 +400000,989.6,1 +500000,985.4,1 +600000,985.0,1 +700000,985.0,1 +800000,975.7,1 +900000,987.3,1 +1000000,978.4,1 +1100000,967.2,1 +1200000,988.1,1 +1300000,983.7,1 +1400000,987.8,1 +1500000,983.0,1 +1600000,984.0,1 +1700000,986.1,1 +1800000,990.3,1 +1900000,948.6,1 +2000000,984.6,1 +2100000,986.5,1 +2200000,981.7,1 +2300000,984.0,1 +2400000,987.0,1 +2500000,979.5,1 +2600000,991.1,1 +2700000,983.6,1 +2800000,990.8,1 +2900000,982.5,1 +3000000,985.1,1 +3100000,987.5,1 +3200000,986.0,1 +3300000,986.0,1 +3400000,981.8,1 +3500000,985.5,1 +3600000,985.2,1 +3700000,983.1,1 +3800000,984.2,1 +3900000,985.5,1 +4000000,986.8,1 +0,140.8,3 +100000,859.9,3 +200000,973.9,3 +300000,978.4,3 +400000,987.9,3 +500000,880.7,3 +600000,980.9,3 +700000,980.0,3 +800000,984.0,3 +900000,976.8,3 +1000000,987.8,3 +1100000,982.0,3 +1200000,987.9,3 +1300000,979.9,3 +1400000,984.5,3 +1500000,981.0,3 +1600000,986.7,3 +1700000,986.5,3 +1800000,985.9,3 +1900000,986.9,3 +2000000,981.9,3 +2100000,983.1,3 +2200000,984.5,3 +2300000,962.7,3 +2400000,988.5,3 +2500000,984.7,3 +2600000,986.1,3 +2700000,907.3,3 +2800000,982.6,3 +2900000,984.3,3 +3000000,941.1,3 +3100000,985.1,3 +3200000,983.5,3 +3300000,984.7,3 +3400000,983.2,3 +3500000,982.3,3 
+3600000,985.7,3 +3700000,985.9,3 +3800000,984.6,3 +3900000,986.7,3 +4000000,985.3,3 diff --git a/results/reacher-hard.csv b/results/reacher-hard.csv new file mode 100644 index 0000000..5376622 --- /dev/null +++ b/results/reacher-hard.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,4.0,3 +100000,256.3,3 +200000,668.7,3 +300000,880.2,3 +400000,981.0,3 +500000,974.8,3 +600000,901.4,3 +700000,974.2,3 +800000,983.1,3 +900000,973.4,3 +1000000,984.3,3 +1100000,978.4,3 +1200000,982.5,3 +1300000,977.4,3 +1400000,981.6,3 +1500000,977.9,3 +1600000,983.6,3 +1700000,982.5,3 +1800000,983.2,3 +1900000,982.0,3 +2000000,978.4,3 +2100000,980.4,3 +2200000,982.4,3 +2300000,981.5,3 +2400000,984.3,3 +2500000,981.5,3 +2600000,981.9,3 +2700000,981.8,3 +2800000,979.7,3 +2900000,983.1,3 +3000000,983.5,3 +3100000,981.6,3 +3200000,979.7,3 +3300000,981.7,3 +3400000,981.2,3 +3500000,978.7,3 +3600000,982.2,3 +3700000,979.7,3 +3800000,981.3,3 +3900000,982.6,3 +4000000,975.0,3 +0,23.4,2 +100000,479.1,2 +200000,972.9,2 +300000,896.9,2 +400000,973.4,2 +500000,879.0,2 +600000,969.4,2 +700000,977.1,2 +800000,980.2,2 +900000,882.6,2 +1000000,981.1,2 +1100000,980.9,2 +1200000,977.8,2 +1300000,985.9,2 +1400000,981.9,2 +1500000,981.2,2 +1600000,977.5,2 +1700000,988.3,2 +1800000,978.9,2 +1900000,977.9,2 +2000000,978.4,2 +2100000,977.2,2 +2200000,980.4,2 +2300000,984.3,2 +2400000,981.4,2 +2500000,982.9,2 +2600000,976.3,2 +2700000,983.7,2 +2800000,977.4,2 +2900000,979.1,2 +3000000,978.7,2 +3100000,978.8,2 +3200000,982.7,2 +3300000,982.7,2 +3400000,978.5,2 +3500000,969.5,2 +3600000,985.2,2 +3700000,980.9,2 +3800000,968.3,2 +3900000,983.3,2 +4000000,985.9,2 +0,19.2,1 +100000,507.3,1 +200000,971.2,1 +300000,972.0,1 +400000,885.1,1 +500000,887.4,1 +600000,977.2,1 +700000,980.1,1 +800000,979.3,1 +900000,984.9,1 +1000000,977.7,1 +1100000,931.6,1 +1200000,986.5,1 +1300000,980.1,1 +1400000,985.0,1 +1500000,980.9,1 +1600000,980.0,1 +1700000,982.3,1 +1800000,986.2,1 +1900000,983.0,1 +2000000,981.7,1 +2100000,983.9,1 
+2200000,977.4,1 +2300000,981.4,1 +2400000,982.7,1 +2500000,976.9,1 +2600000,987.1,1 +2700000,979.2,1 +2800000,987.9,1 +2900000,980.3,1 +3000000,982.1,1 +3100000,984.6,1 +3200000,981.8,1 +3300000,982.9,1 +3400000,985.4,1 +3500000,982.0,1 +3600000,968.9,1 +3700000,979.7,1 +3800000,982.1,1 +3900000,982.5,1 +4000000,984.5,1 diff --git a/results/reacher-three-easy.csv b/results/reacher-three-easy.csv new file mode 100644 index 0000000..481ea6b --- /dev/null +++ b/results/reacher-three-easy.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,33.9,3 +100000,883.3,3 +200000,945.4,3 +300000,974.1,3 +400000,979.5,3 +500000,969.9,3 +600000,942.2,3 +700000,976.8,3 +800000,976.0,3 +900000,976.2,3 +1000000,972.3,3 +1100000,975.6,3 +1200000,978.6,3 +1300000,977.9,3 +1400000,975.5,3 +1500000,975.2,3 +1600000,929.5,3 +1700000,978.7,3 +1800000,975.1,3 +1900000,973.5,3 +2000000,958.1,3 +2100000,875.6,3 +2200000,980.1,3 +2300000,982.2,3 +2400000,982.1,3 +2500000,885.9,3 +2600000,982.9,3 +2700000,983.9,3 +2800000,979.0,3 +2900000,981.3,3 +3000000,982.4,3 +3100000,978.6,3 +3200000,985.4,3 +3300000,984.2,3 +3400000,973.3,3 +3500000,980.0,3 +3600000,979.7,3 +3700000,977.7,3 +3800000,978.6,3 +3900000,984.2,3 +4000000,978.9,3 +0,41.6,1 +100000,839.5,1 +200000,973.6,1 +300000,976.4,1 +400000,978.1,1 +500000,873.8,1 +600000,973.1,1 +700000,975.9,1 +800000,871.8,1 +900000,980.3,1 +1000000,879.5,1 +1100000,979.9,1 +1200000,975.5,1 +1300000,973.4,1 +1400000,925.6,1 +1500000,978.0,1 +1600000,972.8,1 +1700000,979.2,1 +1800000,983.5,1 +1900000,982.5,1 +2000000,974.7,1 +2100000,977.9,1 +2200000,872.9,1 +2300000,979.7,1 +2400000,982.5,1 +2500000,978.3,1 +2600000,974.5,1 +2700000,979.7,1 +2800000,980.2,1 +2900000,981.8,1 +3000000,982.4,1 +3100000,981.5,1 +3200000,983.1,1 +3300000,979.5,1 +3400000,983.2,1 +3500000,984.1,1 +3600000,985.7,1 +3700000,979.4,1 +3800000,986.9,1 +3900000,978.2,1 +4000000,978.9,1 +0,102.2,2 +100000,918.7,2 +200000,968.5,2 +300000,973.1,2 +400000,976.7,2 +500000,972.5,2 +600000,975.0,2 
+700000,875.7,2 +800000,977.4,2 +900000,975.6,2 +1000000,979.1,2 +1100000,976.6,2 +1200000,980.5,2 +1300000,978.5,2 +1400000,983.1,2 +1500000,981.1,2 +1600000,982.0,2 +1700000,982.4,2 +1800000,975.7,2 +1900000,979.3,2 +2000000,977.2,2 +2100000,984.1,2 +2200000,977.2,2 +2300000,985.3,2 +2400000,981.5,2 +2500000,982.4,2 +2600000,977.8,2 +2700000,980.9,2 +2800000,974.5,2 +2900000,980.2,2 +3000000,884.3,2 +3100000,981.0,2 +3200000,983.4,2 +3300000,979.7,2 +3400000,982.3,2 +3500000,980.0,2 +3600000,985.9,2 +3700000,980.5,2 +3800000,986.2,2 +3900000,981.2,2 +4000000,986.9,2 diff --git a/results/reacher-three-hard.csv b/results/reacher-three-hard.csv new file mode 100644 index 0000000..8c48dff --- /dev/null +++ b/results/reacher-three-hard.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,7.7,3 +100000,275.6,3 +200000,673.0,3 +300000,867.8,3 +400000,588.9,3 +500000,878.5,3 +600000,782.4,3 +700000,676.8,3 +800000,771.3,3 +900000,680.6,3 +1000000,500.7,3 +1100000,677.6,3 +1200000,776.5,3 +1300000,781.3,3 +1400000,967.8,3 +1500000,682.1,3 +1600000,776.4,3 +1700000,772.8,3 +1800000,578.5,3 +1900000,783.2,3 +2000000,773.9,3 +2100000,384.3,3 +2200000,585.0,3 +2300000,677.3,3 +2400000,784.3,3 +2500000,880.9,3 +2600000,775.2,3 +2700000,682.4,3 +2800000,778.8,3 +2900000,586.0,3 +3000000,869.5,3 +3100000,871.0,3 +3200000,773.7,3 +3300000,779.2,3 +3400000,781.4,3 +3500000,681.9,3 +3600000,871.6,3 +3700000,872.5,3 +3800000,875.1,3 +3900000,489.0,3 +4000000,875.5,3 +0,18.8,2 +100000,221.4,2 +200000,649.3,2 +300000,759.4,2 +400000,684.2,2 +500000,487.2,2 +600000,654.0,2 +700000,682.8,2 +800000,876.1,2 +900000,873.8,2 +1000000,782.8,2 +1100000,581.9,2 +1200000,781.2,2 +1300000,880.6,2 +1400000,874.8,2 +1500000,784.2,2 +1600000,971.3,2 +1700000,783.3,2 +1800000,969.5,2 +1900000,681.5,2 +2000000,976.6,2 +2100000,687.2,2 +2200000,573.3,2 +2300000,884.3,2 +2400000,883.4,2 +2500000,881.7,2 +2600000,586.6,2 +2700000,778.9,2 +2800000,783.0,2 +2900000,879.1,2 +3000000,781.5,2 +3100000,878.2,2 
+3200000,881.1,2 +3300000,976.9,2 +3400000,979.6,2 +3500000,686.9,2 +3600000,782.5,2 +3700000,782.6,2 +3800000,786.6,2 +3900000,881.3,2 +4000000,786.3,2 +0,0.0,1 +100000,303.9,1 +200000,674.1,1 +300000,775.2,1 +400000,973.2,1 +500000,677.8,1 +600000,876.2,1 +700000,876.5,1 +800000,864.3,1 +900000,854.5,1 +1000000,875.1,1 +1100000,778.5,1 +1200000,577.3,1 +1300000,874.3,1 +1400000,778.4,1 +1500000,878.9,1 +1600000,774.6,1 +1700000,968.1,1 +1800000,977.9,1 +1900000,879.6,1 +2000000,872.8,1 +2100000,874.4,1 +2200000,678.2,1 +2300000,682.7,1 +2400000,873.5,1 +2500000,776.0,1 +2600000,775.7,1 +2700000,680.5,1 +2800000,585.2,1 +2900000,971.5,1 +3000000,774.2,1 +3100000,957.1,1 +3200000,876.4,1 +3300000,871.5,1 +3400000,583.2,1 +3500000,873.7,1 +3600000,783.0,1 +3700000,878.0,1 +3800000,976.9,1 +3900000,675.6,1 +4000000,877.5,1 diff --git a/results/stack-cube.csv b/results/stack-cube.csv new file mode 100644 index 0000000..c8f3df0 --- /dev/null +++ b/results/stack-cube.csv @@ -0,0 +1,124 @@ +step,success,seed +0,0.0,3 +100000,0.0,3 +200000,0.0,3 +300000,0.0,3 +400000,0.0,3 +500000,0.0,3 +600000,0.0,3 +700000,0.0,3 +800000,0.0,3 +900000,0.0,3 +1000000,0.0,3 +1100000,0.0,3 +1200000,0.0,3 +1300000,0.0,3 +1400000,0.1,3 +1500000,0.0,3 +1600000,0.0,3 +1700000,0.3,3 +1800000,0.3,3 +1900000,0.5,3 +2000000,0.8,3 +2100000,0.4,3 +2200000,0.5,3 +2300000,0.6,3 +2400000,0.4,3 +2500000,0.6,3 +2600000,0.9,3 +2700000,0.7,3 +2800000,0.8,3 +2900000,0.6,3 +3000000,0.9,3 +3100000,0.9,3 +3200000,0.9,3 +3300000,0.9,3 +3400000,1.0,3 +3500000,0.9,3 +3600000,0.9,3 +3700000,0.8,3 +3800000,1.0,3 +3900000,0.9,3 +4000000,0.9,3 +0,0.0,1 +100000,0.0,1 +200000,0.0,1 +300000,0.0,1 +400000,0.0,1 +500000,0.0,1 +600000,0.0,1 +700000,0.0,1 +800000,0.0,1 +900000,0.0,1 +1000000,0.0,1 +1100000,0.1,1 +1200000,0.0,1 +1300000,0.3,1 +1400000,0.3,1 +1500000,0.1,1 +1600000,0.4,1 +1700000,0.5,1 +1800000,0.3,1 +1900000,0.7,1 +2000000,0.3,1 +2100000,0.6,1 +2200000,0.4,1 +2300000,0.7,1 +2400000,0.8,1 +2500000,0.8,1 
+2600000,0.9,1 +2700000,0.8,1 +2800000,0.8,1 +2900000,0.7,1 +3000000,1.0,1 +3100000,0.9,1 +3200000,0.8,1 +3300000,0.9,1 +3400000,1.0,1 +3500000,0.9,1 +3600000,0.8,1 +3700000,0.9,1 +3800000,0.9,1 +3900000,1.0,1 +4000000,1.0,1 +0,0.0,2 +100000,0.0,2 +200000,0.0,2 +300000,0.0,2 +400000,0.0,2 +500000,0.0,2 +600000,0.0,2 +700000,0.0,2 +800000,0.0,2 +900000,0.0,2 +1000000,0.0,2 +1100000,0.0,2 +1200000,0.0,2 +1300000,0.0,2 +1400000,0.0,2 +1500000,0.0,2 +1600000,0.3,2 +1700000,0.5,2 +1800000,0.5,2 +1900000,0.2,2 +2000000,0.4,2 +2100000,0.5,2 +2200000,0.4,2 +2300000,0.2,2 +2400000,0.7,2 +2500000,0.8,2 +2600000,0.8,2 +2700000,0.8,2 +2800000,1.0,2 +2900000,0.6,2 +3000000,1.0,2 +3100000,0.8,2 +3200000,0.9,2 +3300000,1.0,2 +3400000,1.0,2 +3500000,0.9,2 +3600000,0.6,2 +3700000,0.9,2 +3800000,1.0,2 +3900000,1.0,2 +4000000,0.8,2 diff --git a/results/turn-faucet.csv b/results/turn-faucet.csv new file mode 100644 index 0000000..8e8b2c8 --- /dev/null +++ b/results/turn-faucet.csv @@ -0,0 +1,121 @@ +step,success,seed +0,0.0,3 +100000,0.1,3 +200000,0.3,3 +300000,0.3,3 +400000,0.8,3 +500000,0.4,3 +600000,0.4,3 +700000,0.5,3 +800000,0.9,3 +900000,1.0,3 +1000000,0.7,3 +1100000,1.0,3 +1200000,0.8,3 +1300000,1.0,3 +1400000,0.7,3 +1500000,0.9,3 +1600000,0.9,3 +1700000,0.9,3 +1800000,0.7,3 +1900000,0.8,3 +2000000,1.0,3 +2100000,0.9,3 +2200000,0.9,3 +2300000,0.9,3 +2400000,0.9,3 +2500000,1.0,3 +2600000,0.9,3 +2700000,0.9,3 +2800000,1.0,3 +2900000,0.9,3 +3000000,0.9,3 +3100000,1.0,3 +3200000,0.8,3 +3300000,0.9,3 +3400000,0.9,3 +3500000,1.0,3 +3600000,1.0,3 +3700000,0.9,3 +3800000,1.0,3 +3900000,1.0,3 +0,0.0,2 +100000,0.0,2 +200000,0.6,2 +300000,0.3,2 +400000,0.8,2 +500000,0.7,2 +600000,0.8,2 +700000,0.7,2 +800000,1.0,2 +900000,0.9,2 +1000000,0.9,2 +1100000,0.9,2 +1200000,0.8,2 +1300000,0.9,2 +1400000,0.9,2 +1500000,1.0,2 +1600000,0.9,2 +1700000,1.0,2 +1800000,1.0,2 +1900000,0.7,2 +2000000,0.9,2 +2100000,0.8,2 +2200000,1.0,2 +2300000,1.0,2 +2400000,0.9,2 +2500000,1.0,2 +2600000,1.0,2 
+2700000,0.9,2 +2800000,1.0,2 +2900000,0.9,2 +3000000,1.0,2 +3100000,1.0,2 +3200000,0.9,2 +3300000,1.0,2 +3400000,0.9,2 +3500000,1.0,2 +3600000,0.9,2 +3700000,1.0,2 +3800000,0.9,2 +3900000,0.9,2 +0,0.0,1 +100000,0.2,1 +200000,0.6,1 +300000,0.3,1 +400000,0.8,1 +500000,0.6,1 +600000,0.7,1 +700000,0.7,1 +800000,0.8,1 +900000,1.0,1 +1000000,0.6,1 +1100000,1.0,1 +1200000,0.8,1 +1300000,0.8,1 +1400000,0.9,1 +1500000,0.8,1 +1600000,1.0,1 +1700000,1.0,1 +1800000,0.9,1 +1900000,0.8,1 +2000000,1.0,1 +2100000,0.9,1 +2200000,1.0,1 +2300000,0.9,1 +2400000,1.0,1 +2500000,1.0,1 +2600000,1.0,1 +2700000,1.0,1 +2800000,0.9,1 +2900000,0.9,1 +3000000,0.9,1 +3100000,1.0,1 +3200000,1.0,1 +3300000,1.0,1 +3400000,1.0,1 +3500000,1.0,1 +3600000,1.0,1 +3700000,0.9,1 +3800000,0.9,1 +3900000,1.0,1 diff --git a/results/walker-run-backwards.csv b/results/walker-run-backwards.csv new file mode 100644 index 0000000..729527d --- /dev/null +++ b/results/walker-run-backwards.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,20.6,2 +100000,530.5,2 +200000,652.0,2 +300000,716.2,2 +400000,794.7,2 +500000,854.1,2 +600000,867.1,2 +700000,874.6,2 +800000,880.8,2 +900000,912.3,2 +1000000,910.1,2 +1100000,923.4,2 +1200000,922.5,2 +1300000,933.5,2 +1400000,933.6,2 +1500000,937.6,2 +1600000,925.9,2 +1700000,932.8,2 +1800000,934.7,2 +1900000,938.0,2 +2000000,936.0,2 +2100000,937.3,2 +2200000,936.7,2 +2300000,936.2,2 +2400000,932.7,2 +2500000,938.5,2 +2600000,936.5,2 +2700000,947.1,2 +2800000,939.4,2 +2900000,943.2,2 +3000000,935.8,2 +3100000,938.5,2 +3200000,941.6,2 +3300000,941.2,2 +3400000,941.0,2 +3500000,952.2,2 +3600000,946.2,2 +3700000,943.5,2 +3800000,937.6,2 +3900000,944.3,2 +4000000,939.3,2 +0,21.9,1 +100000,535.0,1 +200000,659.3,1 +300000,696.1,1 +400000,729.3,1 +500000,760.7,1 +600000,788.1,1 +700000,799.2,1 +800000,804.3,1 +900000,806.9,1 +1000000,839.2,1 +1100000,859.7,1 +1200000,876.6,1 +1300000,877.4,1 +1400000,868.0,1 +1500000,886.1,1 +1600000,882.2,1 +1700000,872.8,1 +1800000,884.2,1 +1900000,891.4,1 
+2000000,886.2,1 +2100000,889.7,1 +2200000,895.8,1 +2300000,904.4,1 +2400000,909.3,1 +2500000,914.4,1 +2600000,912.1,1 +2700000,903.4,1 +2800000,920.7,1 +2900000,909.1,1 +3000000,920.1,1 +3100000,926.8,1 +3200000,930.9,1 +3300000,927.0,1 +3400000,932.8,1 +3500000,932.7,1 +3600000,933.0,1 +3700000,931.3,1 +3800000,930.9,1 +3900000,931.1,1 +4000000,936.5,1 +0,24.0,3 +100000,482.1,3 +200000,684.7,3 +300000,724.9,3 +400000,746.9,3 +500000,769.7,3 +600000,801.0,3 +700000,805.3,3 +800000,825.5,3 +900000,816.8,3 +1000000,850.1,3 +1100000,842.0,3 +1200000,858.7,3 +1300000,876.8,3 +1400000,865.1,3 +1500000,870.7,3 +1600000,884.9,3 +1700000,888.0,3 +1800000,884.0,3 +1900000,901.4,3 +2000000,889.6,3 +2100000,897.6,3 +2200000,904.4,3 +2300000,902.0,3 +2400000,909.9,3 +2500000,911.8,3 +2600000,909.9,3 +2700000,912.9,3 +2800000,912.6,3 +2900000,870.9,3 +3000000,920.7,3 +3100000,928.5,3 +3200000,923.7,3 +3300000,925.2,3 +3400000,932.7,3 +3500000,935.6,3 +3600000,936.1,3 +3700000,931.0,3 +3800000,935.9,3 +3900000,942.4,3 +4000000,941.0,3 diff --git a/results/walker-run.csv b/results/walker-run.csv new file mode 100644 index 0000000..84d1a31 --- /dev/null +++ b/results/walker-run.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,20.7,2 +100000,718.3,2 +200000,773.5,2 +300000,801.7,2 +400000,809.8,2 +500000,815.2,2 +600000,821.7,2 +700000,815.4,2 +800000,830.9,2 +900000,834.1,2 +1000000,833.7,2 +1100000,843.0,2 +1200000,839.5,2 +1300000,850.3,2 +1400000,848.3,2 +1500000,844.9,2 +1600000,854.0,2 +1700000,859.7,2 +1800000,858.1,2 +1900000,860.7,2 +2000000,858.3,2 +2100000,863.2,2 +2200000,865.1,2 +2300000,866.1,2 +2400000,871.7,2 +2500000,869.5,2 +2600000,831.8,2 +2700000,869.3,2 +2800000,873.9,2 +2900000,874.4,2 +3000000,871.5,2 +3100000,874.1,2 +3200000,876.1,2 +3300000,869.9,2 +3400000,870.8,2 +3500000,875.2,2 +3600000,875.9,2 +3700000,876.8,2 +3800000,878.4,2 +3900000,881.6,2 +4000000,879.3,2 +0,25.8,1 +100000,691.5,1 +200000,767.8,1 +300000,796.7,1 +400000,817.1,1 +500000,811.6,1 
+600000,823.0,1 +700000,829.8,1 +800000,837.8,1 +900000,838.2,1 +1000000,837.0,1 +1100000,849.1,1 +1200000,844.1,1 +1300000,845.8,1 +1400000,849.5,1 +1500000,853.7,1 +1600000,852.4,1 +1700000,851.9,1 +1800000,853.7,1 +1900000,851.9,1 +2000000,856.3,1 +2100000,855.8,1 +2200000,857.2,1 +2300000,862.5,1 +2400000,860.8,1 +2500000,860.3,1 +2600000,866.9,1 +2700000,867.4,1 +2800000,868.8,1 +2900000,867.4,1 +3000000,867.2,1 +3100000,871.1,1 +3200000,870.3,1 +3300000,867.5,1 +3400000,867.4,1 +3500000,867.1,1 +3600000,870.1,1 +3700000,869.8,1 +3800000,872.4,1 +3900000,868.1,1 +4000000,868.1,1 +0,24.3,3 +100000,759.1,3 +200000,800.2,3 +300000,827.3,3 +400000,826.7,3 +500000,834.4,3 +600000,839.6,3 +700000,845.1,3 +800000,850.4,3 +900000,847.9,3 +1000000,856.3,3 +1100000,860.0,3 +1200000,862.0,3 +1300000,865.6,3 +1400000,870.4,3 +1500000,868.4,3 +1600000,867.5,3 +1700000,869.0,3 +1800000,877.0,3 +1900000,878.3,3 +2000000,872.1,3 +2100000,878.7,3 +2200000,872.5,3 +2300000,881.2,3 +2400000,882.1,3 +2500000,880.8,3 +2600000,882.8,3 +2700000,885.3,3 +2800000,887.2,3 +2900000,887.0,3 +3000000,883.0,3 +3100000,883.0,3 +3200000,884.2,3 +3300000,878.4,3 +3400000,891.5,3 +3500000,885.3,3 +3600000,884.3,3 +3700000,882.9,3 +3800000,889.4,3 +3900000,888.0,3 +4000000,884.2,3 diff --git a/results/walker-stand.csv b/results/walker-stand.csv new file mode 100644 index 0000000..f8199d1 --- /dev/null +++ b/results/walker-stand.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,126.2,2 +100000,979.5,2 +200000,965.5,2 +300000,990.1,2 +400000,971.6,2 +500000,988.1,2 +600000,988.7,2 +700000,988.7,2 +800000,986.1,2 +900000,990.4,2 +1000000,991.0,2 +1100000,989.7,2 +1200000,989.3,2 +1300000,994.2,2 +1400000,990.7,2 +1500000,988.5,2 +1600000,993.4,2 +1700000,996.3,2 +1800000,992.0,2 +1900000,991.5,2 +2000000,991.9,2 +2100000,993.2,2 +2200000,994.3,2 +2300000,993.5,2 +2400000,992.2,2 +2500000,987.2,2 +2600000,994.4,2 +2700000,990.3,2 +2800000,991.1,2 +2900000,994.6,2 +3000000,993.5,2 +3100000,994.5,2 
+3200000,996.6,2 +3300000,991.2,2 +3400000,991.6,2 +3500000,991.4,2 +3600000,993.1,2 +3700000,994.9,2 +3800000,992.3,2 +3900000,995.3,2 +4000000,992.2,2 +0,152.5,1 +100000,985.2,1 +200000,980.1,1 +300000,987.1,1 +400000,985.9,1 +500000,898.1,1 +600000,985.7,1 +700000,988.9,1 +800000,992.3,1 +900000,986.9,1 +1000000,987.0,1 +1100000,994.9,1 +1200000,991.3,1 +1300000,991.2,1 +1400000,994.1,1 +1500000,992.6,1 +1600000,994.3,1 +1700000,993.3,1 +1800000,994.8,1 +1900000,991.8,1 +2000000,992.3,1 +2100000,991.6,1 +2200000,993.4,1 +2300000,990.9,1 +2400000,995.0,1 +2500000,993.3,1 +2600000,992.9,1 +2700000,995.2,1 +2800000,994.4,1 +2900000,992.1,1 +3000000,995.0,1 +3100000,992.9,1 +3200000,995.5,1 +3300000,992.5,1 +3400000,988.9,1 +3500000,991.5,1 +3600000,994.6,1 +3700000,994.0,1 +3800000,993.4,1 +3900000,992.0,1 +4000000,992.0,1 +0,129.2,3 +100000,978.9,3 +200000,987.4,3 +300000,991.6,3 +400000,982.6,3 +500000,985.3,3 +600000,988.2,3 +700000,991.2,3 +800000,992.1,3 +900000,985.2,3 +1000000,993.8,3 +1100000,991.8,3 +1200000,989.8,3 +1300000,991.1,3 +1400000,995.4,3 +1500000,992.2,3 +1600000,993.5,3 +1700000,993.0,3 +1800000,994.4,3 +1900000,994.2,3 +2000000,992.1,3 +2100000,996.3,3 +2200000,992.4,3 +2300000,998.0,3 +2400000,995.0,3 +2500000,994.9,3 +2600000,997.5,3 +2700000,997.3,3 +2800000,996.5,3 +2900000,996.1,3 +3000000,994.6,3 +3100000,991.2,3 +3200000,992.5,3 +3300000,990.2,3 +3400000,998.9,3 +3500000,993.8,3 +3600000,991.5,3 +3700000,989.7,3 +3800000,994.0,3 +3900000,996.4,3 +4000000,993.9,3 diff --git a/results/walker-walk-backwards.csv b/results/walker-walk-backwards.csv new file mode 100644 index 0000000..d7c8f35 --- /dev/null +++ b/results/walker-walk-backwards.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,26.7,3 +100000,952.3,3 +200000,966.2,3 +300000,971.4,3 +400000,975.4,3 +500000,981.2,3 +600000,978.1,3 +700000,978.8,3 +800000,975.0,3 +900000,973.2,3 +1000000,979.6,3 +1100000,978.3,3 +1200000,980.1,3 +1300000,977.8,3 +1400000,979.0,3 +1500000,980.7,3 
+1600000,982.5,3 +1700000,975.9,3 +1800000,983.6,3 +1900000,985.4,3 +2000000,981.6,3 +2100000,982.6,3 +2200000,977.2,3 +2300000,986.8,3 +2400000,983.0,3 +2500000,984.2,3 +2600000,983.8,3 +2700000,984.1,3 +2800000,986.3,3 +2900000,981.4,3 +3000000,985.7,3 +3100000,984.9,3 +3200000,979.3,3 +3300000,975.9,3 +3400000,988.1,3 +3500000,984.8,3 +3600000,980.3,3 +3700000,978.1,3 +3800000,984.8,3 +3900000,986.1,3 +4000000,980.4,3 +0,21.4,2 +100000,958.7,2 +200000,967.6,2 +300000,970.1,2 +400000,965.4,2 +500000,984.3,2 +600000,961.4,2 +700000,971.0,2 +800000,981.6,2 +900000,969.8,2 +1000000,971.9,2 +1100000,981.3,2 +1200000,979.0,2 +1300000,988.1,2 +1400000,985.3,2 +1500000,980.1,2 +1600000,978.1,2 +1700000,976.5,2 +1800000,980.3,2 +1900000,985.8,2 +2000000,983.6,2 +2100000,986.1,2 +2200000,986.0,2 +2300000,977.6,2 +2400000,983.1,2 +2500000,982.9,2 +2600000,981.5,2 +2700000,984.5,2 +2800000,984.2,2 +2900000,982.2,2 +3000000,980.8,2 +3100000,981.5,2 +3200000,976.5,2 +3300000,983.6,2 +3400000,982.9,2 +3500000,991.5,2 +3600000,980.7,2 +3700000,983.9,2 +3800000,982.6,2 +3900000,982.0,2 +4000000,984.9,2 +0,27.0,1 +100000,949.0,1 +200000,956.5,1 +300000,981.1,1 +400000,948.6,1 +500000,969.2,1 +600000,980.7,1 +700000,981.7,1 +800000,979.3,1 +900000,978.2,1 +1000000,980.3,1 +1100000,981.4,1 +1200000,987.6,1 +1300000,976.1,1 +1400000,988.6,1 +1500000,979.2,1 +1600000,981.9,1 +1700000,978.7,1 +1800000,980.7,1 +1900000,984.6,1 +2000000,985.7,1 +2100000,978.5,1 +2200000,986.1,1 +2300000,984.1,1 +2400000,983.2,1 +2500000,988.2,1 +2600000,983.3,1 +2700000,980.9,1 +2800000,984.0,1 +2900000,986.0,1 +3000000,985.4,1 +3100000,981.3,1 +3200000,984.3,1 +3300000,979.3,1 +3400000,984.8,1 +3500000,986.8,1 +3600000,981.4,1 +3700000,984.9,1 +3800000,977.6,1 +3900000,980.4,1 +4000000,985.2,1 diff --git a/results/walker-walk.csv b/results/walker-walk.csv new file mode 100644 index 0000000..dd89cb2 --- /dev/null +++ b/results/walker-walk.csv @@ -0,0 +1,124 @@ +step,reward,seed +0,24.8,2 +100000,947.7,2 
+200000,971.2,2 +300000,977.2,2 +400000,978.4,2 +500000,973.5,2 +600000,980.6,2 +700000,976.7,2 +800000,981.0,2 +900000,976.4,2 +1000000,980.5,2 +1100000,983.9,2 +1200000,976.0,2 +1300000,983.0,2 +1400000,979.0,2 +1500000,979.5,2 +1600000,978.2,2 +1700000,987.1,2 +1800000,983.5,2 +1900000,983.3,2 +2000000,983.3,2 +2100000,982.8,2 +2200000,986.4,2 +2300000,983.6,2 +2400000,985.8,2 +2500000,984.7,2 +2600000,984.7,2 +2700000,982.8,2 +2800000,984.1,2 +2900000,984.6,2 +3000000,983.4,2 +3100000,984.6,2 +3200000,988.1,2 +3300000,975.8,2 +3400000,985.2,2 +3500000,981.4,2 +3600000,986.4,2 +3700000,985.1,2 +3800000,983.6,2 +3900000,987.3,2 +4000000,977.4,2 +0,27.9,1 +100000,966.7,1 +200000,970.5,1 +300000,975.3,1 +400000,973.3,1 +500000,979.4,1 +600000,980.4,1 +700000,978.5,1 +800000,978.6,1 +900000,979.3,1 +1000000,975.2,1 +1100000,985.8,1 +1200000,978.8,1 +1300000,980.5,1 +1400000,984.7,1 +1500000,985.5,1 +1600000,985.6,1 +1700000,982.4,1 +1800000,984.7,1 +1900000,984.0,1 +2000000,985.6,1 +2100000,981.4,1 +2200000,984.0,1 +2300000,983.1,1 +2400000,984.2,1 +2500000,984.5,1 +2600000,984.5,1 +2700000,986.2,1 +2800000,984.4,1 +2900000,984.5,1 +3000000,986.6,1 +3100000,984.2,1 +3200000,984.1,1 +3300000,983.2,1 +3400000,981.9,1 +3500000,983.2,1 +3600000,984.5,1 +3700000,985.2,1 +3800000,982.9,1 +3900000,983.8,1 +4000000,982.9,1 +0,31.4,3 +100000,971.3,3 +200000,978.6,3 +300000,977.8,3 +400000,977.4,3 +500000,983.2,3 +600000,984.5,3 +700000,982.9,3 +800000,981.1,3 +900000,972.5,3 +1000000,983.5,3 +1100000,982.6,3 +1200000,981.5,3 +1300000,982.9,3 +1400000,986.8,3 +1500000,980.8,3 +1600000,982.7,3 +1700000,981.1,3 +1800000,985.2,3 +1900000,988.5,3 +2000000,983.1,3 +2100000,988.1,3 +2200000,982.7,3 +2300000,991.2,3 +2400000,987.4,3 +2500000,987.6,3 +2600000,987.4,3 +2700000,987.2,3 +2800000,988.6,3 +2900000,987.1,3 +3000000,983.0,3 +3100000,984.2,3 +3200000,985.9,3 +3300000,976.1,3 +3400000,991.7,3 +3500000,983.8,3 +3600000,977.9,3 +3700000,980.2,3 +3800000,986.0,3 +3900000,988.7,3 
+4000000,983.1,3 diff --git a/tdmpc2/__init__.py b/tdmpc2/__init__.py new file mode 100755 index 0000000..e69de29 diff --git a/tdmpc2/common/__init__.py b/tdmpc2/common/__init__.py new file mode 100644 index 0000000..7fa5309 --- /dev/null +++ b/tdmpc2/common/__init__.py @@ -0,0 +1,60 @@ +MODEL_SIZE = { # parameters (M) + 1: {'enc_dim': 256, + 'mlp_dim': 384, + 'latent_dim': 128, + 'num_enc_layers': 2, + 'num_q': 2}, + 5: {'enc_dim': 256, + 'mlp_dim': 512, + 'latent_dim': 512, + 'num_enc_layers': 2}, + 19: {'enc_dim': 1024, + 'mlp_dim': 1024, + 'latent_dim': 768, + 'num_enc_layers': 3}, + 48: {'enc_dim': 1792, + 'mlp_dim': 1792, + 'latent_dim': 768, + 'num_enc_layers': 4}, + 317: {'enc_dim': 4096, + 'mlp_dim': 4096, + 'latent_dim': 1376, + 'num_enc_layers': 5, + 'num_q': 8}, +} + +TASK_SET = { + 'mt30': [ + # 19 original dmcontrol tasks + 'walker-stand', 'walker-walk', 'walker-run', 'cheetah-run', 'reacher-easy', + 'reacher-hard', 'acrobot-swingup', 'pendulum-swingup', 'cartpole-balance', 'cartpole-balance-sparse', + 'cartpole-swingup', 'cartpole-swingup-sparse', 'cup-catch', 'finger-spin', 'finger-turn-easy', + 'finger-turn-hard', 'fish-swim', 'hopper-stand', 'hopper-hop', + # 11 custom dmcontrol tasks + 'walker-walk-backwards', 'walker-run-backwards', 'cheetah-run-backwards', 'cheetah-run-front', 'cheetah-run-back', + 'cheetah-jump', 'hopper-hop-backwards', 'reacher-three-easy', 'reacher-three-hard', 'cup-spin', + 'pendulum-spin', + ], + 'mt80': [ + # 19 original dmcontrol tasks + 'walker-stand', 'walker-walk', 'walker-run', 'cheetah-run', 'reacher-easy', + 'reacher-hard', 'acrobot-swingup', 'pendulum-swingup', 'cartpole-balance', 'cartpole-balance-sparse', + 'cartpole-swingup', 'cartpole-swingup-sparse', 'cup-catch', 'finger-spin', 'finger-turn-easy', + 'finger-turn-hard', 'fish-swim', 'hopper-stand', 'hopper-hop', + # 11 custom dmcontrol tasks + 'walker-walk-backwards', 'walker-run-backwards', 'cheetah-run-backwards', 'cheetah-run-front', 'cheetah-run-back', + 
'cheetah-jump', 'hopper-hop-backwards', 'reacher-three-easy', 'reacher-three-hard', 'cup-spin', + 'pendulum-spin', + # meta-world mt50 + 'mw-assembly', 'mw-basketball', 'mw-button-press-topdown', 'mw-button-press-topdown-wall', 'mw-button-press', + 'mw-button-press-wall', 'mw-coffee-button', 'mw-coffee-pull', 'mw-coffee-push', 'mw-dial-turn', + 'mw-disassemble', 'mw-door-open', 'mw-door-close', 'mw-drawer-close', 'mw-drawer-open', + 'mw-faucet-open', 'mw-faucet-close', 'mw-hammer', 'mw-handle-press-side', 'mw-handle-press', + 'mw-handle-pull-side', 'mw-handle-pull', 'mw-lever-pull', 'mw-peg-insert-side', 'mw-peg-unplug-side', + 'mw-pick-out-of-hole', 'mw-pick-place', 'mw-pick-place-wall', 'mw-plate-slide', 'mw-plate-slide-side', + 'mw-plate-slide-back', 'mw-plate-slide-back-side', 'mw-push-back', 'mw-push', 'mw-push-wall', + 'mw-reach', 'mw-reach-wall', 'mw-shelf-place', 'mw-soccer', 'mw-stick-push', + 'mw-stick-pull', 'mw-sweep-into', 'mw-sweep', 'mw-window-open', 'mw-window-close', + 'mw-bin-picking', 'mw-box-close', 'mw-door-lock', 'mw-door-unlock', 'mw-hand-insert', + ], +} diff --git a/tdmpc2/common/buffer.py b/tdmpc2/common/buffer.py new file mode 100644 index 0000000..dbbfea6 --- /dev/null +++ b/tdmpc2/common/buffer.py @@ -0,0 +1,115 @@ +from pathlib import Path +import torch +from tensordict.tensordict import TensorDict +from torchrl.data.replay_buffers import ReplayBuffer, LazyTensorStorage +from torchrl.data.replay_buffers.samplers import RandomSampler +from torchrl.envs import RandomCropTensorDict, Transform, Compose + +from common.logger import make_dir + + +class DataPrepTransform(Transform): + """ + Preprocesses data for TD-MPC2 training. + Replay data is expected to be a TensorDict with the following keys: + obs: observations + action: actions + reward: rewards + task: task IDs (optional) + A TensorDict with T time steps has T+1 observations and T actions and rewards. 
+ The first actions and rewards in each TensorDict are dummies and should be ignored. + """ + + def __init__(self): + super().__init__([]) + + def forward(self, td): + td = td.permute(1,0) + return td['obs'], td['action'][1:], td['reward'][1:].unsqueeze(-1), (td['task'][0] if 'task' in td.keys() else None) + + +class Buffer(): + """ + Create a replay buffer for TD-MPC2 training. + Uses CUDA memory if available, and CPU memory otherwise. + """ + + def __init__(self, cfg): + self.cfg = cfg + self._device = torch.device('cuda') + self._capacity = min(cfg.buffer_size, cfg.steps)//cfg.episode_length + self._num_eps = 0 + + @property + def capacity(self): + """Return the capacity of the buffer.""" + return self._capacity + + @property + def num_eps(self): + """Return the number of episodes in the buffer.""" + return self._num_eps + + def _reserve_buffer(self, storage): + """ + Reserve a buffer with the given storage. + Uses the RandomSampler to sample trajectories, + and the RandomCropTensorDict transform to crop trajectories to the desired length. + DataPrepTransform is used to preprocess data to the expected format in TD-MPC2 updates. + """ + return ReplayBuffer( + storage=storage, + sampler=RandomSampler(), + pin_memory=True, + prefetch=1, + transform=Compose( + RandomCropTensorDict(self.cfg.horizon+1, -1), + DataPrepTransform(), + ), + batch_size=self.cfg.batch_size, + ) + + def _init(self, tds): + """Initialize the replay buffer. 
Use the first episode to estimate storage requirements.""" + mem_free, _ = torch.cuda.mem_get_info() + bytes_per_ep = sum([ + (v.numel()*v.element_size() if not isinstance(v, TensorDict) \ + else sum([x.numel()*x.element_size() for x in v.values()])) \ + for k,v in tds.items() + ]) + print(f'Bytes per episode: {bytes_per_ep:,}') + total_bytes = bytes_per_ep*self._capacity + print(f'Storage required: {total_bytes/1e9:.2f} GB') + # Heuristic: decide whether to use CUDA or CPU memory + if 2.5*total_bytes > mem_free: # Insufficient CUDA memory + print('Using CPU memory for storage.') + return self._reserve_buffer( + LazyTensorStorage(self._capacity, device=torch.device('cpu')) + ) + else: # Sufficient CUDA memory + print('Using CUDA memory for storage.') + return self._reserve_buffer( + LazyTensorStorage(self._capacity, device=torch.device('cuda')) + ) + + def add(self, tds): + """Add an episode to the buffer. All episodes are expected to have the same length.""" + if self._num_eps == 0: + self._buffer = self._init(tds) + self._buffer.add(tds) + self._num_eps += 1 + return self._num_eps + + def sample(self): + """Sample a batch of sub-trajectories from the buffer.""" + obs, action, reward, task = self._buffer.sample(batch_size=self.cfg.batch_size) + return obs.to(self._device, non_blocking=True), \ + action.to(self._device, non_blocking=True), \ + reward.to(self._device, non_blocking=True), \ + task.to(self._device, non_blocking=True) if task is not None else None + + def save(self): + """Save the buffer to disk. 
Useful for storing offline datasets.""" + td = self._buffer._storage._storage.cpu() + fp = make_dir(Path(self.cfg.buffer_dir) / self.cfg.task / str(self.cfg.seed)) / f'{self._num_eps}.pt' + torch.save(td, fp) diff --git a/tdmpc2/common/init.py b/tdmpc2/common/init.py new file mode 100644 index 0000000..45a3f5e --- /dev/null +++ b/tdmpc2/common/init.py @@ -0,0 +1,22 @@ +import torch.nn as nn + + +def weight_init(m): + """Custom weight initialization for TD-MPC2.""" + if isinstance(m, nn.Linear): + nn.init.trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Embedding): + nn.init.uniform_(m.weight, -0.02, 0.02) + elif isinstance(m, nn.ParameterList): + for i,p in enumerate(m): + if p.dim() == 3: # Linear + nn.init.trunc_normal_(p, std=0.02) # Weight + nn.init.constant_(m[i+1], 0) # Bias + + +def zero_(params): + """Initialize parameters to zero.""" + for p in params: + p.data.fill_(0) diff --git a/tdmpc2/common/layers.py b/tdmpc2/common/layers.py new file mode 100644 index 0000000..baebf73 --- /dev/null +++ b/tdmpc2/common/layers.py @@ -0,0 +1,97 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from functorch import combine_state_for_ensemble + + +class Ensemble(nn.Module): + """ + Vectorized ensemble of modules. + """ + + def __init__(self, modules, **kwargs): + super().__init__() + modules = nn.ModuleList(modules) + fn, params, _ = combine_state_for_ensemble(modules) + self.vmap = torch.vmap(fn, in_dims=(0, 0, None), randomness='different', **kwargs) + self.params = nn.ParameterList([nn.Parameter(p) for p in params]) + self._repr = str(modules) + + def modules(self): + return self.vmap.__wrapped__.stateless_model + + def forward(self, *args, **kwargs): + return self.vmap([p for p in self.params], (), *args, **kwargs) + + def __repr__(self): + return 'Vectorized ' + self._repr + + +class SimNorm(nn.Module): + """ + Simplicial normalization. 
+ Adapted from https://arxiv.org/abs/2204.00616. + """ + + def __init__(self, cfg): + super().__init__() + self.dim = cfg.simnorm_dim + + def forward(self, x): + shp = x.shape + x = x.view(*shp[:-1], -1, self.dim) + x = F.softmax(x, dim=-1) + return x.view(*shp) + + def __repr__(self): + return f"SimNorm(dim={self.dim})" + + +class NormedLinear(nn.Linear): + """ + Linear layer with LayerNorm, activation, and optionally dropout. + """ + + def __init__(self, *args, dropout=0., act=nn.Mish(inplace=True), **kwargs): + super().__init__(*args, **kwargs) + self.ln = nn.LayerNorm(self.out_features) + self.act = act + self.dropout = nn.Dropout(dropout, inplace=True) if dropout else None + + def forward(self, x): + x = super().forward(x) + if self.dropout: + x = self.dropout(x) + return self.act(self.ln(x)) + + def __repr__(self): + repr_dropout = f", dropout={self.dropout.p}" if self.dropout else "" + return f"NormedLinear(in_features={self.in_features}, "\ + f"out_features={self.out_features}, "\ + f"bias={self.bias is not None}{repr_dropout}, "\ + f"act={self.act.__class__.__name__})" + + +def enc(cfg, out={}): + """ + Returns a dictionary of encoders for each observation in the dict. + """ + for k in cfg.obs_shape.keys(): + assert k == 'state' + out[k] = mlp(cfg.obs_shape[k][0] + cfg.task_dim, max(cfg.num_enc_layers-1, 1)*[cfg.enc_dim], cfg.latent_dim, act=SimNorm(cfg)) + return nn.ModuleDict(out) + + +def mlp(in_dim, mlp_dims, out_dim, act=None, dropout=0.): + """ + Basic building block of TD-MPC2. + MLP with LayerNorm, Mish activations, and optionally dropout. 
+ """ + if isinstance(mlp_dims, int): + mlp_dims = [mlp_dims] + dims = [in_dim] + mlp_dims + [out_dim] + mlp = nn.ModuleList() + for i in range(len(dims) - 2): + mlp.append(NormedLinear(dims[i], dims[i+1], dropout=dropout*(i==0))) + mlp.append(NormedLinear(dims[-2], dims[-1], act=act) if act else nn.Linear(dims[-2], dims[-1])) + return nn.Sequential(*mlp) diff --git a/tdmpc2/common/logger.py b/tdmpc2/common/logger.py new file mode 100755 index 0000000..39c93fe --- /dev/null +++ b/tdmpc2/common/logger.py @@ -0,0 +1,238 @@ +import os +import datetime +import re +import numpy as np +import pandas as pd +from termcolor import colored +from omegaconf import OmegaConf + +from common import TASK_SET + + +CONSOLE_FORMAT = [ + ("iteration", "I", "int"), + ("episode", "E", "int"), + ("step", "I", "int"), + ("episode_reward", "R", "float"), + ("episode_success", "S", "float"), + ("total_time", "T", "time"), +] + +CAT_TO_COLOR = { + "pretrain": "yellow", + "train": "blue", + "eval": "green", +} + + +def make_dir(dir_path): + """Create directory if it does not already exist.""" + try: + os.makedirs(dir_path) + except OSError: + pass + return dir_path + + +def print_run(cfg): + """ + Pretty-printing of current run information. + Logger calls this method at initialization. + """ + prefix, color, attrs = " ", "green", ["bold"] + + def _limstr(s, maxlen=36): + return str(s[:maxlen]) + "..." 
if len(str(s)) > maxlen else s + + def _pprint(k, v): + print( + prefix + colored(f'{k.capitalize()+":":<15}', color, attrs=attrs), _limstr(v) + ) + + obs_dim = cfg.obs_shape['state'][0] if 'state' in cfg.obs_shape else cfg.obs_shape[0] + kvs = [ + ("task", cfg.task_title), + ("steps", f"{int(cfg.steps):,}"), + ("observations", obs_dim), + ("actions", cfg.action_dim), + ("experiment", cfg.exp_name), + ] + w = np.max([len(_limstr(str(kv[1]))) for kv in kvs]) + 25 + div = "-" * w + print(div) + for k, v in kvs: + _pprint(k, v) + print(div) + + +def cfg_to_group(cfg, return_list=False): + """ + Return a wandb-safe group name for logging. + Optionally returns group name as list. + """ + lst = [cfg.task, re.sub("[^0-9a-zA-Z]+", "-", cfg.exp_name)] + return lst if return_list else "-".join(lst) + + +class VideoRecorder: + """Utility class for logging evaluation videos.""" + + def __init__(self, cfg, wandb, fps=15): + self.cfg = cfg + self._save_dir = make_dir(cfg.work_dir / 'eval_video') + self._wandb = wandb + self.fps = fps + self.frames = [] + self.enabled = False + + def init(self, env, enabled=True): + self.frames = [] + self.enabled = self._save_dir and self._wandb and enabled + self.record(env) + + def record(self, env): + if self.enabled: + self.frames.append(env.render()) + + def save(self, step, key='videos/eval_video'): + if self.enabled and len(self.frames) > 0: + frames = np.stack(self.frames) + return self._wandb.log( + {key: self._wandb.Video(frames.transpose(0, 3, 1, 2), fps=self.fps, format='mp4')}, step=step + ) + + +class Logger: + """Primary logging object. 
Logs either locally or using wandb.""" + + def __init__(self, cfg): + self._log_dir = make_dir(cfg.work_dir) + self._model_dir = make_dir(self._log_dir / "models") + self._save_csv = cfg.save_csv + self._save_agent = cfg.save_agent + self._group = cfg_to_group(cfg) + self._seed = cfg.seed + self._eval = [] + print_run(cfg) + self.project = cfg.get("wandb_project", "none") + self.entity = cfg.get("wandb_entity", "none") + if cfg.disable_wandb or self.project == "none" or self.entity == "none": + print(colored("Wandb disabled.", "blue", attrs=["bold"])) + cfg.save_agent = False + cfg.save_video = False + self._wandb = None + self._video = None + return + os.environ["WANDB_SILENT"] = "true" if cfg.wandb_silent else "false" + import wandb + + wandb.init( + project=self.project, + entity=self.entity, + name=str(cfg.seed), + group=self._group, + tags=cfg_to_group(cfg, return_list=True) + [f"seed:{cfg.seed}"], + dir=self._log_dir, + config=OmegaConf.to_container(cfg, resolve=True), + ) + print(colored("Logs will be synced with wandb.", "blue", attrs=["bold"])) + self._wandb = wandb + self._video = ( + VideoRecorder(cfg, self._wandb) + if self._wandb and cfg.save_video + else None + ) + + @property + def video(self): + return self._video + + @property + def model_dir(self): + return self._model_dir + + def save_agent(self, agent=None, identifier='final'): + if self._save_agent and agent: + fp = self._model_dir / f'{str(identifier)}.pt' + agent.save(fp) + if self._wandb: + artifact = self._wandb.Artifact( + self._group + '-' + str(self._seed) + '-' + str(identifier), + type='model', + ) + artifact.add_file(fp) + self._wandb.log_artifact(artifact) + + def finish(self, agent=None): + try: + self.save_agent(agent) + except Exception as e: + print(colored(f"Failed to save model: {e}", "red")) + if self._wandb: + self._wandb.finish() + + def _format(self, key, value, ty): + if ty == "int": + return f'{colored(key+":", "blue")} {int(value):,}' + elif ty == "float": + return 
f'{colored(key+":", "blue")} {value:.01f}' + elif ty == "time": + value = str(datetime.timedelta(seconds=int(value))) + return f'{colored(key+":", "blue")} {value}' + else: + raise f"invalid log format type: {ty}" + + def _print(self, d, category): + category = colored(category, CAT_TO_COLOR[category]) + pieces = [f" {category:<14}"] + for k, disp_k, ty in CONSOLE_FORMAT: + if k in d: + pieces.append(f"{self._format(disp_k, d[k], ty):<22}") + print(" ".join(pieces)) + + def pprint_multitask(self, d, cfg): + """Pretty-print evaluation metrics for multi-task training.""" + print(colored(f'Evaluated agent on {len(cfg.tasks)} tasks:', 'yellow', attrs=['bold'])) + dmcontrol_reward = [] + metaworld_reward = [] + metaworld_success = [] + for k, v in d.items(): + if '+' not in k: + continue + task = k.split('+')[1] + if task in TASK_SET['mt30'] and k.startswith('episode_reward'): # DMControl + dmcontrol_reward.append(v) + print(colored(f' {task:<22}\tR: {v:.01f}', 'yellow')) + elif task in TASK_SET['mt80'] and task not in TASK_SET['mt30']: # Meta-World + if k.startswith('episode_reward'): + metaworld_reward.append(v) + elif k.startswith('episode_success'): + metaworld_success.append(v) + print(colored(f' {task:<22}\tS: {v:.02f}', 'yellow')) + dmcontrol_reward = np.nanmean(dmcontrol_reward) + d['episode_reward+avg_dmcontrol'] = dmcontrol_reward + print(colored(f' {"dmcontrol":<22}\tR: {dmcontrol_reward:.01f}', 'yellow', attrs=['bold'])) + if cfg.task == 'mt80': + metaworld_reward = np.nanmean(metaworld_reward) + metaworld_success = np.nanmean(metaworld_success) + d['episode_reward+avg_metaworld'] = metaworld_reward + d['episode_success+avg_metaworld'] = metaworld_success + print(colored(f' {"metaworld":<22}\tR: {metaworld_reward:.01f}', 'yellow', attrs=['bold'])) + print(colored(f' {"metaworld":<22}\tS: {metaworld_success:.02f}', 'yellow', attrs=['bold'])) + + def log(self, d, category="train"): + assert category in CAT_TO_COLOR.keys(), f"invalid category: {category}" + if 
self._wandb: + if category in {"train", "eval"}: + xkey = "step" + elif category == "pretrain": + xkey = "iteration" + for k, v in d.items(): + self._wandb.log({category + "/" + k: v}, step=d[xkey]) + if category == "eval" and self._save_csv: + keys = ["step", "episode_reward"] + self._eval.append(np.array([d[keys[0]], d[keys[1]]])) + pd.DataFrame(np.array(self._eval)).to_csv( + self._log_dir / "eval.csv", header=keys, index=None + ) + self._print(d, category) diff --git a/tdmpc2/common/math.py b/tdmpc2/common/math.py new file mode 100644 index 0000000..62b8230 --- /dev/null +++ b/tdmpc2/common/math.py @@ -0,0 +1,95 @@ +import torch +import torch.nn.functional as F + + +def soft_ce(pred, target, cfg): + """Computes the cross entropy loss between predictions and soft targets.""" + pred = F.log_softmax(pred, dim=-1) + target = two_hot(target, cfg) + return -(target * pred).sum(-1, keepdim=True) + + +@torch.jit.script +def log_std(x, low, dif): + return low + 0.5 * dif * (torch.tanh(x) + 1) + + +@torch.jit.script +def _gaussian_residual(eps, log_std): + return -0.5 * eps.pow(2) - log_std + + +@torch.jit.script +def _gaussian_logprob(residual): + return residual - 0.5 * torch.log(2 * torch.pi) + + +def gaussian_logprob(eps, log_std, size=None): + """Compute Gaussian log probability.""" + residual = _gaussian_residual(eps, log_std).sum(-1, keepdim=True) + if size is None: + size = eps.size(-1) + return _gaussian_logprob(residual) * size + + +@torch.jit.script +def _squash(pi): + return torch.log(F.relu(1 - pi.pow(2)) + 1e-6) + + +def squash(mu, pi, log_pi): + """Apply squashing function.""" + mu = torch.tanh(mu) + pi = torch.tanh(pi) + log_pi -= _squash(pi).sum(-1, keepdim=True) + return mu, pi, log_pi + + +@torch.jit.script +def symlog(x): + """ + Symmetric logarithmic function. + Adapted from https://github.com/danijar/dreamerv3. + """ + return torch.sign(x) * torch.log(1 + torch.abs(x)) + + +@torch.jit.script +def symexp(x): + """ + Symmetric exponential function. 
+ Adapted from https://github.com/danijar/dreamerv3. + """ + return torch.sign(x) * (torch.exp(torch.abs(x)) - 1) + + +def two_hot(x, cfg): + """Converts a batch of scalars to soft two-hot encoded targets for discrete regression.""" + if cfg.num_bins == 0: + return x + elif cfg.num_bins == 1: + return symlog(x) + x = torch.clamp(symlog(x), cfg.vmin, cfg.vmax).squeeze(1) + bin_idx = torch.floor((x - cfg.vmin) / cfg.bin_size).long() + bin_offset = ((x - cfg.vmin) / cfg.bin_size - bin_idx.float()).unsqueeze(-1) + soft_two_hot = torch.zeros(x.size(0), cfg.num_bins, device=x.device) + soft_two_hot.scatter_(1, bin_idx.unsqueeze(1), 1 - bin_offset) + soft_two_hot.scatter_(1, (bin_idx.unsqueeze(1) + 1) % cfg.num_bins, bin_offset) + return soft_two_hot + + +DREG_BINS = None + + +def two_hot_inv(x, cfg): + """Converts a batch of soft two-hot encoded vectors to scalars.""" + global DREG_BINS + if cfg.num_bins == 0: + return x + elif cfg.num_bins == 1: + return symexp(x) + if DREG_BINS is None: + DREG_BINS = torch.linspace(cfg.vmin, cfg.vmax, cfg.num_bins, device=x.device) + x = F.softmax(x, dim=-1) + x = torch.sum(x * DREG_BINS, dim=-1, keepdim=True) + return symexp(x) diff --git a/tdmpc2/common/parser.py b/tdmpc2/common/parser.py new file mode 100755 index 0000000..f36731e --- /dev/null +++ b/tdmpc2/common/parser.py @@ -0,0 +1,60 @@ +import re +from pathlib import Path + +import hydra +from omegaconf import OmegaConf + +from common import MODEL_SIZE, TASK_SET + + +def parse_cfg(cfg: OmegaConf) -> OmegaConf: + """ + Parses a Hydra config. Mostly for convenience. 
+ """ + + # Logic + for k in cfg.keys(): + try: + v = cfg[k] + if v == None: + v = True + except: + pass + + # Algebraic expressions + for k in cfg.keys(): + try: + v = cfg[k] + if isinstance(v, str): + match = re.match(r"(\d+)([+\-*/])(\d+)", v) + if match: + cfg[k] = eval(match.group(1) + match.group(2) + match.group(3)) + if isinstance(cfg[k], float) and cfg[k].is_integer(): + cfg[k] = int(cfg[k]) + except: + pass + + # Convenience + cfg.work_dir = Path(hydra.utils.get_original_cwd()) / 'logs' / cfg.task / str(cfg.seed) / cfg.exp_name + cfg.task_title = cfg.task.replace("-", " ").title() + cfg.bin_size = (cfg.vmax - cfg.vmin) / (cfg.num_bins-1) # Bin size for discrete regression + + # Model size + assert cfg.model_size in MODEL_SIZE.keys(), \ + f'Invalid model size {cfg.model_size}. Must be one of {list(MODEL_SIZE.keys())}' + for k, v in MODEL_SIZE[cfg.model_size].items(): + cfg[k] = v + if cfg.task == 'mt30' and cfg.model_size == 19: + cfg.latent_dim = 512 # This checkpoint is slightly smaller + + # Multi-task + cfg.multitask = cfg.task in TASK_SET.keys() + if cfg.multitask: + cfg.task_title = cfg.task.upper() + # Account for slight inconsistency in task_dim for the mt30 experiments + cfg.task_dim = 96 if cfg.task == 'mt80' or cfg.model_size in {1, 317} else 64 + else: + cfg.task_dim = 0 + cfg.tasks = TASK_SET.get(cfg.task, [cfg.task]) + + return cfg diff --git a/tdmpc2/common/scale.py b/tdmpc2/common/scale.py new file mode 100644 index 0000000..63f0bb2 --- /dev/null +++ b/tdmpc2/common/scale.py @@ -0,0 +1,48 @@ +import torch + + +class RunningScale: + """Running trimmed scale estimator.""" + + def __init__(self, cfg): + self.cfg = cfg + self._value = torch.ones(1, dtype=torch.float32, device=torch.device('cuda')) + self._percentiles = torch.tensor([5, 95], dtype=torch.float32, device=torch.device('cuda')) + + def state_dict(self): + return dict(value=self._value, percentiles=self._percentiles) + + def load_state_dict(self, state_dict): + 
self._value.data.copy_(state_dict['value']) + self._percentiles.data.copy_(state_dict['percentiles']) + + @property + def value(self): + return self._value.cpu().item() + + def _percentile(self, x): + x_dtype, x_shape = x.dtype, x.shape + x = x.view(x.shape[0], -1) + in_sorted, _ = torch.sort(x, dim=0) + positions = self._percentiles * (x.shape[0]-1) / 100 + floored = torch.floor(positions) + ceiled = floored + 1 + ceiled[ceiled > x.shape[0] - 1] = x.shape[0] - 1 + weight_ceiled = positions-floored + weight_floored = 1.0 - weight_ceiled + d0 = in_sorted[floored.long(), :] * weight_floored[:, None] + d1 = in_sorted[ceiled.long(), :] * weight_ceiled[:, None] + return (d0+d1).view(-1, *x_shape[1:]).type(x_dtype) + + def update(self, x): + percentiles = self._percentile(x.detach()) + value = torch.clamp(percentiles[1] - percentiles[0], min=1.) + self._value.data.lerp_(value, self.cfg.tau) + + def __call__(self, x, update=False): + if update: + self.update(x) + return x * (1/self.value) + + def __repr__(self): + return f'RunningScale(S: {self.value})' diff --git a/tdmpc2/common/seed.py b/tdmpc2/common/seed.py new file mode 100644 index 0000000..5c8972e --- /dev/null +++ b/tdmpc2/common/seed.py @@ -0,0 +1,12 @@ +import random + +import numpy as np +import torch + + +def set_seed(seed): + """Set seed for reproducibility.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) diff --git a/tdmpc2/common/world_model.py b/tdmpc2/common/world_model.py new file mode 100644 index 0000000..30fb1d4 --- /dev/null +++ b/tdmpc2/common/world_model.py @@ -0,0 +1,174 @@ +from copy import deepcopy + +import numpy as np +import torch +import torch.nn as nn + +from common import layers, math, init + + +class WorldModel(nn.Module): + """ + TD-MPC2 implicit world model architecture. + Can be used for both single-task and multi-task experiments. 
+ """ + + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + if cfg.multitask: + self._task_emb = nn.Embedding(len(cfg.tasks), cfg.task_dim, max_norm=1) + self._action_masks = torch.zeros(len(cfg.tasks), cfg.action_dim) + for i in range(len(cfg.tasks)): + self._action_masks[i, :cfg.action_dims[i]] = 1. + self._encoder = layers.enc(cfg) + self._dynamics = layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], cfg.latent_dim, act=layers.SimNorm(cfg)) + self._reward = layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], max(cfg.num_bins, 1)) + self._pi = layers.mlp(cfg.latent_dim + cfg.task_dim, 2*[cfg.mlp_dim], 2*cfg.action_dim) + self._Qs = layers.Ensemble([layers.mlp(cfg.latent_dim + cfg.action_dim + cfg.task_dim, 2*[cfg.mlp_dim], max(cfg.num_bins, 1), dropout=cfg.dropout) for _ in range(cfg.num_q)]) + self.apply(init.weight_init) + init.zero_([self._reward[-1].weight, self._Qs.params[-2]]) + self._target_Qs = deepcopy(self._Qs).requires_grad_(False) + self.log_std_min = torch.tensor(cfg.log_std_min) + self.log_std_dif = torch.tensor(cfg.log_std_max) - self.log_std_min + + @property + def total_params(self): + return sum(p.numel() for p in self.parameters() if p.requires_grad) + + def to(self, *args, **kwargs): + """ + Overriding `to` method to also move additional tensors to device. + """ + super().to(*args, **kwargs) + if self.cfg.multitask: + self._action_masks = self._action_masks.to(*args, **kwargs) + self.log_std_min = self.log_std_min.to(*args, **kwargs) + self.log_std_dif = self.log_std_dif.to(*args, **kwargs) + return self + + def train(self, mode=True): + """ + Overriding `train` method to keep target Q-networks in eval mode. + """ + super().train(mode) + self._target_Qs.train(False) + return self + + def track_q_grad(self, mode=True): + """ + Enables/disables gradient tracking of Q-networks. + Avoids unnecessary computation during policy optimization. 
+ This method also enables/disables gradients for task embeddings, + and sets the dropout probability to 0 if `mode` is False. + """ + for p in self._Qs.parameters(): + p.requires_grad_(mode) + if self.cfg.multitask: + for p in self._task_emb.parameters(): + p.requires_grad_(mode) + for m in self._Qs.modules(): + if isinstance(m, nn.Dropout): + m.p = self.cfg.dropout if mode else 0 + + def soft_update_target_Q(self): + """ + Soft-update target Q-networks using Polyak averaging. + """ + with torch.no_grad(): + for p, p_target in zip(self._Qs.parameters(), self._target_Qs.parameters()): + p_target.data.lerp_(p.data, self.cfg.tau) + + def task_emb(self, x, task): + """ + Continuous task embedding for multi-task experiments. + Retrieves the task embedding for a given task ID `task` + and concatenates it to the input `x`. + """ + if isinstance(task, int): + task = torch.tensor([task], device=x.device) + emb = self._task_emb(task.long()) + if x.ndim == 3: + emb = emb.unsqueeze(0).repeat(x.shape[0], 1, 1) + elif emb.shape[0] == 1: + emb = emb.repeat(x.shape[0], 1) + return torch.cat([x, emb], dim=-1) + + def encode(self, obs, task): + """ + Encodes an observation into its latent representation. + This implementation assumes a single state-based observation. + """ + if self.cfg.multitask: + obs = self.task_emb(obs, task) + return self._encoder['state'](obs) + + def next(self, z, a, task): + """ + Predicts the next latent state given the current latent state and action. + """ + if self.cfg.multitask: + z = self.task_emb(z, task) + z = torch.cat([z, a], dim=-1) + return self._dynamics(z) + + def reward(self, z, a, task): + """ + Predicts instantaneous (single-step) reward. + """ + if self.cfg.multitask: + z = self.task_emb(z, task) + z = torch.cat([z, a], dim=-1) + return self._reward(z) + + def pi(self, z, task): + """ + Samples an action from the policy prior. + The policy prior is a Gaussian distribution with + mean and (log) std predicted by a neural network. 
+ """ + if self.cfg.multitask: + z = self.task_emb(z, task) + + # Gaussian policy prior + mu, log_std = self._pi(z).chunk(2, dim=-1) + log_std = math.log_std(log_std, self.log_std_min, self.log_std_dif) + eps = torch.randn_like(mu) + + if self.cfg.multitask: # Mask out unused action dimensions + mu = mu * self._action_masks[task] + log_std = log_std * self._action_masks[task] + eps = eps * self._action_masks[task] + action_dims = self._action_masks.sum(-1)[task].unsqueeze(-1) + else: # No masking + action_dims = None + + log_pi = math.gaussian_logprob(eps, log_std, size=action_dims) + pi = mu + eps * log_std.exp() + mu, pi, log_pi = math.squash(mu, pi, log_pi) + + return mu, pi, log_pi, log_std + + def Q(self, z, a, task, return_type='min', target=False): + """ + Predict state-action value. + `return_type` can be one of [`min`, `avg`, `all`]: + - `min`: return the minimum of two randomly subsampled Q-values. + - `avg`: return the average of two randomly subsampled Q-values. + - `all`: return all Q-values. + `target` specifies whether to use the target Q-networks or not. + """ + assert return_type in {'min', 'avg', 'all'} + + if self.cfg.multitask: + z = self.task_emb(z, task) + + z = torch.cat([z, a], dim=-1) + out = (self._target_Qs if target else self._Qs)(z) + + if return_type == 'all': + return out + + Q1, Q2 = out[np.random.choice(self.cfg.num_q, 2, replace=False)] + Q1, Q2 = math.two_hot_inv(Q1, self.cfg), math.two_hot_inv(Q2, self.cfg) + return torch.min(Q1, Q2) if return_type == 'min' else (Q1 + Q2) / 2 diff --git a/tdmpc2/config.yaml b/tdmpc2/config.yaml new file mode 100755 index 0000000..3b945ee --- /dev/null +++ b/tdmpc2/config.yaml @@ -0,0 +1,86 @@ +defaults: + - override hydra/launcher: submitit_local + +# environment +task: dog-run + +# evaluation +checkpoint: ??? 
+eval_episodes: 10 +eval_freq: 50000 + +# training +steps: 10_000_000 +batch_size: 256 +reward_coef: 0.1 +value_coef: 0.1 +consistency_coef: 20 +rho: 0.5 +lr: 3e-4 +enc_lr_scale: 0.3 +grad_clip_norm: 20 +tau: 0.01 +discount_denom: 5 +discount_min: 0.95 +discount_max: 0.995 +buffer_size: 1_000_000 +exp_name: default +data_dir: ??? + +# planning +mpc: true +iterations: 6 +num_samples: 512 +num_elites: 64 +num_pi_trajs: 24 +horizon: 3 +min_std: 0.05 +max_std: 2 +temperature: 0.5 + +# actor +log_std_min: -10 +log_std_max: 2 +entropy_coef: 1e-4 + +# critic +num_bins: 101 +vmin: -10 +vmax: +10 + +# architecture +model_size: 5 +num_enc_layers: 2 +enc_dim: 256 +mlp_dim: 512 +latent_dim: 512 +task_dim: 96 +num_q: 5 +dropout: 0.01 +simnorm_dim: 8 + +# logging +wandb_project: ??? +wandb_entity: ??? +wandb_silent: false +disable_wandb: true +save_csv: true + +# misc +save_video: true +save_agent: true +seed: 1 + +# convenience +work_dir: ??? +task_title: ??? +multitask: ??? +tasks: ??? +obs_shape: ??? +action_dim: ??? +episode_length: ??? +obs_shapes: ??? +action_dims: ??? +episode_lengths: ??? +seed_steps: ??? +bin_size: ??? diff --git a/tdmpc2/envs/__init__.py b/tdmpc2/envs/__init__.py new file mode 100644 index 0000000..ef2a630 --- /dev/null +++ b/tdmpc2/envs/__init__.py @@ -0,0 +1,62 @@ +from copy import deepcopy +import warnings + +import gym + +from envs.wrappers.multitask import MultitaskWrapper +from envs.wrappers.tensor import TensorWrapper +from envs.dmcontrol import make_env as make_dm_control_env +from envs.maniskill import make_env as make_maniskill_env +from envs.metaworld import make_env as make_metaworld_env +from envs.myosuite import make_env as make_myosuite_env +from envs.exceptions import UnknownTaskError + +warnings.filterwarnings('ignore', category=DeprecationWarning) + + +def make_multitask_env(cfg): + """ + Make a multi-task environment for TD-MPC2 experiments. 
+ """ + print('Creating multi-task environment with tasks:', cfg.tasks) + envs = [] + for task in cfg.tasks: + _cfg = deepcopy(cfg) + _cfg.task = task + _cfg.multitask = False + env = make_env(_cfg) + if env is None: + raise UnknownTaskError(task) + envs.append(env) + env = MultitaskWrapper(cfg, envs) + cfg.obs_shapes = env._obs_dims + cfg.action_dims = env._action_dims + cfg.episode_lengths = env._episode_lengths + return env + + +def make_env(cfg): + """ + Make an environment for TD-MPC2 experiments. + """ + gym.logger.set_level(40) + if cfg.multitask: + env = make_multitask_env(cfg) + else: + env = None + for fn in [make_dm_control_env, make_maniskill_env, make_metaworld_env, make_myosuite_env]: + try: + env = fn(cfg) + except UnknownTaskError: + pass + if env is None: + raise UnknownTaskError(cfg.task) + env = TensorWrapper(env) + try: # Dict + cfg.obs_shape = {k: v.shape for k, v in env.observation_space.spaces.items()} + except: # Box + cfg.obs_shape = {'state': env.observation_space.shape} + cfg.action_dim = env.action_space.shape[0] + cfg.episode_length = env.max_episode_steps + cfg.seed_steps = max(1000, 5*cfg.episode_length) + return env diff --git a/tdmpc2/envs/dmcontrol.py b/tdmpc2/envs/dmcontrol.py new file mode 100644 index 0000000..32cb4b6 --- /dev/null +++ b/tdmpc2/envs/dmcontrol.py @@ -0,0 +1,200 @@ +from collections import deque, defaultdict +from typing import Any, NamedTuple +import dm_env +import numpy as np +from envs.tasks import cheetah, walker, hopper, reacher, ball_in_cup, pendulum, fish +from dm_control import suite +suite.ALL_TASKS = suite.ALL_TASKS + suite._get_tasks('custom') +suite.TASKS_BY_DOMAIN = suite._get_tasks_by_domain(suite.ALL_TASKS) +from dm_control.suite.wrappers import action_scale +from dm_env import StepType, specs +from envs.exceptions import UnknownTaskError +import gym + + +class ExtendedTimeStep(NamedTuple): + step_type: Any + reward: Any + discount: Any + observation: Any + action: Any + + def first(self): + return 
self.step_type == StepType.FIRST + + def mid(self): + return self.step_type == StepType.MID + + def last(self): + return self.step_type == StepType.LAST + + +class ActionRepeatWrapper(dm_env.Environment): + def __init__(self, env, num_repeats): + self._env = env + self._num_repeats = num_repeats + + def step(self, action): + reward = 0.0 + discount = 1.0 + for i in range(self._num_repeats): + time_step = self._env.step(action) + reward += (time_step.reward or 0.0) * discount + discount *= time_step.discount + if time_step.last(): + break + + return time_step._replace(reward=reward, discount=discount) + + def observation_spec(self): + return self._env.observation_spec() + + def action_spec(self): + return self._env.action_spec() + + def reset(self): + return self._env.reset() + + def __getattr__(self, name): + return getattr(self._env, name) + + +class ActionDTypeWrapper(dm_env.Environment): + def __init__(self, env, dtype): + self._env = env + wrapped_action_spec = env.action_spec() + self._action_spec = specs.BoundedArray(wrapped_action_spec.shape, + dtype, + wrapped_action_spec.minimum, + wrapped_action_spec.maximum, + 'action') + + def step(self, action): + action = action.astype(self._env.action_spec().dtype) + return self._env.step(action) + + def observation_spec(self): + return self._env.observation_spec() + + def action_spec(self): + return self._action_spec + + def reset(self): + return self._env.reset() + + def __getattr__(self, name): + return getattr(self._env, name) + + +class ExtendedTimeStepWrapper(dm_env.Environment): + def __init__(self, env): + self._env = env + + def reset(self): + time_step = self._env.reset() + return self._augment_time_step(time_step) + + def step(self, action): + time_step = self._env.step(action) + return self._augment_time_step(time_step, action) + + def _augment_time_step(self, time_step, action=None): + if action is None: + action_spec = self.action_spec() + action = np.zeros(action_spec.shape, dtype=action_spec.dtype) + 
return ExtendedTimeStep(observation=time_step.observation, + step_type=time_step.step_type, + action=action, + reward=time_step.reward or 0.0, + discount=time_step.discount or 1.0) + + def observation_spec(self): + return self._env.observation_spec() + + def action_spec(self): + return self._env.action_spec() + + def __getattr__(self, name): + return getattr(self._env, name) + + +class TimeStepToGymWrapper: + def __init__(self, env, domain, task): + obs_shp = [] + for v in env.observation_spec().values(): + try: + shp = np.prod(v.shape) + except: + shp = 1 + obs_shp.append(shp) + obs_shp = (int(np.sum(obs_shp)),) + act_shp = env.action_spec().shape + self.observation_space = gym.spaces.Box( + low=np.full( + obs_shp, + -np.inf, + dtype=np.float32), + high=np.full( + obs_shp, + np.inf, + dtype=np.float32), + dtype=np.float32, + ) + self.action_space = gym.spaces.Box( + low=np.full(act_shp, env.action_spec().minimum), + high=np.full(act_shp, env.action_spec().maximum), + dtype=env.action_spec().dtype) + self.env = env + self.domain = domain + self.task = task + self.max_episode_steps = 500 + self.t = 0 + + @property + def unwrapped(self): + return self.env + + @property + def reward_range(self): + return None + + @property + def metadata(self): + return None + + def _obs_to_array(self, obs): + return np.concatenate([v.flatten() for v in obs.values()]) + + def reset(self): + self.t = 0 + return self._obs_to_array(self.env.reset().observation) + + def step(self, action): + self.t += 1 + time_step = self.env.step(action) + return self._obs_to_array(time_step.observation), time_step.reward, time_step.last() or self.t == self.max_episode_steps, defaultdict(float) + + def render(self, mode='rgb_array', width=384, height=384, camera_id=0): + camera_id = dict(quadruped=2).get(self.domain, camera_id) + return self.env.physics.render(height, width, camera_id) + + +def make_env(cfg): + """ + Make DMControl environment. 
+ Adapted from https://github.com/facebookresearch/drqv2 + """ + domain, task = cfg.task.replace('-', '_').split('_', 1) + domain = dict(cup='ball_in_cup', pointmass='point_mass').get(domain, domain) + if (domain, task) not in suite.ALL_TASKS: + raise UnknownTaskError(cfg.task) + env = suite.load(domain, + task, + task_kwargs={'random': cfg.seed}, + visualize_reward=False) + env = ActionDTypeWrapper(env, np.float32) + env = ActionRepeatWrapper(env, 2) + env = action_scale.Wrapper(env, minimum=-1., maximum=1.) + env = ExtendedTimeStepWrapper(env) + env = TimeStepToGymWrapper(env, domain, task) + return env diff --git a/tdmpc2/envs/exceptions.py b/tdmpc2/envs/exceptions.py new file mode 100644 index 0000000..9bf1390 --- /dev/null +++ b/tdmpc2/envs/exceptions.py @@ -0,0 +1,4 @@ + +class UnknownTaskError(Exception): + def __init__(self, task): + super().__init__(f'Unknown task: {task}') diff --git a/tdmpc2/envs/maniskill.py b/tdmpc2/envs/maniskill.py new file mode 100644 index 0000000..1d2e4c9 --- /dev/null +++ b/tdmpc2/envs/maniskill.py @@ -0,0 +1,79 @@ +import gym +import numpy as np +from envs.wrappers.time_limit import TimeLimit +from envs.exceptions import UnknownTaskError + +import mani_skill2.envs + + +MANISKILL_TASKS = { + 'lift-cube': dict( + env='LiftCube-v0', + control_mode='pd_ee_delta_pos', + ), + 'pick-cube': dict( + env='PickCube-v0', + control_mode='pd_ee_delta_pos', + ), + 'stack-cube': dict( + env='StackCube-v0', + control_mode='pd_ee_delta_pos', + ), + 'pick-ycb': dict( + env='PickSingleYCB-v0', + control_mode='pd_ee_delta_pose', + ), + 'turn-faucet': dict( + env='TurnFaucet-v0', + control_mode='pd_ee_delta_pose', + ), +} + + +class ManiSkillWrapper(gym.Wrapper): + def __init__(self, env, cfg): + super().__init__(env) + self.env = env + self.cfg = cfg + self.observation_space = self.env.observation_space + self.action_space = gym.spaces.Box( + low=np.full(self.env.action_space.shape, self.env.action_space.low.min()), + 
high=np.full(self.env.action_space.shape, self.env.action_space.high.max()), + dtype=self.env.action_space.dtype, + ) + + def reset(self): + return self.env.reset() + + def step(self, action): + reward = 0 + for _ in range(2): + obs, r, _, info = self.env.step(action) + reward += r + return obs, reward, False, info + + @property + def unwrapped(self): + return self.env.unwrapped + + def render(self, args, **kwargs): + return self.env.render(mode='cameras') + + +def make_env(cfg): + """ + Make ManiSkill2 environment. + """ + if cfg.task not in MANISKILL_TASKS: + raise UnknownTaskError(cfg.task) + task_cfg = MANISKILL_TASKS[cfg.task] + env = gym.make( + task_cfg['env'], + obs_mode='state', + control_mode=task_cfg['control_mode'], + render_camera_cfgs=dict(width=384, height=384), + ) + env = ManiSkillWrapper(env, cfg) + env = TimeLimit(env, max_episode_steps=100) + env.max_episode_steps = env._max_episode_steps + return env diff --git a/tdmpc2/envs/metaworld.py b/tdmpc2/envs/metaworld.py new file mode 100644 index 0000000..fd7379d --- /dev/null +++ b/tdmpc2/envs/metaworld.py @@ -0,0 +1,52 @@ +import numpy as np +import gym +from envs.wrappers.time_limit import TimeLimit +from envs.exceptions import UnknownTaskError + +from metaworld.envs import ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE + + +class MetaWorldWrapper(gym.Wrapper): + def __init__(self, env, cfg): + super().__init__(env) + self.env = env + self.cfg = cfg + self.camera_name = "corner2" + self.env.model.cam_pos[2] = [0.75, 0.075, 0.7] + self.env._freeze_rand_vec = False + + def reset(self, **kwargs): + obs = super().reset(**kwargs).astype(np.float32) + self.env.step(np.zeros(self.env.action_space.shape)) + return obs + + def step(self, action): + reward = 0 + for _ in range(2): + obs, r, _, info = self.env.step(action.copy()) + reward += r + obs = obs.astype(np.float32) + return obs, reward, False, info + + @property + def unwrapped(self): + return self.env.unwrapped + + def render(self, *args, **kwargs): + return 
self.env.render( + offscreen=True, resolution=(384, 384), camera_name=self.camera_name + ).copy() + + +def make_env(cfg): + """ + Make Meta-World environment. + """ + env_id = cfg.task.split("-", 1)[-1] + "-v2-goal-observable" + if not cfg.task.startswith('mw-') or env_id not in ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE: + raise UnknownTaskError(cfg.task) + env = ALL_V2_ENVIRONMENTS_GOAL_OBSERVABLE[env_id](seed=cfg.seed) + env = MetaWorldWrapper(env, cfg) + env = TimeLimit(env, max_episode_steps=100) + env.max_episode_steps = env._max_episode_steps + return env diff --git a/tdmpc2/envs/myosuite.py b/tdmpc2/envs/myosuite.py new file mode 100644 index 0000000..c503782 --- /dev/null +++ b/tdmpc2/envs/myosuite.py @@ -0,0 +1,59 @@ +import numpy as np +import gym +from envs.wrappers.time_limit import TimeLimit +from envs.exceptions import UnknownTaskError + + +MYOSUITE_TASKS = { + 'myo-finger-reach': 'myoFingerReachFixed-v0', + 'myo-finger-reach-hard': 'myoFingerReachRandom-v0', + 'myo-finger-pose': 'myoFingerPoseFixed-v0', + 'myo-finger-pose-hard': 'myoFingerPoseRandom-v0', + 'myo-hand-reach': 'myoHandReachFixed-v0', + 'myo-hand-reach-hard': 'myoHandReachRandom-v0', + 'myo-hand-pose': 'myoHandPoseFixed-v0', + 'myo-hand-pose-hard': 'myoHandPoseRandom-v0', + 'myo-hand-obj-hold': 'myoHandObjHoldFixed-v0', + 'myo-hand-obj-hold-hard': 'myoHandObjHoldRandom-v0', + 'myo-hand-key-turn': 'myoHandKeyTurnFixed-v0', + 'myo-hand-key-turn-hard': 'myoHandKeyTurnRandom-v0', + 'myo-hand-pen-twirl': 'myoHandPenTwirlFixed-v0', + 'myo-hand-pen-twirl-hard': 'myoHandPenTwirlRandom-v0', +} + + +class MyoSuiteWrapper(gym.Wrapper): + def __init__(self, env, cfg): + super().__init__(env) + self.env = env + self.cfg = cfg + self.camera_id = 'hand_side_inter' + + def step(self, action): + obs, reward, _, info = self.env.step(action.copy()) + obs = obs.astype(np.float32) + info['success'] = info['solved'] + return obs, reward, False, info + + @property + def unwrapped(self): + return self.env.unwrapped + 
+ def render(self, *args, **kwargs): + return self.env.sim.renderer.render_offscreen( + width=384, height=384, camera_id=self.camera_id + ).copy() + + +def make_env(cfg): + """ + Make Myosuite environment. + """ + if not cfg.task in MYOSUITE_TASKS: + raise UnknownTaskError(cfg.task) + import myosuite + env = gym.make(MYOSUITE_TASKS[cfg.task]) + env = MyoSuiteWrapper(env, cfg) + env = TimeLimit(env, max_episode_steps=100) + env.max_episode_steps = env._max_episode_steps + return env diff --git a/tdmpc2/envs/tasks/__init__.py b/tdmpc2/envs/tasks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tdmpc2/envs/tasks/ball_in_cup.py b/tdmpc2/envs/tasks/ball_in_cup.py new file mode 100644 index 0000000..fea86f6 --- /dev/null +++ b/tdmpc2/envs/tasks/ball_in_cup.py @@ -0,0 +1,99 @@ +import collections +import os + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import base +from dm_control.suite import ball_in_cup +from dm_control.suite import common +from dm_control.utils import rewards +from dm_control.utils import io as resources +import numpy as np + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_DIST_TARGET = 0.5 +_TARGET_SPEED = 6. 
+ +_DEFAULT_TIME_LIMIT = 20 # (seconds) +_CONTROL_TIMESTEP = .02 # (seconds) + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return resources.GetResource(os.path.join(_TASKS_DIR, 'ball_in_cup.xml')), common.ASSETS + + +@ball_in_cup.SUITE.add('custom') +def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Ball-in-Cup Spin task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = CustomBallInCup(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics with additional features for the Ball-in-Cup domain.""" + + def ball_to_target(self): + """Returns the vector from the ball to the target.""" + target = self.named.data.site_xpos['target', ['x', 'z']] + ball = self.named.data.xpos['ball', ['x', 'z']] + return target - ball + + def in_target(self): + """Returns 1 if the ball is in the target, 0 otherwise.""" + ball_to_target = abs(self.ball_to_target()) + target_size = self.named.model.site_size['target', [0, 2]] + ball_size = self.named.model.geom_size['ball', 0] + return float(all(ball_to_target < target_size - ball_size)) + + +class CustomBallInCup(ball_in_cup.BallInCup): + """Custom Ball-in-Cup tasks.""" + + def initialize_episode(self, physics): + # Find a collision-free random initial position of the ball. + penetrating = True + valid_pos = False + init_out_of_target = self.random.uniform() < 0.1 + while penetrating or not valid_pos: + # Assign a random ball position. + physics.named.data.qpos['ball_x'] = self.random.uniform(-.2, .2) + physics.named.data.qpos['ball_z'] = self.random.uniform(.2, .5) + # Check for collisions. 
+ physics.after_reset() + penetrating = physics.data.ncon > 0 + valid_pos = bool(physics.in_target()) or init_out_of_target + base.Task.initialize_episode(self, physics) + + def get_observation(self, physics): + """Returns an observation of the state.""" + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['velocity'] = physics.velocity() + return obs + + def get_reward(self, physics): + dist = np.linalg.norm(physics.ball_to_target()) + ball_vel_x = abs(physics.named.data.qvel['ball_x']) + ball_vel_z = abs(physics.named.data.qvel['ball_z']) + ball_vel = np.linalg.norm([ball_vel_x, ball_vel_z]) + + # reward: spin around target (maximize distance to target + ball velocity) + dist_reward = rewards.tolerance(dist, + bounds=(_DIST_TARGET, float('inf')), + margin=_DIST_TARGET/2, + value_at_margin=0.5, + sigmoid='linear') + not_in_target = 1 - physics.in_target() + vel_reward = rewards.tolerance(ball_vel, + bounds=(_TARGET_SPEED, float('inf')), + margin=_TARGET_SPEED/2, + value_at_margin=0.5, + sigmoid='linear') + spin_reward = not_in_target * (dist_reward + 2*vel_reward) / 3 + return spin_reward diff --git a/tdmpc2/envs/tasks/ball_in_cup.xml b/tdmpc2/envs/tasks/ball_in_cup.xml new file mode 100644 index 0000000..32708c1 --- /dev/null +++ b/tdmpc2/envs/tasks/ball_in_cup.xml @@ -0,0 +1,53 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/cheetah.py b/tdmpc2/envs/tasks/cheetah.py new file mode 100644 index 0000000..f24d2f6 --- /dev/null +++ b/tdmpc2/envs/tasks/cheetah.py @@ -0,0 +1,268 @@ +import os + +from dm_control.rl import control +from dm_control.suite import common +from dm_control.suite import cheetah +from dm_control.utils import rewards +from dm_control.utils import io as resources + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_CHEETAH_JUMP_HEIGHT = 1.2 +_CHEETAH_LIE_HEIGHT = 0.25 +_CHEETAH_SPIN_SPEED = 8 + + 
def get_model_and_assets():
    """Load the custom Cheetah model definition.

    Returns:
        A ``(model_xml, assets)`` pair: the contents of ``cheetah.xml`` read
        from ``_TASKS_DIR`` and the ``common.ASSETS`` mapping.
    """
    xml_path = os.path.join(_TASKS_DIR, 'cheetah.xml')
    return resources.GetResource(xml_path), common.ASSETS


@cheetah.SUITE.add('custom')
def run_backwards(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
    """Build the Cheetah Run Backwards environment.

    Args:
        time_limit: Episode time limit forwarded to ``control.Environment``.
        random: Forwarded to ``CustomCheetah`` as its ``random`` argument.
        environment_kwargs: Optional extra keyword arguments for
            ``control.Environment``; a falsy value means none.

    Returns:
        A ``control.Environment`` whose task has goal ``'run-backwards'`` and
        a move speed of ``0.8 * cheetah._RUN_SPEED``.
    """
    model_xml, assets = get_model_and_assets()
    physics = cheetah.Physics.from_xml_string(model_xml, assets)
    task = CustomCheetah(
        goal='run-backwards',
        move_speed=cheetah._RUN_SPEED*0.8,
        random=random,
    )
    extra_kwargs = environment_kwargs if environment_kwargs else {}
    return control.Environment(
        physics, task, time_limit=time_limit, **extra_kwargs)


@cheetah.SUITE.add('custom')
def stand_front(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
    """Build the Cheetah Stand Front environment.

    Args:
        time_limit: Episode time limit forwarded to ``control.Environment``.
        random: Forwarded to ``CustomCheetah`` as its ``random`` argument.
        environment_kwargs: Optional extra keyword arguments for
            ``control.Environment``; a falsy value means none.

    Returns:
        A ``control.Environment`` whose task has goal ``'stand-front'`` and a
        move speed of 0.5.
    """
    model_xml, assets = get_model_and_assets()
    physics = cheetah.Physics.from_xml_string(model_xml, assets)
    task = CustomCheetah(goal='stand-front', move_speed=0.5, random=random)
    extra_kwargs = environment_kwargs if environment_kwargs else {}
    return control.Environment(
        physics, task, time_limit=time_limit, **extra_kwargs)


@cheetah.SUITE.add('custom')
def stand_back(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
    """Build the Cheetah Stand Back environment.

    Args:
        time_limit: Episode time limit forwarded to ``control.Environment``.
        random: Forwarded to ``CustomCheetah`` as its ``random`` argument.
        environment_kwargs: Optional extra keyword arguments for
            ``control.Environment``; a falsy value means none.

    Returns:
        A ``control.Environment`` whose task has goal ``'stand-back'`` and a
        move speed of 0.5.
    """
    model_xml, assets = get_model_and_assets()
    physics = cheetah.Physics.from_xml_string(model_xml, assets)
    task = CustomCheetah(goal='stand-back', move_speed=0.5, random=random)
    extra_kwargs = environment_kwargs if environment_kwargs else {}
    return control.Environment(
        physics, task, time_limit=time_limit, **extra_kwargs)


@cheetah.SUITE.add('custom')
def jump(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None):
    """Build the Cheetah Jump environment.

    Args:
        time_limit: Episode time limit forwarded to ``control.Environment``.
        random: Forwarded to ``CustomCheetah`` as its ``random`` argument.
        environment_kwargs: Optional extra keyword arguments for
            ``control.Environment``; a falsy value means none.

    Returns:
        A ``control.Environment`` whose task has goal ``'jump'`` and a move
        speed of 0.5.
    """
    model_xml, assets = get_model_and_assets()
    physics = cheetah.Physics.from_xml_string(model_xml, assets)
    task = CustomCheetah(goal='jump', move_speed=0.5, random=random)
    extra_kwargs = environment_kwargs if environment_kwargs else {}
    return control.Environment(
        physics, task, time_limit=time_limit, **extra_kwargs)
+@cheetah.SUITE.add('custom') +def run_front(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run Front task.""" + physics = cheetah.Physics.from_xml_string(*get_model_and_assets()) + task = CustomCheetah(goal='run-front', move_speed=cheetah._RUN_SPEED*0.6, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +@cheetah.SUITE.add('custom') +def run_back(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run Back task.""" + physics = cheetah.Physics.from_xml_string(*get_model_and_assets()) + task = CustomCheetah(goal='run-back', move_speed=cheetah._RUN_SPEED*0.6, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +@cheetah.SUITE.add('custom') +def lie_down(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Lie Down task.""" + physics = cheetah.Physics.from_xml_string(*get_model_and_assets()) + task = CustomCheetah(goal='lie-down', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +@cheetah.SUITE.add('custom') +def legs_up(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Legs Up task.""" + physics = cheetah.Physics.from_xml_string(*get_model_and_assets()) + task = CustomCheetah(goal='legs-up', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +@cheetah.SUITE.add('custom') +def flip(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Flip task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = 
CustomCheetah(goal='flip', move_speed=cheetah._RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +@cheetah.SUITE.add('custom') +def flip_backwards(time_limit=cheetah._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Flip Backwards task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = CustomCheetah(goal='flip-backwards', move_speed=cheetah._RUN_SPEED*0.8, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment(physics, task, time_limit=time_limit, + **environment_kwargs) + + +class Physics(cheetah.Physics): + """Physics simulation with additional features for the Cheetah domain.""" + + def angmomentum(self): + """Returns the angular momentum of torso of the Cheetah about Y axis.""" + return self.named.data.subtree_angmom['torso'][1] + + +class CustomCheetah(cheetah.Cheetah): + """Custom Cheetah tasks.""" + + def __init__(self, goal='run-backwards', move_speed=0, random=None): + super().__init__(random) + self._goal = goal + self._move_speed = move_speed + + def _run_backwards_reward(self, physics): + return rewards.tolerance(physics.speed(), + bounds=(-float('inf'), -self._move_speed), + margin=self._move_speed, + value_at_margin=0, + sigmoid='linear') + + def _stand_one_foot_reward(self, physics, foot): + """Note: `foot` is the foot that is *not* on the ground.""" + torso_height = physics.named.data.xpos['torso', 'z'] + foot_height = physics.named.data.xpos[foot, 'z'] + height_reward = rewards.tolerance((torso_height + foot_height)/2, + bounds=(_CHEETAH_JUMP_HEIGHT, float('inf')), + margin=_CHEETAH_JUMP_HEIGHT/2) + horizontal_speed_reward = rewards.tolerance(physics.speed(), + bounds=(-self._move_speed, self._move_speed), + margin=self._move_speed, + value_at_margin=0, + sigmoid='linear') + stand_reward = (5*height_reward + horizontal_speed_reward) / 6 + return 
stand_reward + + def _stand_front_reward(self, physics): + return self._stand_one_foot_reward(physics, 'bfoot') + + def _stand_back_reward(self, physics): + return self._stand_one_foot_reward(physics, 'ffoot') + + def _jump_reward(self, physics): + front_reward = self._stand_front_reward(physics) + back_reward = self._stand_back_reward(physics) + jump_reward = (front_reward + back_reward) / 2 + return jump_reward + + def _run_one_foot_reward(self, physics, foot): + """Note: `foot` is the foot that is *not* on the ground.""" + torso_height = physics.named.data.xpos['torso', 'z'] + foot_height = physics.named.data.xpos[foot, 'z'] + torso_up = rewards.tolerance(torso_height, + bounds=(_CHEETAH_JUMP_HEIGHT, float('inf')), + margin=_CHEETAH_JUMP_HEIGHT/2) + foot_up = rewards.tolerance(foot_height, + bounds=(_CHEETAH_JUMP_HEIGHT, float('inf')), + margin=_CHEETAH_JUMP_HEIGHT/2) + up_reward = (3*foot_up + 2*torso_up) / 5 + if self._move_speed == 0: + return up_reward + horizontal_speed_reward = rewards.tolerance(physics.speed(), + bounds=(self._move_speed, float('inf')), + margin=self._move_speed, + value_at_margin=0, + sigmoid='linear') + return up_reward * (5*horizontal_speed_reward + 1) / 6 + + def _run_front_reward(self, physics): + return self._run_one_foot_reward(physics, 'bfoot') + + def _run_back_reward(self, physics): + return self._run_one_foot_reward(physics, 'ffoot') + + def _lie_down_reward(self, physics): + torso_height = physics.named.data.xpos['torso', 'z'] + feet_height = (physics.named.data.xpos['ffoot', 'z'] + physics.named.data.xpos['bfoot', 'z']) / 2 + torso_down = rewards.tolerance(torso_height, + bounds=(-float('inf'), _CHEETAH_LIE_HEIGHT), + margin=_CHEETAH_LIE_HEIGHT, + value_at_margin=0, + sigmoid='linear') + feet_down = rewards.tolerance(feet_height, + bounds=(-float('inf'), _CHEETAH_LIE_HEIGHT), + margin=_CHEETAH_LIE_HEIGHT, + value_at_margin=0, + sigmoid='linear') + lie_down_reward = (3*torso_down + feet_down) / 4 + return lie_down_reward + + 
def _legs_up_reward(self, physics): + torso_height = physics.named.data.xpos['torso', 'z'] + torso_down = rewards.tolerance(torso_height, + bounds=(-float('inf'), _CHEETAH_LIE_HEIGHT), + margin=_CHEETAH_LIE_HEIGHT/2) + get_up = self._run_one_foot_reward(physics, 'bfoot') + legs_up_reward = (5*torso_down + get_up) / 6 + return legs_up_reward + + def _flip_reward(self, physics, forward=True): + spin_reward = rewards.tolerance( + (1. if forward else -1.) * physics.angmomentum(), + bounds=(_CHEETAH_SPIN_SPEED, float('inf')), + margin=_CHEETAH_SPIN_SPEED, + value_at_margin=0, + sigmoid='linear') + horizontal_speed_reward = rewards.tolerance( + (1. if forward else -1.) * physics.speed(), + bounds=(self._move_speed, float('inf')), + margin=self._move_speed, + value_at_margin=0, + sigmoid='linear') + flip_reward = (2*spin_reward + horizontal_speed_reward) / 3 + return flip_reward + + def get_reward(self, physics): + if self._goal == 'run-backwards': + return self._run_backwards_reward(physics) + elif self._goal == 'stand-front': + return self._stand_front_reward(physics) + elif self._goal == 'stand-back': + return self._stand_back_reward(physics) + elif self._goal == 'jump': + return self._jump_reward(physics) + elif self._goal == 'run-front': + return self._run_front_reward(physics) + elif self._goal == 'run-back': + return self._run_back_reward(physics) + elif self._goal == 'lie-down': + return self._lie_down_reward(physics) + elif self._goal == 'legs-up': + return self._legs_up_reward(physics) + elif self._goal == 'flip': + return self._flip_reward(physics, forward=True) + elif self._goal == 'flip-backwards': + return self._flip_reward(physics, forward=False) + else: + raise NotImplementedError(f'Goal {self._goal} is not implemented.') + + +if __name__ == '__main__': + env = jump() + obs = env.reset() + import numpy as np + next_obs, reward, done, info = env.step(np.zeros(6)) + print(reward) diff --git a/tdmpc2/envs/tasks/cheetah.xml b/tdmpc2/envs/tasks/cheetah.xml new 
file mode 100644 index 0000000..1a7f6fd --- /dev/null +++ b/tdmpc2/envs/tasks/cheetah.xml @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/fish.py b/tdmpc2/envs/tasks/fish.py new file mode 100644 index 0000000..59bed4b --- /dev/null +++ b/tdmpc2/envs/tasks/fish.py @@ -0,0 +1,79 @@ +import collections +import os + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import base +from dm_control.suite import common +from dm_control.suite import fish +from dm_control.utils import rewards +from dm_control.utils import io as resources +import numpy as np + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_DEFAULT_TIME_LIMIT = 40 +_CONTROL_TIMESTEP = .04 +_JOINTS = ['tail1', + 'tail_twist', + 'tail2', + 'finright_roll', + 'finright_pitch', + 'finleft_roll', + 'finleft_pitch'] + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return resources.GetResource(os.path.join(_TASKS_DIR, 'fish.xml')), common.ASSETS + + +@fish.SUITE.add('custom') +def obstacles(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Fish Obstacles task.""" + physics = fish.Physics.from_xml_string(*get_model_and_assets()) + task = Obstacles(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, control_timestep=_CONTROL_TIMESTEP, time_limit=time_limit, + **environment_kwargs) + + +class Obstacles(fish.Swim): + """A custom Fish Obstacles task.""" + + def __init__(self, random=None): + super().__init__(random=random) + + def in_wall(self, physics, name, min_distance=0.08): + """Returns True if the given body is too close to a wall.""" + for wall in ['wall0', 'wall1', 'wall2', 'wall3']: + l1_dist = np.min(np.abs(physics.named.data.geom_xpos[name][:2] - physics.named.data.geom_xpos[wall][:2])) + if l1_dist < min_distance: + return True + return 
False + + def initialize_episode(self, physics): + in_wall = True + while in_wall: + # Randomize fish position. + quat = self.random.randn(4) + physics.named.data.qpos['root'][3:7] = quat / np.linalg.norm(quat) + for joint in _JOINTS: + physics.named.data.qpos[joint] = self.random.uniform(-.2, .2) + # Randomize target position. + physics.named.model.geom_pos['target', 'x'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'y'] = self.random.uniform(-.4, .4) + physics.named.model.geom_pos['target', 'z'] = self.random.uniform(.1, .3) + # Make sure target is not too close to a wall. + physics.after_reset() + in_wall = self.in_wall(physics, 'target') + base.Task.initialize_episode(self, physics) + + def get_reward(self, physics): + radii = physics.named.model.geom_size[['mouth', 'target'], 0].sum() + in_target = rewards.tolerance(np.linalg.norm(physics.mouth_to_target()), + bounds=(0, radii), margin=2*radii) + is_upright = 0.5 * (physics.upright() + 1) + is_not_in_wall = 1. 
- self.in_wall(physics, 'torso', min_distance=0.06) + return is_not_in_wall * (7*in_target + is_upright) / 8 diff --git a/tdmpc2/envs/tasks/fish.xml b/tdmpc2/envs/tasks/fish.xml new file mode 100644 index 0000000..82c9ede --- /dev/null +++ b/tdmpc2/envs/tasks/fish.xml @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/hopper.py b/tdmpc2/envs/tasks/hopper.py new file mode 100644 index 0000000..3e19b1c --- /dev/null +++ b/tdmpc2/envs/tasks/hopper.py @@ -0,0 +1,114 @@ +import os + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import common +from dm_control.suite import hopper +from dm_control.utils import rewards +from dm_control.utils import io as resources +import numpy as np + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_CONTROL_TIMESTEP = .02 # (Seconds) + +# Default duration of an episode, in seconds. +_DEFAULT_TIME_LIMIT = 20 + +# Minimal height of torso over foot above which stand reward is 1. +_STAND_HEIGHT = 0.6 + +# Hopping speed above which hop reward is 1. +_HOP_SPEED = 2 + +# Angular momentum above which reward is 1. 
+_SPIN_SPEED = 5 + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return resources.GetResource(os.path.join(_TASKS_DIR, 'hopper.xml')), common.ASSETS + + +@hopper.SUITE.add('custom') +def hop_backwards(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Hop Backwards task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = CustomHopper(goal='hop-backwards', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@hopper.SUITE.add('custom') +def flip(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Flip task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = CustomHopper(goal='flip', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +@hopper.SUITE.add('custom') +def flip_backwards(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Flip Backwards task.""" + physics = Physics.from_xml_string(*get_model_and_assets()) + task = CustomHopper(goal='flip-backwards', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=_CONTROL_TIMESTEP, + **environment_kwargs) + + +class Physics(hopper.Physics): + + def angmomentum(self): + """Returns the angular momentum of torso of the Cheetah about Y axis.""" + return self.named.data.subtree_angmom['torso'][1] + + +class CustomHopper(hopper.Hopper): + """Custom Hopper tasks.""" + + def __init__(self, goal='hop-backwards', random=None): + super().__init__(None, random) + self._goal = goal + + def _hop_backwards_reward(self, physics): + 
standing = rewards.tolerance(physics.height(), (_STAND_HEIGHT, 2)) + hopping = rewards.tolerance(physics.speed(), + bounds=(-float('inf'), -_HOP_SPEED/2), + margin=_HOP_SPEED/4, + value_at_margin=0.5, + sigmoid='linear') + return standing * hopping + + def _flip_reward(self, physics, forward=True): + reward = rewards.tolerance((1. if forward else -1.) * physics.angmomentum(), + bounds=(_SPIN_SPEED, float('inf')), + margin=_SPIN_SPEED/2, + value_at_margin=0, + sigmoid='linear') + return reward + + + def get_reward(self, physics): + if self._goal == 'hop-backwards': + return self._hop_backwards_reward(physics) + elif self._goal == 'flip': + return self._flip_reward(physics, forward=True) + elif self._goal == 'flip-backwards': + return self._flip_reward(physics, forward=False) + else: + raise NotImplementedError(f'Goal {self._goal} is not implemented.') + + +if __name__ == '__main__': + env = hop_backwards() + obs = env.reset() + import numpy as np + next_obs, reward, done, info = env.step(np.zeros(2)) + print(reward) diff --git a/tdmpc2/envs/tasks/hopper.xml b/tdmpc2/envs/tasks/hopper.xml new file mode 100644 index 0000000..84ad72e --- /dev/null +++ b/tdmpc2/envs/tasks/hopper.xml @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/pendulum.py b/tdmpc2/envs/tasks/pendulum.py new file mode 100644 index 0000000..3a5b636 --- /dev/null +++ b/tdmpc2/envs/tasks/pendulum.py @@ -0,0 +1,43 @@ +import os + +from dm_control.rl import control +from dm_control.suite import pendulum +from dm_control.suite import common +from dm_control.utils import rewards +from dm_control.utils import io as resources +import numpy as np + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_DEFAULT_TIME_LIMIT = 20 +_TARGET_SPEED = 9. 
+ + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return resources.GetResource(os.path.join(_TASKS_DIR, 'pendulum.xml')), common.ASSETS + + +@pendulum.SUITE.add('custom') +def spin(time_limit=_DEFAULT_TIME_LIMIT, random=None, + environment_kwargs=None): + """Returns pendulum spin task.""" + physics = pendulum.Physics.from_xml_string(*get_model_and_assets()) + task = Spin(random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Spin(pendulum.SwingUp): + """A custom Pendulum Spin task.""" + + def __init__(self, random=None): + super().__init__(random=random) + + def get_reward(self, physics): + return rewards.tolerance(np.linalg.norm(physics.angular_velocity()), + bounds=(_TARGET_SPEED, float('inf')), + margin=_TARGET_SPEED/2, + value_at_margin=0.5, + sigmoid='linear') diff --git a/tdmpc2/envs/tasks/pendulum.xml b/tdmpc2/envs/tasks/pendulum.xml new file mode 100644 index 0000000..14377ae --- /dev/null +++ b/tdmpc2/envs/tasks/pendulum.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/reacher.py b/tdmpc2/envs/tasks/reacher.py new file mode 100644 index 0000000..4c1778e --- /dev/null +++ b/tdmpc2/envs/tasks/reacher.py @@ -0,0 +1,89 @@ +import collections +import os + +from dm_control import mujoco +from dm_control.rl import control +from dm_control.suite import common +from dm_control.suite import reacher +from dm_control.utils import io as resources +import numpy as np + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_DEFAULT_TIME_LIMIT = 20 +_BIG_TARGET = .05 +_SMALL_TARGET = .015 + + +def get_model_and_assets(links): + """Returns a tuple containing the model XML string and a dict of assets.""" + assert links in {3, 4}, 'Only 3 or 4 links are supported.' 
+ fn = 'reacher_three_links.xml' if links == 3 else 'reacher_four_links.xml' + return resources.GetResource(os.path.join(_TASKS_DIR, fn)), common.ASSETS + + +@reacher.SUITE.add('custom') +def three_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns three-link reacher with sparse reward with 5e-2 tol and randomized target.""" + physics = Physics.from_xml_string(*get_model_and_assets(links=3)) + task = CustomThreeLinkReacher(target_size=_BIG_TARGET, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@reacher.SUITE.add('custom') +def three_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns three-link reacher with sparse reward with 1e-2 tol and randomized target.""" + physics = Physics.from_xml_string(*get_model_and_assets(links=3)) + task = CustomThreeLinkReacher(target_size=_SMALL_TARGET, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@reacher.SUITE.add('custom') +def four_easy(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns three-link reacher with sparse reward with 5e-2 tol and randomized target.""" + physics = Physics.from_xml_string(*get_model_and_assets(links=4)) + task = CustomThreeLinkReacher(target_size=_BIG_TARGET, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +@reacher.SUITE.add('custom') +def four_hard(time_limit=_DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns three-link reacher with sparse reward with 1e-2 tol and randomized target.""" + physics = Physics.from_xml_string(*get_model_and_assets(links=4)) + task = CustomThreeLinkReacher(target_size=_SMALL_TARGET, random=random) + 
environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, **environment_kwargs) + + +class Physics(mujoco.Physics): + """Physics simulation with additional features for the Reacher domain.""" + + def finger_to_target(self): + """Returns the vector from target to finger in global coordinates.""" + return (self.named.data.geom_xpos['target', :2] - + self.named.data.geom_xpos['finger', :2]) + + def finger_to_target_dist(self): + """Returns the signed distance between the finger and target surface.""" + return np.linalg.norm(self.finger_to_target()) + + +class CustomThreeLinkReacher(reacher.Reacher): + """Custom Reacher tasks.""" + + def __init__(self, target_size, random=None): + super().__init__(target_size, random) + + def get_observation(self, physics): + obs = collections.OrderedDict() + obs['position'] = physics.position() + obs['to_target'] = physics.finger_to_target() + obs['velocity'] = physics.velocity() + return obs diff --git a/tdmpc2/envs/tasks/reacher_four_links.xml b/tdmpc2/envs/tasks/reacher_four_links.xml new file mode 100644 index 0000000..d5aa8e5 --- /dev/null +++ b/tdmpc2/envs/tasks/reacher_four_links.xml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/reacher_three_links.xml b/tdmpc2/envs/tasks/reacher_three_links.xml new file mode 100644 index 0000000..f32f4bc --- /dev/null +++ b/tdmpc2/envs/tasks/reacher_three_links.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tdmpc2/envs/tasks/walker.py b/tdmpc2/envs/tasks/walker.py new file mode 100644 index 0000000..d04c404 --- /dev/null +++ b/tdmpc2/envs/tasks/walker.py @@ -0,0 +1,223 @@ +import os + +from dm_control.rl import control +from dm_control.suite import common +from dm_control.suite import walker +from dm_control.utils import rewards +from 
dm_control.utils import io as resources + +_TASKS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'tasks') + +_YOGA_STAND_HEIGHT = 1.0 +_YOGA_LIE_DOWN_HEIGHT = 0.08 +_YOGA_LEGS_UP_HEIGHT = 1.1 + + +def get_model_and_assets(): + """Returns a tuple containing the model XML string and a dict of assets.""" + return resources.GetResource(os.path.join(_TASKS_DIR, 'walker.xml')), common.ASSETS + + +@walker.SUITE.add('custom') +def walk_backwards(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Walk Backwards task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = BackwardsPlanarWalker(move_speed=walker._WALK_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def run_backwards(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Run Backwards task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = BackwardsPlanarWalker(move_speed=walker._RUN_SPEED, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def arabesque(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Arabesque task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = YogaPlanarWalker(goal='arabesque', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def lie_down(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, 
environment_kwargs=None): + """Returns the Lie Down task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = YogaPlanarWalker(goal='lie_down', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def legs_up(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Legs Up task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = YogaPlanarWalker(goal='legs_up', random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def headstand(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Headstand task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = YogaPlanarWalker(goal='flip', move_speed=0, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def flip(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Flip task.""" + physics = walker.Physics.from_xml_string(*get_model_and_assets()) + task = YogaPlanarWalker(goal='flip', move_speed=walker._RUN_SPEED*0.75, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +@walker.SUITE.add('custom') +def backflip(time_limit=walker._DEFAULT_TIME_LIMIT, random=None, environment_kwargs=None): + """Returns the Backflip task.""" + physics = 
walker.Physics.from_xml_string(*get_model_and_assets()) + task = YogaPlanarWalker(goal='flip', move_speed=-walker._RUN_SPEED*0.75, random=random) + environment_kwargs = environment_kwargs or {} + return control.Environment( + physics, task, time_limit=time_limit, control_timestep=walker._CONTROL_TIMESTEP, + **environment_kwargs) + + +class BackwardsPlanarWalker(walker.PlanarWalker): + """Backwards PlanarWalker task.""" + def __init__(self, move_speed, random=None): + super().__init__(move_speed, random) + + def get_reward(self, physics): + standing = rewards.tolerance(physics.torso_height(), + bounds=(walker._STAND_HEIGHT, float('inf')), + margin=walker._STAND_HEIGHT/2) + upright = (1 + physics.torso_upright()) / 2 + stand_reward = (3*standing + upright) / 4 + if self._move_speed == 0: + return stand_reward + else: + move_reward = rewards.tolerance(physics.horizontal_velocity(), + bounds=(-float('inf'), -self._move_speed), + margin=self._move_speed/2, + value_at_margin=0.5, + sigmoid='linear') + return stand_reward * (5*move_reward + 1) / 6 + + +class YogaPlanarWalker(walker.PlanarWalker): + """Yoga PlanarWalker tasks.""" + + def __init__(self, goal='arabesque', move_speed=0, random=None): + super().__init__(0, random) + self._goal = goal + self._move_speed = move_speed + + def _arabesque_reward(self, physics): + standing = rewards.tolerance(physics.torso_height(), + bounds=(_YOGA_STAND_HEIGHT, float('inf')), + margin=_YOGA_STAND_HEIGHT/2) + left_foot_height = physics.named.data.xpos['left_foot', 'z'] + right_foot_height = physics.named.data.xpos['right_foot', 'z'] + left_foot_down = rewards.tolerance(left_foot_height, + bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), + margin=_YOGA_STAND_HEIGHT/2) + right_foot_up = rewards.tolerance(right_foot_height, + bounds=(_YOGA_STAND_HEIGHT, float('inf')), + margin=_YOGA_STAND_HEIGHT/2) + upright = (1 - physics.torso_upright()) / 2 + arabesque_reward = (3*standing + left_foot_down + right_foot_up + upright) / 6 + return 
arabesque_reward + + def _lie_down_reward(self, physics): + torso_down = rewards.tolerance(physics.torso_height(), + bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), + margin=_YOGA_LIE_DOWN_HEIGHT/2) + thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2 + thigh_down = rewards.tolerance(thigh_height, + bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), + margin=_YOGA_LIE_DOWN_HEIGHT/2) + feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 + feet_down = rewards.tolerance(feet_height, + bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), + margin=_YOGA_LIE_DOWN_HEIGHT/2) + upright = (1 - physics.torso_upright()) / 2 + lie_down_reward = (3*torso_down + thigh_down + upright) / 5 + return lie_down_reward + + def _legs_up_reward(self, physics): + torso_down = rewards.tolerance(physics.torso_height(), + bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), + margin=_YOGA_LIE_DOWN_HEIGHT/2) + thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2 + thigh_down = rewards.tolerance(thigh_height, + bounds=(-float('inf'), _YOGA_LIE_DOWN_HEIGHT), + margin=_YOGA_LIE_DOWN_HEIGHT/2) + feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 + legs_up = rewards.tolerance(feet_height, + bounds=(_YOGA_LEGS_UP_HEIGHT, float('inf')), + margin=_YOGA_LEGS_UP_HEIGHT/2) + upright = (1 - physics.torso_upright()) / 2 + legs_up_reward = (3*torso_down + 2*legs_up + thigh_down + upright) / 7 + return legs_up_reward + + def _flip_reward(self, physics): + thigh_height = (physics.named.data.xpos['left_thigh', 'z'] + physics.named.data.xpos['right_thigh', 'z']) / 2 + thigh_up = rewards.tolerance(thigh_height, + bounds=(_YOGA_STAND_HEIGHT, float('inf')), + margin=_YOGA_STAND_HEIGHT/2) + feet_height = (physics.named.data.xpos['left_foot', 'z'] + physics.named.data.xpos['right_foot', 'z']) / 2 + legs_up = 
rewards.tolerance(feet_height, + bounds=(_YOGA_LEGS_UP_HEIGHT, float('inf')), + margin=_YOGA_LEGS_UP_HEIGHT/2) + upside_down_reward = (3*legs_up + 2*thigh_up) / 5 + if self._move_speed == 0: + return upside_down_reward + move_reward = rewards.tolerance(physics.horizontal_velocity(), + bounds=(self._move_speed, float('inf')) if self._move_speed > 0 else (-float('inf'), self._move_speed), + margin=abs(self._move_speed)/2, + value_at_margin=0.5, + sigmoid='linear') + return upside_down_reward * (5*move_reward + 1) / 6 + + def get_reward(self, physics): + if self._goal == 'arabesque': + return self._arabesque_reward(physics) + elif self._goal == 'lie_down': + return self._lie_down_reward(physics) + elif self._goal == 'legs_up': + return self._legs_up_reward(physics) + elif self._goal == 'flip': + return self._flip_reward(physics) + else: + raise NotImplementedError(f'Goal {self._goal} is not implemented.') + + +if __name__ == '__main__': + env = legs_up() + obs = env.reset() + import numpy as np + next_obs, reward, done, info = env.step(np.zeros(6)) diff --git a/tdmpc2/envs/tasks/walker.xml b/tdmpc2/envs/tasks/walker.xml new file mode 100644 index 0000000..1d17637 --- /dev/null +++ b/tdmpc2/envs/tasks/walker.xml @@ -0,0 +1,70 @@ + + + + + + diff --git a/tdmpc2/envs/wrappers/__init__.py b/tdmpc2/envs/wrappers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tdmpc2/envs/wrappers/multitask.py b/tdmpc2/envs/wrappers/multitask.py new file mode 100644 index 0000000..08dd4eb --- /dev/null +++ b/tdmpc2/envs/wrappers/multitask.py @@ -0,0 +1,57 @@ +import gym +import numpy as np +import torch + + +class MultitaskWrapper(gym.Wrapper): + """ + Wrapper for multi-task environments. 
+ """ + + def __init__(self, cfg, envs): + super().__init__(envs[0]) + self.cfg = cfg + self.envs = envs + self._task = cfg.tasks[0] + self._task_idx = 0 + self._obs_dims = [env.observation_space.shape[0] for env in self.envs] + self._action_dims = [env.action_space.shape[0] for env in self.envs] + self._episode_lengths = [env.max_episode_steps for env in self.envs] + self._obs_shape = (max(self._obs_dims),) + self._action_dim = max(self._action_dims) + self.observation_space = gym.spaces.Box( + low=-np.inf, high=np.inf, shape=self._obs_shape, dtype=np.float32 + ) + self.action_space = gym.spaces.Box( + low=-1, high=1, shape=(self._action_dim,), dtype=np.float32 + ) + + @property + def task(self): + return self._task + + @property + def task_idx(self): + return self._task_idx + + @property + def _env(self): + return self.envs[self.task_idx] + + def rand_act(self): + return torch.from_numpy(self.action_space.sample().astype(np.float32)) + + def _pad_obs(self, obs): + if obs.shape != self._obs_shape: + obs = torch.cat((obs, torch.zeros(self._obs_shape[0]-obs.shape[0], dtype=obs.dtype, device=obs.device))) + return obs + + def reset(self, task_idx=-1): + self._task_idx = task_idx + self._task = self.cfg.tasks[task_idx] + self.env = self._env + return self._pad_obs(self.env.reset()) + + def step(self, action): + obs, reward, done, info = self.env.step(action[:self.env.action_space.shape[0]]) + return self._pad_obs(obs), reward, done, info diff --git a/tdmpc2/envs/wrappers/tensor.py b/tdmpc2/envs/wrappers/tensor.py new file mode 100644 index 0000000..548a5f4 --- /dev/null +++ b/tdmpc2/envs/wrappers/tensor.py @@ -0,0 +1,40 @@ +from collections import defaultdict + +import gym +import numpy as np +import torch + + +class TensorWrapper(gym.Wrapper): + """ + Wrapper for converting numpy arrays to torch tensors. 
+ """ + + def __init__(self, env): + super().__init__(env) + + def rand_act(self): + return torch.from_numpy(self.action_space.sample().astype(np.float32)) + + def _try_f32_tensor(self, x): + x = torch.from_numpy(x) + if x.dtype == torch.float64: + x = x.float() + return x + + def _obs_to_tensor(self, obs): + if isinstance(obs, dict): + for k in obs.keys(): + obs[k] = self._try_f32_tensor(obs[k]) + else: + obs = self._try_f32_tensor(obs) + return obs + + def reset(self, task_idx=None): + return self._obs_to_tensor(self.env.reset()) + + def step(self, action): + obs, reward, done, info = self.env.step(action.numpy()) + info = defaultdict(float, info) + info['success'] = float(info['success']) + return self._obs_to_tensor(obs), torch.tensor(reward, dtype=torch.float32), done, info diff --git a/tdmpc2/envs/wrappers/time_limit.py b/tdmpc2/envs/wrappers/time_limit.py new file mode 100644 index 0000000..f81c281 --- /dev/null +++ b/tdmpc2/envs/wrappers/time_limit.py @@ -0,0 +1,72 @@ +""" +Wrapper for limiting the time steps of an environment. +Source: https://github.com/openai/gym/blob/3498617bf031538a808b75b932f4ed2c11896a3e/gym/wrappers/time_limit.py +""" +from typing import Optional + +import gym + + +class TimeLimit(gym.Wrapper): + """This wrapper will issue a `done` signal if a maximum number of timesteps is exceeded. + + Oftentimes, it is **very** important to distinguish `done` signals that were produced by the + :class:`TimeLimit` wrapper (truncations) and those that originate from the underlying environment (terminations). + This can be done by looking at the ``info`` that is returned when `done`-signal was issued. + The done-signal originates from the time limit (i.e. it signifies a *truncation*) if and only if + the key `"TimeLimit.truncated"` exists in ``info`` and the corresponding value is ``True``. 
+ + Example: + >>> from gym.envs.classic_control import CartPoleEnv + >>> from gym.wrappers import TimeLimit + >>> env = CartPoleEnv() + >>> env = TimeLimit(env, max_episode_steps=1000) + """ + + def __init__(self, env: gym.Env, max_episode_steps: Optional[int] = None): + """Initializes the :class:`TimeLimit` wrapper with an environment and the number of steps after which truncation will occur. + + Args: + env: The environment to apply the wrapper + max_episode_steps: An optional max episode steps (if ``None``, ``env.spec.max_episode_steps`` is used) + """ + super().__init__(env) + if max_episode_steps is None and self.env.spec is not None: + max_episode_steps = env.spec.max_episode_steps + if self.env.spec is not None: + self.env.spec.max_episode_steps = max_episode_steps + self._max_episode_steps = max_episode_steps + self._elapsed_steps = None + + def step(self, action): + """Steps through the environment and if the number of steps elapsed exceeds ``max_episode_steps`` then truncate. + + Args: + action: The environment step action + + Returns: + The environment step ``(observation, reward, done, info)`` with "TimeLimit.truncated"=True + when truncated (the number of steps elapsed >= max episode steps) or + "TimeLimit.truncated"=False if the environment terminated + """ + observation, reward, done, info = self.env.step(action) + self._elapsed_steps += 1 + if self._elapsed_steps >= self._max_episode_steps: + # TimeLimit.truncated key may have been already set by the environment + # do not overwrite it + episode_truncated = not done or info.get("TimeLimit.truncated", False) + info["TimeLimit.truncated"] = episode_truncated + done = True + return observation, reward, done, info + + def reset(self, **kwargs): + """Resets the environment with :param:`**kwargs` and sets the number of steps elapsed to zero.
+ + Args: + **kwargs: The kwargs to reset the environment with + + Returns: + The reset environment + """ + self._elapsed_steps = 0 + return self.env.reset(**kwargs) diff --git a/tdmpc2/evaluate.py b/tdmpc2/evaluate.py new file mode 100755 index 0000000..f5b8628 --- /dev/null +++ b/tdmpc2/evaluate.py @@ -0,0 +1,103 @@ +import os +os.environ['MUJOCO_GL'] = 'egl' +import warnings +warnings.filterwarnings('ignore') + +import hydra +import imageio +import numpy as np +import torch +from termcolor import colored + +from common.parser import parse_cfg +from common.seed import set_seed +from envs import make_env +from tdmpc2 import TDMPC2 + +torch.backends.cudnn.benchmark = True + + +@hydra.main(config_name='config', config_path='.') +def evaluate(cfg: dict): + """ + Script for evaluating a single-task / multi-task TD-MPC2 checkpoint. + + Most relevant args: + `task`: task name (or mt30/mt80 for multi-task evaluation) + `model_size`: model size, must be one of `[1, 5, 19, 48, 317]` (default: 5) + `checkpoint`: path to model checkpoint to load + `eval_episodes`: number of episodes to evaluate on per task (default: 10) + `save_video`: whether to save a video of the evaluation (default: True) + `seed`: random seed (default: 1) + + See config.yaml for a full list of args. + + Example usage: + ``` + $ python evaluate.py task=mt80 model_size=48 checkpoint=/path/to/mt80-48M.pt + $ python evaluate.py task=mt30 model_size=317 checkpoint=/path/to/mt30-317M.pt + $ python evaluate.py task=dog-run checkpoint=/path/to/dog-1.pt save_video=true + ``` + """ + assert torch.cuda.is_available() + assert cfg.eval_episodes > 0, 'Must evaluate at least 1 episode.'
+ cfg = parse_cfg(cfg) + set_seed(cfg.seed) + print(colored(f'Task: {cfg.task}', 'blue', attrs=['bold'])) + print(colored(f'Model size: {cfg.model_size}', 'blue', attrs=['bold'])) + print(colored(f'Checkpoint: {cfg.checkpoint}', 'blue', attrs=['bold'])) + if not cfg.multitask and ('mt80' in cfg.checkpoint or 'mt30' in cfg.checkpoint): + print(colored('Warning: single-task evaluation of multi-task models is not currently supported.', 'red', attrs=['bold'])) + print(colored('To evaluate a multi-task model, use task=mt80 or task=mt30.', 'red', attrs=['bold'])) + + # Make environment + env = make_env(cfg) + + # Load agent + agent = TDMPC2(cfg) + assert os.path.exists(cfg.checkpoint), f'Checkpoint {cfg.checkpoint} not found! Must be a valid filepath.' + agent.load(cfg.checkpoint) + + # Evaluate + if cfg.multitask: + print(colored(f'Evaluating agent on {len(cfg.tasks)} tasks:', 'yellow', attrs=['bold'])) + else: + print(colored(f'Evaluating agent on {cfg.task}:', 'yellow', attrs=['bold'])) + if cfg.save_video: + video_dir = os.path.join(cfg.work_dir, 'videos') + os.makedirs(video_dir, exist_ok=True) + scores = [] + tasks = cfg.tasks if cfg.multitask else [cfg.task] + for task_idx, task in enumerate(tasks): + if not cfg.multitask: + task_idx = None + ep_rewards, ep_successes = [], [] + for i in range(cfg.eval_episodes): + obs, done, ep_reward, t = env.reset(task_idx=task_idx), False, 0, 0 + if cfg.save_video: + frames = [env.render()] + while not done: + action = agent.act(obs, t0=t==0, task=task_idx) + obs, reward, done, info = env.step(action) + ep_reward += reward + t += 1 + if cfg.save_video: + frames.append(env.render()) + ep_rewards.append(ep_reward) + ep_successes.append(info['success']) + if cfg.save_video: + imageio.mimsave( + os.path.join(video_dir, f'{task}-{i}.mp4'), frames, fps=15) + ep_rewards = np.mean(ep_rewards) + ep_successes = np.mean(ep_successes) + if cfg.multitask: + scores.append(ep_successes*100 if task.startswith('mw-') else ep_rewards/10) + 
print(colored(f' {task:<22}' \ + f'\tR: {ep_rewards:.01f} ' \ + f'\tS: {ep_successes:.02f}', 'yellow')) + if cfg.multitask: + print(colored(f'Normalized score: {np.mean(scores):.02f}', 'yellow', attrs=['bold'])) + + +if __name__ == '__main__': + evaluate() diff --git a/tdmpc2/tdmpc2.py b/tdmpc2/tdmpc2.py new file mode 100755 index 0000000..9ee3ff5 --- /dev/null +++ b/tdmpc2/tdmpc2.py @@ -0,0 +1,286 @@ +import numpy as np +import torch +import torch.nn.functional as F + +from common import math +from common.scale import RunningScale +from common.world_model import WorldModel + + +class TDMPC2: + """ + TD-MPC2 agent. Implements training + inference. + Can be used for both single-task and multi-task experiments. + """ + + def __init__(self, cfg): + self.cfg = cfg + self.device = torch.device('cuda') + self.model = WorldModel(cfg).to(self.device) + self.optim = torch.optim.Adam([ + {'params': self.model._encoder.parameters(), 'lr': self.cfg.lr*self.cfg.enc_lr_scale}, + {'params': self.model._dynamics.parameters()}, + {'params': self.model._reward.parameters()}, + {'params': self.model._Qs.parameters()}, + {'params': self.model._task_emb.parameters() if self.cfg.multitask else []} + ], lr=self.cfg.lr) + self.pi_optim = torch.optim.Adam(self.model._pi.parameters(), lr=self.cfg.lr, eps=1e-5) + self.model.eval() + self.scale = RunningScale(cfg) + self.cfg.iterations += 2*int(cfg.action_dim >= 20) # Heuristic for large action spaces + self.discount = torch.tensor( + [self._get_discount(ep_len) for ep_len in cfg.episode_lengths], device='cuda' + ) if self.cfg.multitask else self._get_discount(cfg.episode_length) + + def _get_discount(self, episode_length): + """ + Returns discount factor for a given episode length. + Simple heuristic that scales discount linearly with episode length. + Default values should work well for most tasks, but can be changed as needed. + + Args: + episode_length (int): Length of the episode. Assumes episodes are of fixed length. 
+ + Returns: + float: Discount factor for the task. + """ + frac = episode_length/self.cfg.discount_denom + return min(max((frac-1)/(frac), self.cfg.discount_min), self.cfg.discount_max) + + def save(self, fp): + """ + Save state dict of the agent to filepath. + + Args: + fp (str): Filepath to save state dict to. + """ + torch.save({"model": self.model.state_dict()}, fp) + + def load(self, fp): + """ + Load a saved state dict from filepath (or dictionary) into current agent. + + Args: + fp (str or dict): Filepath or state dict to load. + """ + state_dict = fp if isinstance(fp, dict) else torch.load(fp) + self.model.load_state_dict(state_dict["model"]) + + @torch.no_grad() + def act(self, obs, t0=False, eval_mode=False, task=None): + """ + Select an action by planning in the latent space of the world model. + + Args: + obs (torch.Tensor): Observation from the environment. + t0 (bool): Whether this is the first observation in the episode. + eval_mode (bool): Whether to use the mean of the action distribution. + task (int): Task index (only used for multi-task experiments). + + Returns: + torch.Tensor: Action to take in the environment. 
+ """ + obs = obs.to(self.device, non_blocking=True).unsqueeze(0) + if task is not None: + task = torch.tensor([task], device=self.device) + z = self.model.encode(obs, task) + a = self.plan(z, t0=t0, eval_mode=eval_mode, task=task) + return a.cpu() + + @torch.no_grad() + def _estimate_value(self, z, actions, task): + """Estimate value of a trajectory starting at latent state z and executing given actions.""" + G, discount = 0, 1 + for t in range(self.cfg.horizon): + reward = math.two_hot_inv(self.model.reward(z, actions[t], task), self.cfg) + z = self.model.next(z, actions[t], task) + G += discount * reward + discount *= self.discount[torch.tensor(task)] if self.cfg.multitask else self.discount + return G + discount * self.model.Q(z, self.model.pi(z, task)[1], task, return_type='avg') + + @torch.no_grad() + def plan(self, z, t0=False, eval_mode=False, task=None): + """ + Plan a sequence of actions using the learned world model. + + Args: + z (torch.Tensor): Latent state from which to plan. + t0 (bool): Whether this is the first observation in the episode. + eval_mode (bool): Whether to use the mean of the action distribution. + task (Torch.Tensor): Task index (only used for multi-task experiments). + + Returns: + torch.Tensor: Action to take in the environment. 
+ """ + # Sample policy trajectories + if self.cfg.num_pi_trajs > 0: + pi_actions = torch.empty(self.cfg.horizon, self.cfg.num_pi_trajs, self.cfg.action_dim, device=self.device) + _z = z.repeat(self.cfg.num_pi_trajs, 1) + for t in range(self.cfg.horizon-1): + pi_actions[t] = self.model.pi(_z, task)[1] + _z = self.model.next(_z, pi_actions[t], task) + pi_actions[-1] = self.model.pi(_z, task)[1] + + # Initialize state and parameters + z = z.repeat(self.cfg.num_samples, 1) + mean = torch.zeros(self.cfg.horizon, self.cfg.action_dim, device=self.device) + std = self.cfg.max_std*torch.ones(self.cfg.horizon, self.cfg.action_dim, device=self.device) + if not t0: + mean[:-1] = self._prev_mean[1:] + actions = torch.empty(self.cfg.horizon, self.cfg.num_samples, self.cfg.action_dim, device=self.device) + if self.cfg.num_pi_trajs > 0: + actions[:, :self.cfg.num_pi_trajs] = pi_actions + + # Iterate MPPI + for i in range(self.cfg.iterations): + + # Sample actions + actions[:, self.cfg.num_pi_trajs:] = (mean.unsqueeze(1) + std.unsqueeze(1) * \ + torch.randn(self.cfg.horizon, self.cfg.num_samples-self.cfg.num_pi_trajs, self.cfg.action_dim, device=std.device)) \ + .clamp(-1, 1) + if self.cfg.multitask: + actions = actions * self.model._action_masks[task] + + # Compute elite actions + value = self._estimate_value(z, actions, task).nan_to_num_(0) + elite_idxs = torch.topk(value.squeeze(1), self.cfg.num_elites, dim=0).indices + elite_value, elite_actions = value[elite_idxs], actions[:, elite_idxs] + + # Update parameters + max_value = elite_value.max(0)[0] + score = torch.exp(self.cfg.temperature*(elite_value - max_value)) + score /= score.sum(0) + mean = torch.sum(score.unsqueeze(0) * elite_actions, dim=1) / (score.sum(0) + 1e-9) + std = torch.sqrt(torch.sum(score.unsqueeze(0) * (elite_actions - mean.unsqueeze(1)) ** 2, dim=1) / (score.sum(0) + 1e-9)) \ + .clamp_(self.cfg.min_std, self.cfg.max_std) + if self.cfg.multitask: + mean = mean * self.model._action_masks[task] + std = std * 
self.model._action_masks[task] + + # Select action + score = score.squeeze(1).cpu().numpy() + actions = elite_actions[:, np.random.choice(np.arange(score.shape[0]), p=score)] + self._prev_mean = mean + a, std = actions[0], std[0] + if not eval_mode: + a += std * torch.randn(self.cfg.action_dim, device=std.device) + return a.clamp_(-1, 1) + + def update_pi(self, zs, task): + """ + Update policy using a sequence of latent states. + + Args: + zs (torch.Tensor): Sequence of latent states. + task (torch.Tensor): Task index (only used for multi-task experiments). + + Returns: + float: Loss of the policy update. + """ + self.pi_optim.zero_grad(set_to_none=True) + self.model.track_q_grad(False) + _, pis, log_pis, _ = self.model.pi(zs, task) + qs = self.model.Q(zs, pis, task, return_type='avg') + self.scale.update(qs[0]) + qs = self.scale(qs) + + # Loss is a weighted sum of Q-values + rho = torch.pow(self.cfg.rho, torch.arange(len(qs), device=self.device)) + pi_loss = ((self.cfg.entropy_coef * log_pis - qs).mean(dim=(1,2)) * rho).mean() + pi_loss.backward() + torch.nn.utils.clip_grad_norm_(self.model._pi.parameters(), self.cfg.grad_clip_norm) + self.pi_optim.step() + self.model.track_q_grad(True) + + return pi_loss.item() + + @torch.no_grad() + def _td_target(self, next_z, reward, task): + """ + Compute the TD-target from a reward and the observation at the following time step. + + Args: + next_z (torch.Tensor): Latent state at the following time step. + reward (torch.Tensor): Reward at the current time step. + task (torch.Tensor): Task index (only used for multi-task experiments). + + Returns: + torch.Tensor: TD-target. + """ + pi = self.model.pi(next_z, task)[1] + discount = self.discount[task].unsqueeze(-1) if self.cfg.multitask else self.discount + return reward + discount * self.model.Q(next_z, pi, task, return_type='min', target=True) + + def update(self, buffer): + """ + Main update function. Corresponds to one iteration of model learning. 
+ + Args: + buffer (common.buffer.Buffer): Replay buffer. + + Returns: + dict: Dictionary of training statistics. + """ + obs, action, reward, task = buffer.sample() + + # Compute targets + with torch.no_grad(): + next_z = self.model.encode(obs[1:], task) + td_targets = self._td_target(next_z, reward, task) + + # Prepare for update + self.optim.zero_grad(set_to_none=True) + self.model.train() + + # Latent rollout + zs = torch.empty(self.cfg.horizon+1, self.cfg.batch_size, self.cfg.latent_dim, device=self.device) + z = self.model.encode(obs[0], task) + zs[0] = z + consistency_loss = 0 + for t in range(self.cfg.horizon): + z = self.model.next(z, action[t], task) + consistency_loss += F.mse_loss(z, next_z[t]) * self.cfg.rho**t + zs[t+1] = z + + # Predictions + _zs = zs[:-1] + qs = self.model.Q(_zs, action, task, return_type='all') + reward_preds = self.model.reward(_zs, action, task) + + # Compute losses + reward_loss, value_loss = 0, 0 + for t in range(self.cfg.horizon): + reward_loss += math.soft_ce(reward_preds[t], reward[t], self.cfg).mean() * self.cfg.rho**t + for q in range(self.cfg.num_q): + value_loss += math.soft_ce(qs[q][t], td_targets[t], self.cfg).mean() * self.cfg.rho**t + consistency_loss *= (1/self.cfg.horizon) + reward_loss *= (1/self.cfg.horizon) + value_loss *= (1/(self.cfg.horizon * self.cfg.num_q)) + total_loss = ( + self.cfg.consistency_coef * consistency_loss + + self.cfg.reward_coef * reward_loss + + self.cfg.value_coef * value_loss + ) + + # Update model + total_loss.backward() + grad_norm = torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.cfg.grad_clip_norm) + self.optim.step() + + # Update policy + pi_loss = self.update_pi(zs.detach(), task) + + # Update target Q-functions + self.model.soft_update_target_Q() + + # Return training statistics + self.model.eval() + return { + "consistency_loss": float(consistency_loss.mean().item()), + "reward_loss": float(reward_loss.mean().item()), + "value_loss": float(value_loss.mean().item()), + 
"pi_loss": pi_loss, + "total_loss": float(total_loss.mean().item()), + "grad_norm": float(grad_norm), + "pi_scale": float(self.scale.value), + } diff --git a/tdmpc2/train.py b/tdmpc2/train.py new file mode 100755 index 0000000..a35c11b --- /dev/null +++ b/tdmpc2/train.py @@ -0,0 +1,61 @@ +import os +os.environ['MUJOCO_GL'] = 'egl' +import warnings +warnings.filterwarnings('ignore') +import torch + +import hydra +from termcolor import colored + +from common.parser import parse_cfg +from common.seed import set_seed +from common.buffer import Buffer +from envs import make_env +from tdmpc2 import TDMPC2 +from trainer.offline_trainer import OfflineTrainer +from trainer.online_trainer import OnlineTrainer +from common.logger import Logger + +torch.backends.cudnn.benchmark = True + + +@hydra.main(config_name='config', config_path='.') +def train(cfg: dict): + """ + Script for training single-task / multi-task TD-MPC2 agents. + + Most relevant args: + `task`: task name (or mt30/mt80 for multi-task training) + `model_size`: model size, must be one of `[1, 5, 19, 48, 317]` (default: 5) + `steps`: number of training/environment steps (default: 10M) + `seed`: random seed (default: 1) + + See config.yaml for a full list of args. + + Example usage: + ``` + $ python train.py task=mt80 model_size=48 + $ python train.py task=mt30 model_size=317 + $ python train.py task=dog-run steps=7000000 + ``` + """ + assert torch.cuda.is_available() + assert cfg.steps > 0, 'Must train for at least 1 step.' 
# =============================================================================
# tdmpc2/train.py (tail only)
#
# The enclosing ``train`` entry point is defined before this chunk, so its
# visible tail is preserved verbatim below (as an excerpt) rather than edited.
# =============================================================================
#     cfg = parse_cfg(cfg)
#     set_seed(cfg.seed)
#     print(colored('Work dir:', 'yellow', attrs=['bold']), cfg.work_dir)
#
#     trainer_cls = OfflineTrainer if cfg.multitask else OnlineTrainer
#     trainer = trainer_cls(
#         cfg=cfg,
#         env=make_env(cfg),
#         agent=TDMPC2(cfg),
#         buffer=Buffer(cfg),
#         logger=Logger(cfg),
#     )
#     trainer.train()
#     print('\nTraining completed successfully')
#
#
# if __name__ == '__main__':
#     train()


# =============================================================================
# tdmpc2/trainer/__init__.py (new file, mode 100644, empty)
# =============================================================================


# =============================================================================
# tdmpc2/trainer/base.py (new file, mode 100755)
# =============================================================================
class Trainer:
    """Base trainer class for TD-MPC2.

    Holds references to the components shared by all trainers and prints a
    short model summary on construction. ``eval`` and ``train`` only raise
    ``NotImplementedError`` here; concrete trainers override them.
    """

    def __init__(self, cfg, env, agent, buffer, logger):
        """Store the training components and print model statistics.

        Args:
            cfg: Run configuration object.
            env: Environment used for rollouts.
            agent: Agent to train; ``agent.model`` must expose ``total_params``.
            buffer: Replay buffer.
            logger: Logger used to report metrics.
        """
        self.cfg = cfg
        self.env = env
        self.agent = agent
        self.buffer = buffer
        self.logger = logger
        print("Learnable parameters: {:,}".format(self.agent.model.total_params))
        print('Architecture:', self.agent.model)

    def eval(self):
        """Evaluate a TD-MPC2 agent."""
        raise NotImplementedError

    def train(self):
        """Train a TD-MPC2 agent."""
        raise NotImplementedError


# =============================================================================
# tdmpc2/trainer/offline_trainer.py (new file, mode 100755)
# =============================================================================
import os
from copy import deepcopy
from time import time
from pathlib import Path
from glob import glob

import numpy as np
import torch
from tqdm import tqdm

from common.buffer import Buffer
from trainer.base import Trainer


class OfflineTrainer(Trainer):
    """Trainer class for multi-task offline TD-MPC2 training."""

    def __init__(self, *args, **kwargs):
        """Initialise the base trainer and record the wall-clock start time."""
        super().__init__(*args, **kwargs)
        self._start_time = time()

    def eval(self):
        """Evaluate a TD-MPC2 agent on every task in ``cfg.tasks``.

        Runs ``cfg.eval_episodes`` episodes per task.

        Returns:
            dict: For each task name, the keys ``episode_reward+<task>`` and
            ``episode_success+<task>`` mapped to the NaN-ignoring mean over
            that task's episodes.
        """
        results = dict()
        for task_idx in tqdm(range(len(self.cfg.tasks)), desc='Evaluating'):
            task_name = self.cfg.tasks[task_idx]
            ep_rewards, ep_successes = [], []
            for _ in range(self.cfg.eval_episodes):
                obs, done, ep_reward, t = self.env.reset(task_idx), False, 0, 0
                while not done:
                    # t0 is True only on the first step of the episode.
                    action = self.agent.act(obs, t0=t == 0, eval_mode=True, task=task_idx)
                    obs, reward, done, info = self.env.step(action)
                    ep_reward += reward
                    t += 1
                ep_rewards.append(ep_reward)
                # `info` is the dict returned by the final step of the episode.
                ep_successes.append(info['success'])
            results.update({
                f'episode_reward+{task_name}': np.nanmean(ep_rewards),
                f'episode_success+{task_name}': np.nanmean(ep_successes),
            })
        return results

    def train(self):
        """Train a TD-MPC2 agent from an offline multi-task dataset.

        Loads every ``*.pt`` file in ``cfg.data_dir`` into a freshly created
        buffer (replacing ``self.buffer``), then runs ``cfg.steps`` agent
        updates with periodic evaluation, logging and checkpointing.

        Raises:
            AssertionError: If the config is not an ``mt30``/``mt80``
                multitask run, if ``cfg.data_dir`` does not contain the task
                name, if no data files are found, if a file's episode length
                differs from the expected one, or if the filled buffer does
                not hold exactly ``capacity`` episodes.
        """
        assert self.cfg.multitask and self.cfg.task in {'mt30', 'mt80'}, \
            'Offline training only supports multitask training with mt30 or mt80 task sets.'

        # Load data
        assert self.cfg.task in self.cfg.data_dir, \
            f'Expected data directory {self.cfg.data_dir} to contain {self.cfg.task}, ' \
            f'please double-check your config.'
        fp = Path(self.cfg.data_dir) / '*.pt'
        fps = sorted(glob(str(fp)))
        assert len(fps) > 0, f'No data found at {fp}'
        print(f'Found {len(fps)} files in {fp}')

        # Create buffer for sampling. Episode length and buffer size are
        # hard-coded per task set; the copy keeps self.cfg itself untouched.
        _cfg = deepcopy(self.cfg)
        _cfg.episode_length = 101 if self.cfg.task == 'mt80' else 501
        _cfg.buffer_size = 550_450_000 if self.cfg.task == 'mt80' else 345_690_000
        _cfg.steps = _cfg.buffer_size
        self.buffer = Buffer(_cfg)
        # A distinct loop name is used so the glob pattern `fp` is not shadowed.
        for data_fp in tqdm(fps, desc='Loading data'):
            # SECURITY NOTE(review): torch.load unpickles the file; only load
            # datasets from a trusted source.
            td = torch.load(data_fp)
            assert td.shape[1] == _cfg.episode_length, \
                f'Expected episode length {td.shape[1]} to match config episode length {_cfg.episode_length}, ' \
                f'please double-check your config.'
            for ep_idx in range(len(td)):
                self.buffer.add(td[ep_idx])
        assert self.buffer.num_eps == self.buffer.capacity, \
            f'Buffer has {self.buffer.num_eps} episodes, expected {self.buffer.capacity} episodes.'

        print(f'Training agent for {self.cfg.steps} iterations...')
        for i in range(self.cfg.steps):

            # Update agent
            train_metrics = self.agent.update(self.buffer)

            # Evaluate agent periodically
            # NOTE(review): `i == 10_000` logs once at iteration 10k; confirm
            # whether a periodic `i % 10_000 == 0` was intended.
            if i % self.cfg.eval_freq == 0 or i == 10_000:
                metrics = {
                    'iteration': i,
                    'total_time': time() - self._start_time,
                }
                metrics.update(train_metrics)
                if i % self.cfg.eval_freq == 0:
                    metrics.update(self.eval())
                    self.logger.pprint_multitask(metrics, self.cfg)
                    if i > 0:
                        self.logger.save_agent(self.agent, identifier=f'{i}')
                self.logger.log(metrics, 'pretrain')

        self.logger.finish(self.agent)


# =============================================================================
# tdmpc2/trainer/online_trainer.py (new file, mode 100755)
# =============================================================================
from time import time

import numpy as np
import torch
from tensordict.tensordict import TensorDict

from trainer.base import Trainer


class OnlineTrainer(Trainer):
    """Trainer class for single-task online TD-MPC2 training."""

    def __init__(self, *args, **kwargs):
        """Initialise the base trainer and reset step/episode counters."""
        super().__init__(*args, **kwargs)
        self._step = 0
        self._ep_idx = 0
        self._start_time = time()

    def common_metrics(self):
        """Return a dictionary of current metrics.

        Returns:
            dict: ``step``, ``episode`` and ``total_time`` (seconds elapsed
            since the trainer was constructed).
        """
        return dict(
            step=self._step,
            episode=self._ep_idx,
            total_time=time() - self._start_time,
        )

    def eval(self):
        """Evaluate a TD-MPC2 agent for ``cfg.eval_episodes`` episodes.

        When ``cfg.save_video`` is set, the video logger is initialised for
        every episode but enabled only for the first one.

        Returns:
            dict: ``episode_reward`` and ``episode_success``, each the
            NaN-ignoring mean over the evaluation episodes.
        """
        ep_rewards, ep_successes = [], []
        for i in range(self.cfg.eval_episodes):
            obs, done, ep_reward, t = self.env.reset(), False, 0, 0
            if self.cfg.save_video:
                self.logger.video.init(self.env, enabled=(i == 0))
            while not done:
                # t0 is True only on the first step of the episode.
                action = self.agent.act(obs, t0=t == 0, eval_mode=True)
                obs, reward, done, info = self.env.step(action)
                ep_reward += reward
                t += 1
                if self.cfg.save_video:
                    self.logger.video.record(self.env)
            ep_rewards.append(ep_reward)
            ep_successes.append(info['success'])
            if self.cfg.save_video:
                self.logger.video.save(self._step)
        return dict(
            episode_reward=np.nanmean(ep_rewards),
            episode_success=np.nanmean(ep_successes),
        )

    def to_td(self, obs, action=None, reward=None):
        """Create a single-step TensorDict with a leading batch dimension of 1.

        Args:
            obs: Observation tensor, or a dict of observation tensors.
            action: Action tensor. If ``None`` (first step of an episode), a
                placeholder shaped like ``env.rand_act()`` is used.
            reward: Reward tensor. If ``None``, a NaN scalar is used.

        Returns:
            TensorDict: Keys ``obs``, ``action`` and ``reward`` with
            ``batch_size=(1,)``.
        """
        if isinstance(obs, dict):
            obs = TensorDict({k: v.unsqueeze(0) for k, v in obs.items()}, batch_size=(1,)).cpu()
        else:
            obs = obs.unsqueeze(0).cpu()
        if action is None:
            # Deterministic placeholder instead of uninitialised memory
            # (torch.empty_like): NaN for floating dtypes, matching the reward
            # placeholder below, and 0 for dtypes that cannot represent NaN.
            template = self.env.rand_act()
            fill_value = float('nan') if template.is_floating_point() else 0
            action = torch.full_like(template, fill_value)
        if reward is None:
            reward = torch.tensor(float('nan'))
        td = TensorDict(dict(
            obs=obs,
            action=action.unsqueeze(0),
            reward=reward.unsqueeze(0),
        ), batch_size=(1,))
        return td

    def train(self):
        """Train a TD-MPC2 agent by interacting with the environment.

        Each loop iteration takes one environment step. Up to and including
        step ``cfg.seed_steps`` actions come from ``env.rand_act()``;
        afterwards they come from the agent. At step ``cfg.seed_steps`` the
        agent is updated ``cfg.seed_steps`` times, and once per step after
        that. Evaluation is requested every ``cfg.eval_freq`` steps and runs
        at the next episode boundary.
        """
        train_metrics, done, eval_next = {}, True, True
        while self._step <= self.cfg.steps:

            # Evaluate agent periodically
            if self._step % self.cfg.eval_freq == 0:
                eval_next = True

            # Reset environment
            if done:
                if eval_next:
                    eval_metrics = self.eval()
                    eval_metrics.update(self.common_metrics())
                    self.logger.log(eval_metrics, 'eval')
                    eval_next = False

                # Skipped on the very first pass: no episode has been
                # collected yet, so `info` and `self._tds` do not exist.
                if self._step > 0:
                    train_metrics.update(
                        # The first entry carries the NaN placeholder reward.
                        episode_reward=torch.tensor([td['reward'] for td in self._tds[1:]]).sum(),
                        episode_success=info['success'],
                    )
                    train_metrics.update(self.common_metrics())
                    self.logger.log(train_metrics, 'train')
                    self._ep_idx = self.buffer.add(torch.cat(self._tds))

                obs = self.env.reset()
                self._tds = [self.to_td(obs)]

            # Collect experience
            if self._step > self.cfg.seed_steps:
                # Only the reset entry is present on an episode's first step.
                action = self.agent.act(obs, t0=len(self._tds) == 1)
            else:
                action = self.env.rand_act()
            obs, reward, done, info = self.env.step(action)
            self._tds.append(self.to_td(obs, action, reward))

            # Update agent
            if self._step >= self.cfg.seed_steps:
                if self._step == self.cfg.seed_steps:
                    num_updates = self.cfg.seed_steps
                    print('Pretraining agent on seed data...')
                else:
                    num_updates = 1
                # Initialised so that seed_steps == 0 (zero updates) does not
                # leave the name unbound below.
                _train_metrics = {}
                for _ in range(num_updates):
                    _train_metrics = self.agent.update(self.buffer)
                train_metrics.update(_train_metrics)

            self._step += 1

        self.logger.finish(self.agent)