added minecraft environment

2023-07-02 11:29:48 +09:00
parent 8fa2274cfc
commit 036e9a8028
6 changed files with 543 additions and 5 deletions
--- a/envs/minecraft.py
+++ b/envs/minecraft.py
@@ -0,0 +1,154 @@
+import numpy as np
+from . import minecraft_base
+
+import gym
+
+def make_env(task, *args, **kwargs):
+    """Build the Minecraft environment registered under `task`.
+
+    Args:
+        task: Task name; one of 'wood', 'climb' or 'diamond'.
+        *args: Positional arguments forwarded to the environment constructor.
+        **kwargs: Keyword arguments forwarded to the environment constructor.
+
+    Returns:
+        A newly constructed environment for the requested task.
+
+    Raises:
+        KeyError: If `task` is not one of the registered task names.
+    """
+    tasks = {
+        'wood': MinecraftWood,
+        'climb': MinecraftClimb,
+        'diamond': MinecraftDiamond,
+    }
+    try:
+        env_cls = tasks[task]
+    except KeyError:
+        # Still a KeyError (callers may catch it), but say what went wrong.
+        raise KeyError(
+            'Unknown Minecraft task {!r}; expected one of {}'.format(
+                task, sorted(tasks))) from None
+    return env_cls(*args, **kwargs)
+
+
+class MinecraftWood(gym.Wrapper):
+  """Task that rewards every newly collected log plus a health-change term.
+
+  Wraps `minecraft_base.MinecraftBase` restricted to `BASIC_ACTIONS`. The
+  reward returned by the base environment is discarded and replaced by the
+  sum of the reward functions in `self.rewards`.
+  """
+
+  def __init__(self, *args, **kwargs):
+    """Create the base environment; extra arguments are forwarded to it."""
+    actions = BASIC_ACTIONS
+    self.rewards = [
+        CollectReward('log', repeated=1),
+        HealthReward(),
+    ]
+    env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
+    # gym.Wrapper stores the wrapped environment as `self.env`, which
+    # step() and reset() below depend on.
+    super().__init__(env)
+
+  def step(self, action):
+    """Step the base environment and substitute the task reward.
+
+    Returns:
+      The `(obs, reward, done, info)` tuple, with the task reward also
+      written to `obs['reward']`.
+    """
+    obs, _, done, info = self.env.step(action)
+    reward = sum([fn(obs, self.env.inventory) for fn in self.rewards])
+    obs['reward'] = reward
+    return obs, reward, done, info
+
+  def reset(self):
+    """Reset the base environment and re-baseline the reward trackers."""
+    obs = self.env.reset()
+    # The values are discarded: the reward functions are only evaluated so
+    # they can reset their internal state (they do so when obs['is_first']
+    # is set, which the reset observation is expected to carry).
+    for fn in self.rewards:
+      fn(obs, self.env.inventory)
+    return obs
+
+
+class MinecraftClimb(gym.Wrapper):
+  """Task that rewards gaining height, plus a health-change term.
+
+  Wraps `minecraft_base.MinecraftBase` restricted to `BASIC_ACTIONS`. The
+  per-step reward is the change in the second component of
+  `obs['log_player_pos']` since the previous step, plus `HealthReward`.
+  """
+
+  def __init__(self, *args, **kwargs):
+    """Create the base environment; extra arguments are forwarded to it."""
+    actions = BASIC_ACTIONS
+    env = minecraft_base.MinecraftBase(actions, *args, **kwargs)
+    # gym.Wrapper stores the wrapped environment as `self.env`, which
+    # step() and reset() below depend on.
+    super().__init__(env)
+    self._previous = None  # Height at the previous step; None = no baseline.
+    self._health_reward = HealthReward()
+
+  def step(self, action):
+    """Step the base environment and substitute the climbing reward.
+
+    Returns:
+      The `(obs, reward, done, info)` tuple, with the task reward also
+      written to `obs['reward']`.
+    """
+    obs, _, done, info = self.env.step(action)
+    _, y, _ = obs['log_player_pos']
+    height = np.float32(y)
+    # Start a new baseline at episode start, and also when none exists yet,
+    # so the subtraction below never sees None.
+    if obs['is_first'] or self._previous is None:
+      self._previous = height
+    reward = height - self._previous
+    reward += self._health_reward(obs)
+    obs['reward'] = reward
+    self._previous = height
+    return obs, reward, done, info
+
+  def reset(self):
+    """Reset the base environment and drop the previous episode's baselines."""
+    obs = self.env.reset()
+    # Height is re-baselined lazily on the next step().
+    self._previous = None
+    # Value discarded: evaluated only so the health tracker re-baselines
+    # (it does so when obs['is_first'] is set, which the reset observation
+    # is expected to carry).
+    self._health_reward(obs)
+    return obs
+
+
+class MinecraftDiamond(gym.Wrapper):
+  """Task with one-time rewards for each item on the way to a diamond.
+
+  Extends `BASIC_ACTIONS` with crafting, placing, equipping and smelting
+  actions. The reward returned by the base environment is discarded and
+  replaced by the sum of the reward functions in `self.rewards`.
+  """
+
+  def __init__(self, *args, **kwargs):
+    """Create the base environment; extra arguments are forwarded to it."""
+    # Insertion order is kept identical to the basic actions followed by the
+    # task-specific ones, in case the base environment indexes actions by it.
+    actions = dict(BASIC_ACTIONS)
+    actions.update({
+        'craft_planks': {'craft': 'planks'},
+        'craft_stick': {'craft': 'stick'},
+        'craft_crafting_table': {'craft': 'crafting_table'},
+        'place_crafting_table': {'place': 'crafting_table'},
+        'craft_wooden_pickaxe': {'nearbyCraft': 'wooden_pickaxe'},
+        'craft_stone_pickaxe': {'nearbyCraft': 'stone_pickaxe'},
+        'craft_iron_pickaxe': {'nearbyCraft': 'iron_pickaxe'},
+        'equip_stone_pickaxe': {'equip': 'stone_pickaxe'},
+        'equip_wooden_pickaxe': {'equip': 'wooden_pickaxe'},
+        'equip_iron_pickaxe': {'equip': 'iron_pickaxe'},
+        'craft_furnace': {'nearbyCraft': 'furnace'},
+        'place_furnace': {'place': 'furnace'},
+        'smelt_iron_ingot': {'nearbySmelt': 'iron_ingot'},
+    })
+    milestones = (
+        'log',
+        'planks',
+        'stick',
+        'crafting_table',
+        'wooden_pickaxe',
+        'cobblestone',
+        'stone_pickaxe',
+        'iron_ore',
+        'furnace',
+        'iron_ingot',
+        'iron_pickaxe',
+        'diamond',
+    )
+    self.rewards = [CollectReward(name, once=1) for name in milestones]
+    self.rewards.append(HealthReward())
+    base = minecraft_base.MinecraftBase(actions, *args, **kwargs)
+    super().__init__(base)
+
+  def _total_reward(self, obs):
+    """Evaluate every reward function on `obs`, in order, and sum them."""
+    total = 0
+    for fn in self.rewards:
+      total = total + fn(obs, self.env.inventory)
+    return total
+
+  def step(self, action):
+    """Step the base environment and substitute the task reward.
+
+    Returns:
+      The `(obs, reward, done, info)` tuple, with the task reward also
+      written to `obs['reward']`.
+    """
+    obs, _, done, info = self.env.step(action)
+    total = self._total_reward(obs)
+    obs['reward'] = total
+    return obs, total, done, info
+
+  def reset(self):
+    """Reset the base environment and the reward trackers."""
+    obs = self.env.reset()
+    # Result intentionally ignored: the call only lets the reward functions
+    # reset their internal state on the first observation.
+    self._total_reward(obs)
+    return obs
+
+
+class CollectReward:
+  """Stateful reward for acquiring a given inventory item.
+
+  Args:
+    item: Key looked up in the inventory on every call.
+    once: Reward paid when the count becomes positive while the highest
+      count seen so far in the episode is still zero.
+    repeated: Reward paid per unit of increase over the previous call.
+  """
+
+  def __init__(self, item, once=0, repeated=0):
+    self.item = item
+    self.once = once
+    self.repeated = repeated
+    self.previous = 0  # Count seen on the previous call.
+    self.maximum = 0   # Highest count seen since the last first-observation.
+
+  def __call__(self, obs, inventory):
+    """Return the reward for this observation and update the trackers.
+
+    When `obs['is_first']` is truthy the trackers are re-baselined to the
+    current count and the reward is 0.
+    """
+    count = inventory[self.item]
+    if obs['is_first']:
+      self.previous = self.maximum = count
+      return 0
+    gained = count - self.previous
+    # Losing items never yields a negative reward.
+    reward = self.repeated * (gained if gained > 0 else 0)
+    first_acquisition = self.maximum == 0 and count > 0
+    if first_acquisition:
+      reward += self.once
+    self.previous = count
+    if count > self.maximum:
+      self.maximum = count
+    return reward
+
+
+class HealthReward:
+  """Stateful reward proportional to the change in `obs['health']`.
+
+  Args:
+    scale: Multiplier applied to the health difference between calls.
+  """
+
+  def __init__(self, scale=0.01):
+    self.scale = scale
+    self.previous = None  # Health at the previous call; None = no baseline.
+
+  def __call__(self, obs, inventory=None):
+    """Return the scaled health change since the previous call.
+
+    Args:
+      obs: Observation providing 'health' and 'is_first'.
+      inventory: Unused; accepted so this can be called like `CollectReward`.
+
+    Returns:
+      0 when a new baseline is started, otherwise the scaled difference as
+      an `np.float32`.
+    """
+    health = obs['health']
+    # Also start a baseline when none exists yet, so a first call that is
+    # not flagged `is_first` does not subtract None.
+    if obs['is_first'] or self.previous is None:
+      self.previous = health
+      return 0
+    reward = self.scale * (health - self.previous)
+    self.previous = health
+    return np.float32(reward)
+
+
+# Actions shared by every task: maps an action name to the keyword settings
+# handed to the base environment for that action.
+# NOTE(review): camera tuples are presumably (pitch, yaw) deltas in degrees;
+# confirm against minecraft_base.
+BASIC_ACTIONS = {
+    'noop': {},
+    'attack': {'attack': 1},
+    'turn_up': {'camera': (-15, 0)},
+    'turn_down': {'camera': (15, 0)},
+    'turn_left': {'camera': (0, -15)},
+    'turn_right': {'camera': (0, 15)},
+    'forward': {'forward': 1},
+    'back': {'back': 1},
+    'left': {'left': 1},
+    'right': {'right': 1},
+    # Jumping also moves forward.
+    'jump': {'jump': 1, 'forward': 1},
+    'place_dirt': {'place': 'dirt'},
+}