Initial Commit (tested training, testing, and TRT conversion)

This commit is contained in:
Lu Junjie
2024-10-20 17:01:07 +08:00
parent 86d2f311f8
commit 5738088bae
221 changed files with 59249 additions and 6 deletions

View File

View File

@@ -0,0 +1,246 @@
"""
The code is from stable_baseline3.
"""
from abc import ABC, abstractmethod
from gym import spaces
from typing import Any, Dict, Generator, List, Optional, Union, NamedTuple
from stable_baselines3.common.vec_env import VecNormalize
import torch as th
import numpy as np
import warnings
from stable_baselines3.common.type_aliases import (
ReplayBufferSamples,
RolloutBufferSamples,
)
try:
# Check memory used by replay buffer when possible
import psutil
except ImportError:
psutil = None
class BaseBuffer(ABC):
"""
Base class that represent a buffer (rollout or replay)
:param buffer_size: Max number of element in the buffer
:param observation_dim: Observation space
:param action_space: Action space
:param device: PyTorch device
to which the values will be converted
:param n_envs: Number of parallel environments
"""
def __init__(
self,
buffer_size: int,
observation_dim: int,
device: Union[th.device, str] = "cpu",
n_envs: int = 1,
):
super(BaseBuffer, self).__init__()
self.buffer_size = buffer_size
self.observation_dim = observation_dim
self.pos = 0
self.full = False
self.device = device
self.n_envs = n_envs
@staticmethod
def swap_and_flatten(arr: np.ndarray) -> np.ndarray:
"""
Swap and then flatten axes 0 (buffer_size) and 1 (n_envs)
to convert shape from [n_steps, n_envs, ...] (when ... is the shape of the features)
to [n_steps * n_envs, ...] (which maintain the order)
:param arr:
:return:
"""
shape = arr.shape
if len(shape) < 3:
shape = shape + (1,)
return arr.swapaxes(0, 1).reshape(shape[0] * shape[1], *shape[2:])
def size(self) -> int:
"""
:return: The current size of the buffer
"""
if self.full:
return self.buffer_size
return self.pos
def add(self, *args, **kwargs) -> None:
"""
Add elements to the buffer.
"""
raise NotImplementedError()
def extend(self, *args, **kwargs) -> None:
"""
Add a new batch of transitions to the buffer
"""
# Do a for loop along the batch axis
for data in zip(*args):
self.add(*data)
def reset(self) -> None:
"""
Reset the buffer.
"""
self.pos = 0
self.full = False
def sample(self, batch_size: int, env: Optional[VecNormalize] = None):
"""
:param batch_size: Number of element to sample
:param env: associated gym VecEnv
to normalize the observations/rewards when sampling
:return:
"""
upper_bound = self.buffer_size if self.full else self.pos
batch_inds = np.random.randint(0, upper_bound, size=batch_size)
return self._get_samples(batch_inds, env=env)
@abstractmethod
def _get_samples(
self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None
) -> Union[ReplayBufferSamples, RolloutBufferSamples]:
"""
:param batch_inds:
:param env:
:return:
"""
raise NotImplementedError()
def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor:
"""
Convert a numpy array to a PyTorch tensor.
Note: it copies the data by default
:param array:
:param copy: Whether to copy or not the data
(may be useful to avoid changing things be reference)
:return:
"""
if copy:
return th.tensor(array).to(self.device)
return th.as_tensor(array).to(self.device)
class ReplayBufferSamples(NamedTuple):
observations: th.Tensor
goals: th.Tensor
depths: th.Tensor
map_id: th.Tensor
class ReplayBuffer(BaseBuffer):
"""
self.observations
self.goals
self.depths
self.map_ids
"""
def __init__(
self,
buffer_size: int,
observation_dim: spaces.Space,
image_WxH: tuple,
device: Union[th.device, str] = "cpu",
n_envs: int = 1,
optimize_memory_usage: bool = False,
handle_timeout_termination: bool = True,
):
super(ReplayBuffer, self).__init__(buffer_size, observation_dim, device, n_envs=n_envs)
# Adjust buffer size
self.buffer_size = max(buffer_size // n_envs, 1)
# Check that the replay buffer can fit into the memory
if psutil is not None:
mem_available = psutil.virtual_memory().available
self.optimize_memory_usage = optimize_memory_usage
self.observations = np.zeros((self.buffer_size, self.n_envs) + observation_dim, dtype=np.float32)
self.goals = np.zeros((self.buffer_size, self.n_envs, 3), dtype=np.float32)
self.depths = np.zeros((self.buffer_size, self.n_envs, 1, image_WxH[1], image_WxH[0]), dtype=np.float32)
self.map_ids = np.zeros((self.buffer_size, self.n_envs, 1), dtype=np.float32)
# Handle timeouts termination properly if needed
# see https://github.com/DLR-RM/stable-baselines3/issues/284
self.handle_timeout_termination = handle_timeout_termination
self.timeouts = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
if psutil is not None:
total_memory_usage = self.observations.nbytes + self.goals.nbytes + self.depths.nbytes + self.map_ids.nbytes
if total_memory_usage > mem_available:
# Convert to GB
total_memory_usage /= 1e9
mem_available /= 1e9
warnings.warn(
"This system does not have apparently enough memory to store the complete "
f"replay buffer {total_memory_usage:.2f}GB > {mem_available:.2f}GB"
)
def add(
self,
obs: np.ndarray,
goal: np.ndarray,
depth: np.ndarray,
map_id: int,
infos: List[Dict[str, Any]],
) -> None:
# TODO: 删了obs的格式调整检查下还能不能正常放
# Copy to avoid modification by reference
self.observations[self.pos] = np.array(obs).copy()
self.goals[self.pos] = np.array(goal).copy()
self.depths[self.pos] = np.array(depth).copy()
self.map_ids[self.pos] = np.array(map_id).copy()
if self.handle_timeout_termination:
self.timeouts[self.pos] = np.array([info.get("TimeLimit.truncated", False) for info in infos])
self.pos += 1
if self.pos == self.buffer_size:
self.full = True
self.pos = 0
def sample(self, batch_size: int, env: Optional[VecNormalize] = None) -> ReplayBufferSamples:
"""
Sample elements from the replay buffer.
Custom sampling when using memory efficient variant,
as we should not sample the element with index `self.pos`
See https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274
:param batch_size: Number of element to sample
:param env: associated gym VecEnv
to normalize the observations/rewards when sampling
:return:
"""
if not self.optimize_memory_usage:
return super().sample(batch_size=batch_size, env=env)
# Do not sample the element with index `self.pos` as the transitions is invalid
# (we use only one array to store `obs` and `next_obs`)
if self.full:
batch_inds = (np.random.randint(1, self.buffer_size, size=batch_size) + self.pos) % self.buffer_size
else:
batch_inds = np.random.randint(0, self.pos, size=batch_size)
return self._get_samples(batch_inds, env=env)
def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> ReplayBufferSamples:
env_indices = np.random.randint(0, high=self.n_envs, size=(len(batch_inds),))
data = (
self.observations[batch_inds, env_indices, :],
self.goals[batch_inds, env_indices, :],
self.depths[batch_inds, env_indices, :],
self.map_ids[batch_inds, env_indices, :],
)
return ReplayBufferSamples(*data)

View File

@@ -0,0 +1,106 @@
import os
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
from ruamel.yaml import YAML
import time
from scipy.spatial.transform import Rotation as R
class YopoDataset(Dataset):
def __init__(self):
super(YopoDataset, self).__init__()
cfg = YAML().load(open(os.environ["FLIGHTMARE_PATH"] + "/flightlib/configs/traj_opt.yaml", 'r'))
scale = 32 # 神经网络下采样倍数
self.height = scale * cfg["vertical_num"]
self.width = scale * cfg["horizon_num"]
multiple_ = 0.5 * cfg["vel_max"]
# The x-direction follows a log-normal distribution,
# while the yz-direction follows a normal distribution with a mean of 0.
self.v_max = cfg["vel_max"]
v_des = multiple_ * cfg["vx_mean_unit"]
self.vx_lognorm_mean = np.log(self.v_max - v_des)
self.vx_logmorm_sigma = np.log(np.sqrt(v_des))
self.v_mean = multiple_ * np.array([cfg["vx_mean_unit"], cfg["vy_mean_unit"], cfg["vz_mean_unit"]])
self.v_var = multiple_ * multiple_ * np.array([cfg["vx_var_unit"], cfg["vy_var_unit"], cfg["vz_var_unit"]])
self.a_mean = multiple_ * multiple_ * np.array([cfg["ax_mean_unit"], cfg["ay_mean_unit"], cfg["az_mean_unit"]])
self.a_var = multiple_ * multiple_ * multiple_ * multiple_ * np.array([cfg["ax_var_unit"], cfg["ay_var_unit"], cfg["az_var_unit"]])
print("Loading dataset, it may take a while...")
data_cfg = YAML().load(open(os.environ["FLIGHTMARE_PATH"] + "/flightlib/configs/vec_env.yaml", 'r'))
data_dir = os.environ["FLIGHTMARE_PATH"] + data_cfg["env"]["dataset_path"]
self.img_list = []
self.map_idx = []
self.positions = np.empty((0, 3))
self.quaternions = np.empty((0, 4))
subfolders = [f.path for f in os.scandir(data_dir) if f.is_dir()]
subfolders.sort(key=lambda x: os.path.basename(x).lower())
for i in range(len(subfolders)):
img_dir = subfolders[i]
file_names = [filename
for filename in os.listdir(img_dir)
if os.path.splitext(filename)[1] == '.tif']
file_names.sort(key=lambda x: int(x.split('.')[0].split("_")[1])) # sort by filename
images = [cv2.imread(img_dir + "/" + filename, -1).astype(np.float32) for filename in file_names]
self.img_list.extend(images)
self.map_idx.extend([i] * len(images))
label_path = img_dir + "/label.npz"
labels = np.load(label_path)
self.positions = np.vstack((self.positions, labels["positions"]))
self.quaternions = np.vstack((self.quaternions, labels["quaternions"]))
print("Dataset loaded!")
def __len__(self):
return len(self.img_list)
def __getitem__(self, item):
if self.img_list[item].shape[-2] != self.height or self.img_list[item].shape[-1] != self.width:
self.img_list[item] = cv2.resize(self.img_list[item], (self.width, self.height)) # OpenCV and NumPy is Dif
if len(self.img_list[item].shape) == 2:
self.img_list[item] = np.expand_dims(self.img_list[item], axis=0)
vel, acc = self._get_random_state()
# generate random goal in front of the quadrotor.
q_wxyz = self.quaternions[item, :] # q: wxyz
R_WB = R.from_quat([q_wxyz[1], q_wxyz[2], q_wxyz[3], q_wxyz[0]])
euler_angles = R_WB.as_euler('ZYX', degrees=False) # [yaw(z) pitch(y) roll(x)]
R_wB = R.from_euler('ZYX', [0, euler_angles[1], euler_angles[2]], degrees=False)
goal_w = np.random.randn(3) + np.array([2, 0, 0])
goal_b = R_wB.inv().apply(goal_w)
goal_dist = np.linalg.norm(goal_b)
goal_dir = goal_b / goal_dist
random_obs = np.hstack((vel, acc, goal_dir))
return (self.img_list[item], self.positions[item, :], self.quaternions[item, :], random_obs,
self.map_idx[item]) # in body frame, vel_acc no-normalization
def _get_random_state(self):
vel = self.v_mean + np.sqrt(self.v_var) * np.random.randn(3)
acc = self.a_mean + np.sqrt(self.a_var) * np.random.randn(3)
right_skewed_vx = -1
while right_skewed_vx < 0:
right_skewed_vx = np.random.lognormal(mean=self.vx_lognorm_mean, sigma=self.vx_logmorm_sigma, size=None)
right_skewed_vx = -right_skewed_vx + self.v_max + 0.2 # +0.2 to ensure v_max can be sampled
vel[0] = right_skewed_vx
# distribution of vx is visualized in docs/distribution_of_sampled_velocity.png (v_max=6)
return vel, acc
if __name__ == '__main__':
data_loader = DataLoader(YopoDataset(), batch_size=32, shuffle=True, num_workers=4)
start = time.time()
for epoch in range(1):
last = time.time()
for i, (depth, pos, quat, obs, id) in enumerate(data_loader):
pass
end = time.time()
print("总耗时:", end - start)

View File

@@ -0,0 +1,137 @@
import numpy as np
from scipy.spatial.transform import Rotation as R
class LatticeParam():
def __init__(self, cfg):
self.vel_max = cfg["vel_max"]
segment_time = 2 * cfg["radio_range"] / self.vel_max
self.horizon_num = cfg["horizon_num"]
self.vertical_num = cfg["vertical_num"]
self.radio_num = cfg["radio_num"]
self.vel_num = cfg["vel_num"]
self.horizon_fov = cfg["horizon_camera_fov"] * (self.horizon_num - 1) / self.horizon_num
self.vertical_fov = cfg["vertical_camera_fov"] * (self.vertical_num - 1) / self.vertical_num
self.horizon_anchor_fov = cfg["horizon_anchor_fov"]
self.vertical_anchor_fov = cfg["vertical_anchor_fov"]
self.radio_range = cfg["radio_range"]
self.vel_fov = cfg["vel_fov"]
self.vel_prefile = cfg["vel_prefile"]
self.acc_max = self.vel_max / segment_time
print("---------------------")
print("| max speed = ", round(self.vel_max, 1), " |")
print("| traj time = ", round(segment_time, 1), " |")
print("| max radio = ", round(2 * self.radio_range, 1), " |")
print("---------------------")
# ID in images:
# [8, 7, 6,
# 5, 4, 3,
# 2, 1, 0]
class LatticePrimitive():
def __init__(self, LatticeParam):
self.lattice_param = LatticeParam
if self.lattice_param.horizon_num == 1:
direction_diff = 0
else:
direction_diff = (self.lattice_param.horizon_fov / 180.0 * np.pi) / (self.lattice_param.horizon_num - 1)
if self.lattice_param.vertical_num == 1:
altitude_diff = 0
else:
altitude_diff = (self.lattice_param.vertical_fov / 180.0 * np.pi) / (self.lattice_param.vertical_num - 1)
radio_diff = self.lattice_param.radio_range / self.lattice_param.radio_num
if self.lattice_param.vel_num == 1:
vel_dir_diff = 0
else:
vel_dir_diff = (self.lattice_param.vel_fov / 180.0 * np.pi) / (self.lattice_param.vel_num - 1)
lattice_pos_list = []
lattice_vel_list = []
lattice_angle_list = []
self.lattice_Rbp_list = []
# Primitives: Bottom to Top, Right to Left
# We retain the code of sampling primitives with different velocity directions and length,
# hope to predict multiple outputs in each grid like YOLO, but it does not work well.
for h in range(0, self.lattice_param.radio_num):
for i in range(0, self.lattice_param.vertical_num):
for j in range(0, self.lattice_param.horizon_num):
for k in range(0, self.lattice_param.vel_num):
search_radio = (h + 1) * radio_diff
alpha = -direction_diff * (self.lattice_param.horizon_num - 1) / 2 + j * direction_diff
beta = -altitude_diff * (self.lattice_param.vertical_num - 1) / 2 + i * altitude_diff
gamma = -vel_dir_diff * (self.lattice_param.vel_num - 1) / 2 + k * vel_dir_diff
pos_node = [np.cos(beta) * np.cos(alpha) * search_radio,
np.cos(beta) * np.sin(alpha) * search_radio,
np.sin(beta) * search_radio]
vel_node = [np.cos(alpha + gamma) * self.lattice_param.vel_prefile,
np.sin(alpha + gamma) * self.lattice_param.vel_prefile,
0.0]
lattice_pos_list.append(pos_node)
lattice_vel_list.append(vel_node)
lattice_angle_list.append([alpha, beta])
# inner rotation: yaw-pitch-roll
Rotation = R.from_euler('ZYX', [alpha, -beta, 0.0], degrees=False)
self.lattice_Rbp_list.append(Rotation.as_matrix().astype(np.float32))
self.lattice_pos_node = np.array(lattice_pos_list)
self.lattice_vel_node = np.array(lattice_vel_list)
self.lattice_angle_node = np.array(lattice_angle_list)
self.yaw_diff = 0.5 * self.lattice_param.horizon_anchor_fov / 180.0 * np.pi
self.pitch_diff = 0.5 * self.lattice_param.vertical_anchor_fov / 180.0 * np.pi
def getStateLattice(self, id):
return self.lattice_pos_node[id, :], self.lattice_vel_node[id, :]
# yaw, pitch
def getAngleLattice(self, id):
return self.lattice_angle_node[id, 0], self.lattice_angle_node[id, 1]
def getRotation(self, id):
return self.lattice_Rbp_list[id]
"""
From body to world
p_w = Rwb * p_b + t_w
"""
def rotate(q_wb, pos_b): # quat: wxzy
pos_w = np.zeros_like(pos_b)
if q_wb.ndim == 1:
Rotation_wb = R.from_quat([q_wb[1], q_wb[2], q_wb[3], q_wb[0]]) # xyzw
pos_w[:] = np.dot(Rotation_wb.as_matrix(), pos_b[:])
else:
for i in range(0, q_wb.shape[0]):
Rotation_wb = R.from_quat([q_wb[i, 1], q_wb[i, 2], q_wb[i, 3], q_wb[i, 0]]) # xyzw
pos_w[i, :] = np.dot(Rotation_wb.as_matrix(), pos_b[i, :])
return pos_w
def transform(q_wb, tw, pos_b):
pos_w = rotate(q_wb, pos_b)
return pos_w + tw
"""
From world to body
p_b = Rbw * (p_w - t_w)
"""
def rotate_inv(q_wb, pos_w): # quat: wxzy
pos_b = np.zeros_like(pos_w)
if q_wb.ndim == 1:
Rotation_bw = R.from_quat([-q_wb[1], -q_wb[2], -q_wb[3], q_wb[0]]) # xyzw
pos_b[:] = np.dot(Rotation_bw.as_matrix(), pos_w[:])
else:
for i in range(0, q_wb.shape[0]):
Rotation_bw = R.from_quat([-q_wb[i, 1], -q_wb[i, 2], -q_wb[i, 3], q_wb[i, 0]]) # xyzw
pos_b[i, :] = np.dot(Rotation_bw.as_matrix(), pos_w[i, :])
return pos_b
def transform_inv(q_wb, tw, pos_w):
pos_b = rotate_inv(q_wb, pos_w - tw)
return pos_b

392
flightpolicy/yopo/resnet.py Normal file
View File

@@ -0,0 +1,392 @@
"""
this code is from torchvision.
"""
import torch
from torch import Tensor
import torch.nn as nn
from torch.hub import load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Optional
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
'wide_resnet50_2', 'wide_resnet101_2']
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-b627a593.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-0676ba61.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-63fe2227.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-394f9c45.pth',
'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
}
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=dilation, groups=groups, bias=False, dilation=dilation)
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
"""1x1 convolution"""
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class BasicBlock(nn.Module):
expansion: int = 1
def __init__(
self,
inplanes: int,
planes: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(BasicBlock, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if groups != 1 or base_width != 64:
raise ValueError('BasicBlock only supports groups=1 and base_width=64')
if dilation > 1:
raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
# Both self.conv1 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = norm_layer(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = norm_layer(planes)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class Bottleneck(nn.Module):
# Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
# while original implementation places the stride at the first 1x1 convolution(self.conv1)
# according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
# This variant is also known as ResNet V1.5 and improves accuracy according to
# https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
expansion: int = 4
def __init__(
self,
inplanes: int,
planes: int,
stride: int = 1,
downsample: Optional[nn.Module] = None,
groups: int = 1,
base_width: int = 64,
dilation: int = 1,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(Bottleneck, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
width = int(planes * (base_width / 64.)) * groups
# Both self.conv2 and self.downsample layers downsample the input when stride != 1
self.conv1 = conv1x1(inplanes, width)
self.bn1 = norm_layer(width)
self.conv2 = conv3x3(width, width, stride, groups, dilation)
self.bn2 = norm_layer(width)
self.conv3 = conv1x1(width, planes * self.expansion)
self.bn3 = norm_layer(planes * self.expansion)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
class ResNet(nn.Module):
def __init__(
self,
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
num_classes: int = 1000,
zero_init_residual: bool = False,
groups: int = 1,
width_per_group: int = 64,
replace_stride_with_dilation: Optional[List[bool]] = None,
norm_layer: Optional[Callable[..., nn.Module]] = None
) -> None:
super(ResNet, self).__init__()
if norm_layer is None:
norm_layer = nn.BatchNorm2d
self._norm_layer = norm_layer
self.inplanes = 64
self.dilation = 1
if replace_stride_with_dilation is None:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation = [False, False, False]
if len(replace_stride_with_dilation) != 3:
raise ValueError("replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}".format(replace_stride_with_dilation))
self.groups = groups
self.base_width = width_per_group
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = norm_layer(self.inplanes)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
dilate=replace_stride_with_dilation[0])
self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
dilate=replace_stride_with_dilation[1])
self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
dilate=replace_stride_with_dilation[2])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.fc = nn.Linear(512 * block.expansion, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block behaves like an identity.
# This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
if zero_init_residual:
for m in self.modules():
if isinstance(m, Bottleneck):
nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type]
elif isinstance(m, BasicBlock):
nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type]
def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int,
stride: int = 1, dilate: bool = False) -> nn.Sequential:
norm_layer = self._norm_layer
downsample = None
previous_dilation = self.dilation
if dilate:
self.dilation *= stride
stride = 1
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
conv1x1(self.inplanes, planes * block.expansion, stride),
norm_layer(planes * block.expansion),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
self.base_width, previous_dilation, norm_layer))
self.inplanes = planes * block.expansion
for _ in range(1, blocks):
layers.append(block(self.inplanes, planes, groups=self.groups,
base_width=self.base_width, dilation=self.dilation,
norm_layer=norm_layer))
return nn.Sequential(*layers)
def _forward_impl(self, x: Tensor) -> Tensor:
# See note [TorchScript super()]
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
# x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
# x = torch.flatten(x, 1)
x = self.fc(x)
return x
def forward(self, x: Tensor) -> Tensor:
return self._forward_impl(x)
def _resnet(
arch: str,
block: Type[Union[BasicBlock, Bottleneck]],
layers: List[int],
pretrained: bool,
progress: bool,
**kwargs: Any
) -> ResNet:
model = ResNet(block, layers, **kwargs)
if pretrained:
state_dict = load_state_dict_from_url(model_urls[arch],
progress=progress)
model.load_state_dict(state_dict)
return model
def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-18 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress,
**kwargs)
def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-34 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress,
**kwargs)
def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-101 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress,
**kwargs)
def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNet-152 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress,
**kwargs)
def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNeXt-50 32x4d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 4
return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""ResNeXt-101 32x8d model from
`"Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>`_.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['groups'] = 32
kwargs['width_per_group'] = 8
return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)
def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""Wide ResNet-50-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_.
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3],
pretrained, progress, **kwargs)
def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet:
r"""Wide ResNet-101-2 model from
`"Wide Residual Networks" <https://arxiv.org/pdf/1605.07146.pdf>`_.
The model is the same as ResNet except for the bottleneck number of channels
which is twice larger in every block. The number of channels in outer 1x1
convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048
channels, and in Wide ResNet-50-2 has 2048-1024-2048.
Args:
pretrained (bool): If True, returns a model pre-trained on ImageNet
progress (bool): If True, displays a progress bar of the download to stderr
"""
kwargs['width_per_group'] = 64 * 2
return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3],
pretrained, progress, **kwargs)

View File

@@ -0,0 +1,610 @@
"""
Training Strategy
supervised learning, imitation learning, testing, rollout
"""
import time
from copy import deepcopy
import os
import random
import cv2
import numpy as np
import torch as th
from torch.nn import functional as F
from stable_baselines3.common.type_aliases import RolloutReturn, TrainFreq, TrainFrequencyUnit
from stable_baselines3.common.utils import should_collect_more_steps, get_schedule_fn, configure_logger
from stable_baselines3.common.vec_env import VecEnv
from stable_baselines3.common.utils import get_device
# -----------
from flightpolicy.yopo.yopo_policy import YopoPolicy
from flightpolicy.yopo.dataloader import YopoDataset
from torch.utils.data import DataLoader
from flightpolicy.yopo.primitive_utils import transform, rotate, transform_inv, rotate_inv
from flightpolicy.yopo.primitive_utils import LatticeParam, LatticePrimitive
from flightpolicy.yopo.buffers import ReplayBuffer
from ruamel.yaml import YAML
class YopoAlgorithm:
def __init__(
self,
env=None,
learning_rate=0.001,
is_imitation=False,
buffer_size=1_000_000,
learning_starts=100,
batch_size=256,
unselect=0.0,
loss_weight=[],
train_freq=(1, "step"),
change_env_freq=-1,
gradient_steps=1,
policy_kwargs=None,
tensorboard_log=None,
verbose=0,
max_grad_norm=10,
):
# env
self.observation_dim = env.observation_dim
self.action_dim = env.action_dim
self.n_envs = env.num_envs
self.env = env
# training
self.learning_rate = learning_rate
self.batch_size = batch_size
self.max_grad_norm = max_grad_norm
self.unselect = unselect
self.loss_weight = loss_weight
self.device = get_device('auto')
self.policy_kwargs = {} if policy_kwargs is None else policy_kwargs
# imitation learning
self.is_imitation = is_imitation
self.buffer_size = buffer_size
self.train_freq = train_freq
self.change_env_freq = change_env_freq
self.learning_starts = learning_starts
self.gradient_steps = gradient_steps
self.freq_reset = False
self.replay_buffer = None
# logger
self.verbose = verbose
self.tensorboard_log = tensorboard_log
self.logger = configure_logger(self.verbose, self.tensorboard_log, "YOPO")
# trajectory
cfg = YAML().load(open(os.environ["FLIGHTMARE_PATH"] + "/flightlib/configs/traj_opt.yaml", 'r'))
self.lattice_space = LatticeParam(cfg)
self.lattice_primitive = LatticePrimitive(self.lattice_space)
self._setup_model()
def _setup_model(self):
self.lr_schedule = get_schedule_fn(self.learning_rate)
# buffer: pos, quat, vel, acc, depth
if self.replay_buffer is None and self.is_imitation:
self.replay_buffer = ReplayBuffer(
self.buffer_size,
self.observation_dim,
(self.env.network_width, self.env.network_height),
device=self.device,
n_envs=self.n_envs,
)
print("Loading Network...")
self.policy = YopoPolicy(
observation_dim=self.observation_dim,
action_dim=self.action_dim,
lattice_space=self.lattice_space,
lattice_primitive=self.lattice_primitive,
lr_schedule=self.lr_schedule,
train_env=self.env,
device=self.device,
**self.policy_kwargs
)
self.policy = self.policy.to(self.device)
print("Network Loaded!")
if self.is_imitation:
self._convert_train_freq()
def supervised_learning(self, epoch, log_interval):
self.policy.set_training_mode(True)
data_loader = DataLoader(YopoDataset(), batch_size=self.batch_size, shuffle=True, num_workers=0)
n_updates = 0
start_time = time.time()
for epoch_ in range(epoch):
cost_losses = [] # Performance (score) of prediction
score_losses = [] # Accuracy of the predicted score
for step, (depth, pos, quat, obs_b, map_id) in enumerate(data_loader): # obs: body frame
if depth.shape[0] != self.batch_size: # batch size == num of env
continue
n_updates = n_updates + 1
depth = depth.to(self.device)
obs_b = obs_b.numpy()
goal_dir = obs_b[:, 6:9]
goal_w = transform(quat.numpy(), pos.numpy(), 10 * goal_dir) # Rwb * g_b + t_wb
vel_w = rotate(quat.numpy(), obs_b[:, 0:3])
acc_w = rotate(quat.numpy(), obs_b[:, 3:6])
self.env.setState(pos.numpy(), vel_w, acc_w, quat.numpy())
self.env.setGoal(goal_w)
self.env.setMapID(map_id.numpy())
obs_b[:, 0:6] = self.normalize_obs(obs_b[:, 0:6])
obs_norm_input = self.prapare_input_observation(obs_b)
obs_norm_input = obs_norm_input.to(self.device)
endstate_score_predictions, cost_labels = self.policy.inference(depth, obs_norm_input)
score_labels = cost_labels.clone().detach()
cost_labels_record = th.mean(cost_labels)
cost_labels_filtered = self.cost_filter(cost_labels)
cost_loss = th.mean(cost_labels_filtered)
score_loss = F.smooth_l1_loss(endstate_score_predictions[:, 9, :], score_labels)
loss = self.loss_weight[0] * cost_loss + self.loss_weight[1] * score_loss
cost_losses.append(self.loss_weight[0] * cost_labels_record.item())
score_losses.append(self.loss_weight[1] * score_loss.item())
# Optimize the policy
self.policy.optimizer.zero_grad()
loss.backward()
# Clip gradient norm
th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
self.policy.optimizer.step()
if log_interval is not None and n_updates % log_interval[0] == 0:
self.logger.record("time/epoch", epoch_, exclude="tensorboard")
self.logger.record("time/steps", n_updates, exclude="tensorboard")
self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time),
exclude="tensorboard")
self.logger.record("train/trajectory_cost", np.mean(cost_losses))
self.logger.record("train/score_loss", np.mean(score_losses))
self.logger.dump(step=n_updates)
cost_losses = []
score_losses = []
start_time = time.time()
if log_interval is not None and n_updates % log_interval[1] == 0:
policy_path = self.logger.get_dir() + "/Policy"
os.makedirs(policy_path, exist_ok=True)
path = policy_path + "/epoch{}_iter{}.pth".format(epoch_, step)
th.save({"state_dict": self.policy.state_dict(), "data": self.policy.get_constructor_parameters()}, path)
# 模仿学习: 已弃用(暂未删除以备后续使用)
# 0、reset_state、get_depth、reset_goal
# 1、执行若干步env_num * 200
# 2、训练若干步batch_size = env_num, 训200次=1eposide
# 3、reset_state、get_depth、reset_goal
def imitation_learning(
self,
total_timesteps,
callback=None,
log_interval=4,
eval_env=None,
eval_freq=-1,
n_eval_episodes=5,
tb_log_name="YOPO",
eval_log_path=None,
reset_num_timesteps=True,
):
# 0. 初始化第一次观测
total_timesteps, callback = self._setup_learn(
total_timesteps,
eval_env,
callback,
eval_freq,
n_eval_episodes,
eval_log_path,
reset_num_timesteps,
tb_log_name,
)
self.pretrained = self.env.pretrained
callback.on_training_start(locals(), globals())
while self.num_timesteps < total_timesteps:
# 1. 数据收集
rollout = self.collect_rollouts(
self.env,
train_freq=self.train_freq,
action_noise=self.action_noise,
callback=callback,
replay_buffer=self.replay_buffer,
log_interval=log_interval,
)
if rollout.continue_training is False:
break
# 2. 训练模型
if self.num_timesteps > 0 and self.num_timesteps > self.learning_starts:
# If no `gradient_steps` is specified,
# do as many gradients steps as steps performed during the rollout
gradient_steps = self.gradient_steps if self.gradient_steps >= 0 else rollout.episode_timesteps
# Special case when the user passes `gradient_steps=0`
if gradient_steps > 0:
self.train(batch_size=self.batch_size, gradient_steps=gradient_steps)
self.reset_state()
iteration = int(self.num_timesteps / (self.train_freq.frequency * self.env.num_envs))
# 3. 重置环境
if self.change_env_freq > 0 and iteration % self.change_env_freq == 0:
self.env.spawnTreesAndSavePointcloud()
self._map_id = self._map_id + 1
self.reset_state()
# 4. 终端打印log
if log_interval is not None and iteration % log_interval[0] == 0:
self._dump_logs()
if log_interval is not None and iteration % log_interval[1] == 0:
policy_path = self.logger.get_dir() + "/Policy"
os.makedirs(policy_path, exist_ok=True)
path = policy_path + "/epoch0_iter{}.pth".format(iteration)
th.save({"state_dict": self.policy.state_dict(), "data": self.policy.get_constructor_parameters()}, path)
callback.on_training_end()
def test_policy(self, num_rollouts: int = 10):
max_ep_length = 400
self.policy.set_training_mode(False)
for n_roll in range(num_rollouts):
obs, done, ep_len = self.env.reset(), False, 0
costs = []
# Randomly initialize the position and goal on the map.
random_y_goal = 20 * random.uniform(-1, 1) + 20
random_y = 20 * random.uniform(-1, 1) + 20
goal_w = np.array([[20, random_y_goal, 2]])
obs = np.array([[-20, random_y, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
self.env.setGoal(goal_w)
self.env.setState(np.array([[-20, random_y, 2]]), np.array([[0, 0, 0]]),
np.array([[0, 0, 0]]), np.array([[1, 0, 0, 0]]))
self.env.render()
while not (done or (ep_len >= max_ep_length)):
depth = self.env.getDepthImage()
depth_vis = cv2.resize(depth[0][0], (320, 180))
cv2.imshow("depth", depth_vis)
cv2.waitKey(10)
depth = th.from_numpy(depth).to(self.device)
# transform observation to body frame
quat_bw = -obs[:, 9:13] # inv of quat: [w, -x, -y, -z]
quat_bw[:, 0] = -quat_bw[:, 0]
goal_dir_w = (goal_w - obs[:, 0:3]) / np.linalg.norm(goal_w - obs[:, 0:3])
goal_dir_b = rotate(quat_bw, goal_dir_w)
vel_acc_norm_b = self.normalize_obs(obs[:, 3:9])
obs_norm_b = np.hstack((vel_acc_norm_b, goal_dir_b))
obs_norm_input = self.prapare_input_observation(obs_norm_b)
obs_norm_input = obs_norm_input.to(self.device)
endstate_pred, score_pred = self.policy.predict(depth, obs_norm_input)
endstate_pred = endstate_pred.cpu().numpy()
# obs: p_wb, v_b, a_b, q_wb; endstate_pred: pva in body frame
obs, rew, done = self.env.step(endstate_pred)
costs.append(rew)
ep_len += 1
print("round ", n_roll, ", total steps:", len(costs), ", avg cost:", sum(costs) / len(costs))
def train(self, gradient_steps: int, batch_size: int) -> None:
"""
Sample the replay buffer and do the updates
(gradient descent and update target networks)
"""
# Switch to train mode (this affects batch norm / dropout)
self.policy.set_training_mode(True)
# Update learning rate according to schedule (TODO in supervised learning)
self._update_learning_rate(self.policy.optimizer)
cost_losses = []
score_losses = [] # dy, dz, r, p, vx, vy, vz
for _ in range(gradient_steps):
# Sample replay buffer
replay_data = self.replay_buffer.sample(batch_size, env=self._vec_normalize_env)
depth = th.from_numpy(replay_data.depths).to(self.device)
pos = replay_data.observations[:, 0:3]
vel_acc_b = replay_data.observations[:, 3:9]
quat_wb = replay_data.observations[:, 9:13]
goal_w = replay_data.goals
map_id = replay_data.map_id
goal_dir_w = (goal_w - pos) / np.linalg.norm(goal_w - pos, axis=1)[:, np.newaxis]
goal_dir_b = rotate_inv(quat_wb, goal_dir_w)
vel_w = rotate(quat_wb, vel_acc_b[:, 0:3])
acc_w = rotate(quat_wb, vel_acc_b[:, 3:6])
self.env.setState(pos, vel_w, acc_w, quat_wb)
self.env.setGoal(goal_w)
self.env.setMapID(map_id)
vel_acc_norm_b = self.normalize_obs(vel_acc_b)
obs_norm_b = np.hstack((vel_acc_norm_b, goal_dir_b))
obs_norm_input = self.prapare_input_observation(obs_norm_b)
obs_norm_input = obs_norm_input.to(self.device)
endstate_score_predictions, cost_labels = self.policy.inference(depth, obs_norm_input)
score_labels = cost_labels.clone().detach()
cost_labels_record = th.mean(cost_labels)
cost_labels_filtered = self.cost_filter(cost_labels)
cost_loss = th.mean(cost_labels_filtered)
score_loss = F.smooth_l1_loss(endstate_score_predictions[:, 9, :], score_labels)
loss = self.loss_weight[0] * cost_loss + self.loss_weight[1] * score_loss
cost_losses.append(self.loss_weight[0] * cost_labels_record.item())
score_losses.append(self.loss_weight[1] * score_loss.item())
# Optimize the policy
self.policy.optimizer.zero_grad()
loss.backward()
# Clip gradient norm
th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
self.policy.optimizer.step()
# Increase update counter
self._n_updates += gradient_steps
self.logger.record("train/n_updates", self._n_updates, exclude="tensorboard")
self.logger.record("train/trajectory_cost", np.mean(cost_losses))
self.logger.record("train/score_loss", np.mean(score_losses))
def collect_rollouts(
self,
env,
callback,
train_freq,
replay_buffer,
action_noise=None,
log_interval=None,
) -> RolloutReturn:
self.policy.set_training_mode(False)
num_collected_steps, num_collected_episodes = 0, 0
assert isinstance(env, VecEnv), "You must pass a VecEnv"
assert train_freq.frequency > 0, "Should at least collect one step or episode."
if env.num_envs > 1:
assert train_freq.unit == TrainFrequencyUnit.STEP, "You must use only one env when doing episodic training."
callback.on_rollout_start()
continue_training = True
"""
1、pred endstate
2、get obs: self._last_obs = env.step(endstate)
3、get depth: self._last_depth = env.getDepthImage()
4、record to buffer and back to 1
"""
while should_collect_more_steps(train_freq, num_collected_steps, num_collected_episodes):
# 1. pred endstate used latest policy or pre-trained policy
sampled_endstate = self._sample_action(action_noise, env.num_envs)
# 2. perform action
new_obs, rewards, dones = env.step(sampled_endstate)
self.num_timesteps += env.num_envs
num_collected_steps += 1
# Give access to local variables
callback.update_locals(locals())
# Only stop training if return value is False, not when it is None.
if callback.on_step() is False:
return RolloutReturn(num_collected_steps * env.num_envs, num_collected_episodes,
continue_training=False)
# 3. store the last obs, depth, and goal
# self._update_info_buffer(infos, dones)
self._store_transition(replay_buffer)
self._update_current_progress_remaining(self.num_timesteps, self._total_timesteps)
# 4. update the obs, depth, goal, and reset the goal for the done-env
self._last_obs = new_obs
self._last_depth = env.getDepthImage()
for idx, done in enumerate(dones):
if done:
# Update stats
num_collected_episodes += 1
self._episode_num += 1
# reset goal for the 'done' env
self._last_goal[idx] = self.get_random_goal(self._last_obs[idx])
callback.on_rollout_end()
return RolloutReturn(num_collected_steps * env.num_envs, num_collected_episodes, continue_training)
def prapare_input_observation(self, obs):
"""
convert the observation from body frame to primitive frame,
and then concatenate it with the depth features (to ensure the translational invariance)
"""
obs_return = np.ones(
(obs.shape[0], self.lattice_space.vertical_num, self.lattice_space.horizon_num, obs.shape[1]),
dtype=np.float32)
id = 0
v_b = obs[:, 0:3]
a_b = obs[:, 3:6]
g_b = obs[:, 6:9]
for i in range(self.lattice_space.vertical_num - 1, -1, -1):
for j in range(self.lattice_space.horizon_num - 1, -1, -1):
Rbp = self.lattice_primitive.getRotation(id)
v_p = np.dot(Rbp.T, v_b.T).T
a_p = np.dot(Rbp.T, a_b.T).T
g_p = np.dot(Rbp.T, g_b.T).T
obs_return[:, i, j, 0:3] = v_p
obs_return[:, i, j, 3:6] = a_p
obs_return[:, i, j, 6:9] = g_p
# obs_return[:, i, j, 0:6] = self.normalize_obs(obs_return[:, i, j, 0:6])
id = id + 1
obs_return = np.transpose(obs_return, [0, 3, 1, 2])
return th.from_numpy(obs_return)
def unnormalize_obs(self, vel_acc_norm):
vel = vel_acc_norm[:, 0:3] * self.lattice_space.vel_max
acc = vel_acc_norm[:, 3:6] * self.lattice_space.acc_max
return np.hstack((vel, acc))
def normalize_obs(self, vel_acc):
vel_norm = vel_acc[:, 0:3] / self.lattice_space.vel_max
acc_norm = vel_acc[:, 3:6] / self.lattice_space.acc_max
return np.hstack((vel_norm, acc_norm))
def cost_filter(self, costs_):
# costs_ = costs.clone() # NOTE: numpy.ndarray is reference invocation!
if self.unselect <= 0 or self.unselect >= 1:
return costs_
# filter the negative samples
rows, cols = costs_.size()
unselect = int(cols * self.unselect)
for i in range(rows):
row = costs_[i]
_, indices = th.topk(row, unselect)
costs_[i][indices] = 0.0
return costs_
def _setup_learn(
self,
total_timesteps,
eval_env=None,
callback=None,
eval_freq=10000,
n_eval_episodes=5,
log_path=None,
reset_num_timesteps=True,
tb_log_name="run",
):
# ----------------- Init the First Observation -----------------
# super()._setup_learn() 中: self._last_obs = self.env.reset()
total_timesteps_, callback_ = super()._setup_learn(
total_timesteps,
eval_env,
callback,
eval_freq,
n_eval_episodes,
log_path,
reset_num_timesteps,
tb_log_name,
)
self._last_depth = self.env.getDepthImage()
self._last_goal = np.zeros([self.env.num_envs, 3], dtype=np.float32)
for i in range(0, self.env.num_envs):
self._last_goal[i] = self.get_random_goal(self._last_obs[i])
self._map_id = np.zeros((self.env.num_envs, 1), dtype=np.float32)
return total_timesteps_, callback_
def _sample_action(self) -> np.ndarray:
"""
use pretrained model or current model to sample the actions (endstate)
self._last_obs: last state obs [p, v, a, q]
self._last_depth: last depth image
"""
obs = self._last_obs.copy()
goal_w = self._last_goal.copy()
depth = th.from_numpy(self._last_depth).to(self.device)
# wxyz 四元数的逆[w, -x, -y, -z]
quat_bw = -obs[:, 9:13]
quat_bw[:, 0] = -quat_bw[:, 0]
vel_acc_norm_b = self.normalize_obs(obs[:, 3:9])
goal_dir_w = (goal_w - obs[:, 0:3]) / np.linalg.norm(goal_w - obs[:, 0:3], axis=1)[:, np.newaxis]
goal_dir_b = rotate(quat_bw, goal_dir_w)
obs_norm_b = np.hstack((vel_acc_norm_b, goal_dir_b))
obs_norm_input = self.prapare_input_observation(obs_norm_b)
obs_norm_input = obs_norm_input.to(self.device)
endstate_pred, score_pred = self.policy.predict(depth, obs_norm_input)
endstate_pred = endstate_pred.cpu().numpy()
return endstate_pred
def _dump_logs(self) -> None:
"""
Write log.
"""
time_elapsed = time.time() - self.start_time
fps = int((self.num_timesteps - self._num_timesteps_at_start) / (time_elapsed + 1e-8))
self.logger.record("time/fps", fps, exclude="tensorboard")
self.logger.record("time/minute_elapsed", int(time_elapsed / 60), exclude="tensorboard")
self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
self.logger.record("train/map_id", self._map_id[0][0], exclude="tensorboard")
# Pass the number of timesteps for tensorboard
self.logger.dump(step=self.num_timesteps)
def _store_transition(self, replay_buffer):
# Avoid modification by reference
obs = deepcopy(self._last_obs)
goal = deepcopy(self._last_goal)
depth = deepcopy(self._last_depth)
map_id = deepcopy(self._map_id)
replay_buffer.add(
obs,
goal,
depth,
map_id
)
def get_random_goal(self, uav_state=None):
world = self.env.world_box
# 1. Use random goal in map
if uav_state is None:
world_center = np.array([world[3] + world[0], world[4] + world[1], world[5] + world[2]]) / 2
world_scale = np.array([world[3] - world[0], world[4] - world[1], 1.0])
# The goal can be out of the world, if strictly in world: np.random.uniform(-0.5, 0.5, 3)
random_numbers = np.random.uniform(-1, 1, 3)
random_goal = random_numbers * world_scale + world_center
# 2. Use goal in front of the UAV (for better imitation learning)
else:
q_wb = uav_state[9:]
p_wb = uav_state[0:3]
goal = np.random.randn(3) + np.array([2, 0, 0])
goal_dir = goal / np.linalg.norm(goal)
random_goal_b = 50 * goal_dir
random_goal_w = transform(q_wb, p_wb, random_goal_b)
random_goal_w[2] = np.random.uniform(-1, 1) * 1 + (world[5] + world[2]) / 2
random_goal = random_goal_w
return random_goal
def reset_state(self):
"""
Reset the state and map_id after every train step, because the state and map_id are manually set in training,
which will affect the cost, controller, image render, and other parts for next rollout
"""
self.env.setMapID(-np.ones((self.env.num_envs, 1)))
self._last_obs = self.env.reset()
self._last_depth = self.env.getDepthImage()
for i in range(0, self.env.num_envs):
self._last_goal[i] = self.get_random_goal(self._last_obs[i])
def _convert_train_freq(self) -> None:
"""
Convert `train_freq` parameter (int or tuple)
to a TrainFreq object.
"""
if not isinstance(self.train_freq, TrainFreq):
train_freq = self.train_freq
# The value of the train frequency will be checked later
if not isinstance(train_freq, tuple):
train_freq = (train_freq, "step")
try:
train_freq = (train_freq[0], TrainFrequencyUnit(train_freq[1]))
except ValueError:
raise ValueError(
f"The unit of the `train_freq` must be either 'step' or 'episode' not '{train_freq[1]}'!")
if not isinstance(train_freq[0], int):
raise ValueError(f"The frequency of `train_freq` must be an integer and not {train_freq[0]}")
self.train_freq = TrainFreq(*train_freq)

View File

@@ -0,0 +1,71 @@
# The backbone and the custom gradient layer.
import time
import torch as th
import torch.nn
import numpy as np
from torchvision.models import mobilenet_v3_small
from flightpolicy.yopo.resnet import resnet18
from torch.autograd import Function
# 18ms, Fast and effective.
class ResNet18(torch.nn.Module):
def __init__(self, output_dim: int, primitive_shape: int):
super(ResNet18, self).__init__()
self.cnn = resnet18(pretrained=False)
self.cnn.conv1 = th.nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
if (primitive_shape != 1):
self.cnn.avgpool = th.nn.Sequential()
self.cnn.fc = th.nn.Conv2d(512, output_dim, kernel_size=1, stride=1, padding=0, bias=False)
self.features_dim = output_dim
def forward(self, depth: th.Tensor) -> th.Tensor:
return self.cnn(depth)
# 20ms, Performs worse than ResNet and is slower than ResNet-18.
class MobileNet(th.nn.Module):
def __init__(self, output_dim: int):
super(MobileNet, self).__init__()
self.cnn = mobilenet_v3_small(pretrained=False)
self.cnn.features[0][0] = th.nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1, bias=False)
self.cnn.classifier = th.nn.Linear(576, output_dim)
self.features_dim = output_dim
def forward(self, depth: th.Tensor) -> th.Tensor:
return self.cnn(depth)
def YopoBackbone(output_dim, primitive_shape):
return ResNet18(output_dim, primitive_shape)
class CostAndGradLayer(Function):
@staticmethod
def forward(ctx, input_dp, train_env, primitive_id):
# print("input ", input_dp.shape)
device = input_dp.device
cost, grad = train_env.getCostAndGradient(input_dp, primitive_id)
grad = np.minimum(grad, 1.0) # Gradient clipping: Prevent excessively large values.
cost = torch.tensor(cost).to(device)
grad = torch.tensor(grad).to(device)
ctx.save_for_backward(grad)
cost.requires_grad = True
return cost
@staticmethod
def backward(ctx, cost_grad_input):
grad, = ctx.saved_tensors
return_grad = th.bmm(grad.unsqueeze(-1), cost_grad_input.unsqueeze(-1)).squeeze(dim=2)
# print("grad ", return_grad.shape)
# print("grad: ", return_grad)
return return_grad, None, None
if __name__ == '__main__':
net = YopoBackbone(64, 3)
input_ = torch.zeros((1, 1, 96, 96))
start = time.time()
output = net(input_)
print(time.time() - start)

View File

@@ -0,0 +1,213 @@
"""
YOPO Network
forward, prediction, pre-processing, post-processing
"""
import torch as th
from torch import nn
import numpy as np
from typing import Any, Dict, List, Type
from flightpolicy.yopo.yopo_network import YopoBackbone, CostAndGradLayer
class YopoPolicy(nn.Module):
def __init__(
self,
observation_dim,
action_dim, # x_pva, y_pva, z_pva, score
hidden_state,
lattice_space,
lattice_primitive,
lr_schedule=None,
train_env=None,
net_arch=None,
activation_fn=nn.ReLU,
normalize_images=True,
optimizer_class=th.optim.Adam,
optimizer_kwargs=None,
device=None
):
super(YopoPolicy, self).__init__()
self.observation_dim = observation_dim
self.action_dim = action_dim
self.lattice_space = lattice_space
self.hidden_state = hidden_state
self.lattice_primitive = lattice_primitive
self.optimizer_class = optimizer_class
self.optimizer_kwargs = optimizer_kwargs
self.net_arch = net_arch
self.activation_fn = activation_fn
self.normalize_images = normalize_images
self.yaw_diff = lattice_primitive.yaw_diff
self.pitch_diff = lattice_primitive.pitch_diff
self.train_env = train_env
self.device = device
self._build(lr_schedule)
def _build(self, lr_schedule=None) -> None:
# output state dim = action dim + score
output_dim = (self.action_dim + 1) * self.lattice_space.vel_num * self.lattice_space.radio_num
# input state dim = hidden_state + vel + acc + goal
input_dim = self.hidden_state + 9
self.image_backbone = YopoBackbone(self.hidden_state,
self.lattice_space.horizon_num * self.lattice_space.vertical_num)
self.state_backbone = nn.Sequential()
self.yopo_header = self.create_header(input_dim, output_dim, self.net_arch, self.activation_fn, True)
self.grad_layer = CostAndGradLayer.apply
# Setup optimizer with initial learning rate
learning_rate = lr_schedule(1) if lr_schedule is not None else 1e-3
self.optimizer = self.optimizer_class(self.parameters(), lr=learning_rate)
# TenserRT Transfer
def forward(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor:
"""
forward propagation of neural network, only used for TensorRT conversion.
"""
depth_feature = self.image_backbone(depth)
obs_feature = self.state_backbone(obs)
input_tensor = th.cat((obs_feature, depth_feature), 1)
output = self.yopo_header(input_tensor)
# [batch, endstate+score, lattice_row, lattice_col]
return output
# Training Policy
def inference(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor:
"""
For network training:
(1) predicted the endstate(end_state) and score
(2) record the gradients and costs of prediction
"""
depth_feature = self.image_backbone(depth)
obs_feature = self.state_backbone(obs)
input_tensor = th.cat((obs_feature, depth_feature), 1)
output = self.yopo_header(input_tensor)
# [batch, endstate+score, lattice_num]
batch_size = obs.shape[0]
output = output.view(batch_size, 10, self.lattice_space.horizon_num * self.lattice_space.vertical_num)
# output.register_hook(self.print_grad)
endstate_pred = output[:, 0:9, :]
score_pred = output[:, 9, :]
endstate_score_predictions = th.zeros_like(output).to(self.device)
cost_labels = th.zeros((batch_size, self.lattice_space.horizon_num * self.lattice_space.vertical_num)).to(self.device)
for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num):
id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i
ids = id * np.ones((batch_size, 1))
endstate = self.pred_to_endstate(endstate_pred[:, :, i], id)
# endstate.register_hook(self.print_grad)
cost_label = self.grad_layer(endstate, self.train_env, ids)
endstate_score_predictions[:, 0:9, i] = endstate
endstate_score_predictions[:, 9, i] = score_pred[:, i]
cost_labels[:, i] = cost_label.squeeze()
return endstate_score_predictions, cost_labels
# Testing Policy
def predict(self, depth: th.Tensor, obs: th.Tensor, return_all_preds=False) -> th.Tensor:
"""
For network testing:
(1) predicted the endstate(end_state) and score
"""
with th.no_grad():
depth_feature = self.image_backbone(depth)
obs_feature = self.state_backbone(obs.float())
input_tensor = th.cat((obs_feature, depth_feature), 1)
output = self.yopo_header(input_tensor)
batch_size = obs.shape[0]
output = output.view(batch_size, 10, self.lattice_space.horizon_num * self.lattice_space.vertical_num)
endstate_pred = output[:, 0:9, :]
score_pred = output[:, 9, :]
if not return_all_preds:
endstate_prediction = th.zeros(batch_size, self.action_dim)
score_prediction = th.zeros(batch_size, 1)
for i in range(0, batch_size):
action_id = th.argmin(score_pred[i]).item()
lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id
endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id)
score_prediction[i] = score_pred[i, action_id]
else:
endstate_prediction = th.zeros_like(endstate_pred)
score_prediction = score_pred
for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num):
lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i
endstate = self.pred_to_endstate(endstate_pred[:, :, i], lattice_id)
endstate_prediction[:, :, i] = endstate
return endstate_prediction, score_prediction
def pred_to_endstate(self, endstate_pred: th.Tensor, id: int):
"""
Transform the predicted state to the body frame.
"""
delta_yaw = endstate_pred[:, 0] * self.yaw_diff
delta_pitch = endstate_pred[:, 1] * self.pitch_diff
radio = endstate_pred[:, 2] * self.lattice_space.radio_range + self.lattice_space.radio_range
yaw, pitch = self.lattice_primitive.getAngleLattice(id)
endstate_x = th.cos(pitch + delta_pitch) * th.cos(yaw + delta_yaw) * radio
endstate_y = th.cos(pitch + delta_pitch) * th.sin(yaw + delta_yaw) * radio
endstate_z = th.sin(pitch + delta_pitch) * radio
endstate_p = th.stack((endstate_x, endstate_y, endstate_z), dim=1)
endstate_vp = endstate_pred[:, 3:6] * self.lattice_space.vel_max
endstate_ap = endstate_pred[:, 6:9] * self.lattice_space.acc_max
Rbp = self.lattice_primitive.getRotation(id)
endstate_vb = th.matmul(th.tensor(Rbp).to(self.device), endstate_vp.t()).t()
endstate_ab = th.matmul(th.tensor(Rbp).to(self.device), endstate_ap.t()).t()
endstate = th.cat((endstate_p, endstate_vb, endstate_ab), dim=1)
endstate[:, [0, 1, 2, 3, 4, 5, 6, 7, 8]] = endstate[:, [0, 3, 6, 1, 4, 7, 2, 5, 8]]
return endstate
def create_header(self,
input_dim: int,
output_dim: int,
net_arch: List[int],
activation_fn: Type[nn.Module] = nn.ReLU,
squash_output: bool = False,
) -> nn.Sequential:
if len(net_arch) > 0:
modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0),
activation_fn()]
else:
modules = []
for idx in range(len(net_arch) - 1):
modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1,
padding=0))
modules.append(activation_fn())
if output_dim > 0:
last_layer_dim = net_arch[-1] if len(net_arch) > 0 else input_dim
modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1,
padding=0))
if squash_output:
modules.append(nn.Tanh())
return nn.Sequential(*modules)
def get_constructor_parameters(self) -> Dict[str, Any]:
data = {"net_arch": self.net_arch,
"hidden_state": self.hidden_state,
"observation_dim": self.observation_dim,
"action_dim": self.action_dim,
"activation_fn": self.activation_fn,
"lattice_space": self.lattice_space,
"lattice_primitive": self.lattice_primitive
}
return data
def print_grad(ctx, grad):
print("grad of hook: ", grad)
def set_training_mode(self, mode: bool) -> None:
"""
Put the policy in either training or evaluation mode.
This affects certain modules, such as batch normalisation and dropout.
:param mode: if true, set to training mode, else set to evaluation mode
"""
self.train(mode)