Replace Tanh with ReLU of scores and simplify matrix operations

This commit is contained in:
TJU_Lu
2024-12-22 15:43:04 +08:00
parent d21a2f818b
commit 43cf7bea6d
3 changed files with 45 additions and 90 deletions

View File

@@ -157,8 +157,7 @@ class YopoAlgorithm:
if log_interval is not None and n_updates % log_interval[0] == 0: if log_interval is not None and n_updates % log_interval[0] == 0:
self.logger.record("time/epoch", epoch_, exclude="tensorboard") self.logger.record("time/epoch", epoch_, exclude="tensorboard")
self.logger.record("time/steps", n_updates, exclude="tensorboard") self.logger.record("time/steps", n_updates, exclude="tensorboard")
self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time), self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time), exclude="tensorboard")
exclude="tensorboard")
self.logger.record("train/trajectory_cost", np.mean(cost_losses)) self.logger.record("train/trajectory_cost", np.mean(cost_losses))
self.logger.record("train/score_loss", np.mean(score_losses)) self.logger.record("train/score_loss", np.mean(score_losses))
self.logger.dump(step=n_updates) self.logger.dump(step=n_updates)
@@ -260,6 +259,7 @@ class YopoAlgorithm:
costs.append(rew) costs.append(rew)
ep_len += 1 ep_len += 1
print("round ", n_roll, ", total steps:", len(costs), ", avg cost:", sum(costs) / len(costs)) print("round ", n_roll, ", total steps:", len(costs), ", avg cost:", sum(costs) / len(costs))
self.env.disconnectUnity()
def train(self, gradient_steps: int, batch_size: int) -> None: def train(self, gradient_steps: int, batch_size: int) -> None:
""" """
@@ -364,25 +364,17 @@ class YopoAlgorithm:
convert the observation from body frame to primitive frame, convert the observation from body frame to primitive frame,
and then concatenate it with the depth features (to ensure the translational invariance) and then concatenate it with the depth features (to ensure the translational invariance)
""" """
obs_return = np.ones( obs_return = np.ones((obs.shape[0], obs.shape[1], self.lattice_space.vertical_num, self.lattice_space.horizon_num), dtype=np.float32)
(obs.shape[0], self.lattice_space.vertical_num, self.lattice_space.horizon_num, obs.shape[1]),
dtype=np.float32)
id = 0 id = 0
v_b = obs[:, 0:3] v_b, a_b, g_b = obs[:, 0:3], obs[:, 3:6], obs[:, 6:9]
a_b = obs[:, 3:6]
g_b = obs[:, 6:9]
for i in range(self.lattice_space.vertical_num - 1, -1, -1): for i in range(self.lattice_space.vertical_num - 1, -1, -1):
for j in range(self.lattice_space.horizon_num - 1, -1, -1): for j in range(self.lattice_space.horizon_num - 1, -1, -1):
Rbp = self.lattice_primitive.getRotation(id) Rbp = self.lattice_primitive.getRotation(id)
v_p = np.dot(Rbp.T, v_b.T).T obs_return[:, 0:3, i, j] = np.dot(v_b, Rbp) # v_p
a_p = np.dot(Rbp.T, a_b.T).T obs_return[:, 3:6, i, j] = np.dot(a_b, Rbp) # a_p
g_p = np.dot(Rbp.T, g_b.T).T obs_return[:, 6:9, i, j] = np.dot(g_b, Rbp) # g_p
obs_return[:, i, j, 0:3] = v_p # obs_return[:, 0:6, i, j] = self.normalize_obs(obs_return[:, 0:6, i, j])
obs_return[:, i, j, 3:6] = a_p
obs_return[:, i, j, 6:9] = g_p
# obs_return[:, i, j, 0:6] = self.normalize_obs(obs_return[:, i, j, 0:6])
id = id + 1 id = id + 1
obs_return = np.transpose(obs_return, [0, 3, 1, 2])
return th.from_numpy(obs_return) return th.from_numpy(obs_return)
def unnormalize_obs(self, vel_acc_norm): def unnormalize_obs(self, vel_acc_norm):
@@ -421,7 +413,7 @@ class YopoAlgorithm:
self._last_obs = self.env.reset() self._last_obs = self.env.reset()
self._last_depth = self.env.getDepthImage() self._last_depth = self.env.getDepthImage()
self._last_goal = np.zeros([self.env.num_envs, 3], dtype=np.float32) self._last_goal = np.zeros([self.env.num_envs, 3], dtype=np.float32)
for i in range(0, self.env.num_envs): for i in range(self.env.num_envs):
self._last_goal[i] = self.get_random_goal(self._last_obs[i]) self._last_goal[i] = self.get_random_goal(self._last_obs[i])
self._map_id = np.zeros((self.env.num_envs, 1), dtype=np.float32) self._map_id = np.zeros((self.env.num_envs, 1), dtype=np.float32)
@@ -503,7 +495,7 @@ class YopoAlgorithm:
self.env.setMapID(-np.ones((self.env.num_envs, 1))) self.env.setMapID(-np.ones((self.env.num_envs, 1)))
self._last_obs = self.env.reset() self._last_obs = self.env.reset()
self._last_depth = self.env.getDepthImage() self._last_depth = self.env.getDepthImage()
for i in range(0, self.env.num_envs): for i in range(self.env.num_envs):
self._last_goal[i] = self.get_random_goal(self._last_obs[i]) self._last_goal[i] = self.get_random_goal(self._last_obs[i])
def _convert_train_freq(self) -> None: def _convert_train_freq(self) -> None:

View File

@@ -51,10 +51,9 @@ class YopoPolicy(nn.Module):
output_dim = (self.action_dim + 1) * self.lattice_space.vel_num * self.lattice_space.radio_num output_dim = (self.action_dim + 1) * self.lattice_space.vel_num * self.lattice_space.radio_num
# input state dim = hidden_state + vel + acc + goal # input state dim = hidden_state + vel + acc + goal
input_dim = self.hidden_state + 9 input_dim = self.hidden_state + 9
self.image_backbone = YopoBackbone(self.hidden_state, self.image_backbone = YopoBackbone(self.hidden_state, self.lattice_space.horizon_num * self.lattice_space.vertical_num)
self.lattice_space.horizon_num * self.lattice_space.vertical_num)
self.state_backbone = nn.Sequential() self.state_backbone = nn.Sequential()
self.yopo_header = self.create_header(input_dim, output_dim, self.net_arch, self.activation_fn, True) self.yopo_header = self.create_header(input_dim, output_dim, self.net_arch, self.activation_fn)
self.grad_layer = CostAndGradLayer.apply self.grad_layer = CostAndGradLayer.apply
# Setup optimizer with initial learning rate # Setup optimizer with initial learning rate
learning_rate = lr_schedule(1) if lr_schedule is not None else 1e-3 learning_rate = lr_schedule(1) if lr_schedule is not None else 1e-3
@@ -63,26 +62,24 @@ class YopoPolicy(nn.Module):
# TenserRT Transfer # TenserRT Transfer
def forward(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: def forward(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor:
""" """
forward propagation of neural network, only used for TensorRT conversion. forward propagation of neural network, separated for TensorRT conversion.
""" """
depth_feature = self.image_backbone(depth) depth_feature = self.image_backbone(depth)
obs_feature = self.state_backbone(obs) obs_feature = self.state_backbone(obs)
input_tensor = th.cat((obs_feature, depth_feature), 1) input_tensor = th.cat((obs_feature, depth_feature), 1)
output = self.yopo_header(input_tensor) output = self.yopo_header(input_tensor)
# [batch, endstate+score, lattice_row, lattice_col] endstate = th.tanh(output[:, :9])
return output score = th.relu(output[:, 9:])
return th.cat((endstate, score), dim=1) # [batch, endstate+score, lattice_row, lattice_col]
# Training Policy # Training Policy
def inference(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: def inference(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor:
""" """
For network training: For network training:
(1) predicted the endstate(end_state) and score (1) predicted the endstate and score
(2) record the gradients and costs of prediction (2) record the gradients and costs of prediction
""" """
depth_feature = self.image_backbone(depth) output = self.forward(depth, obs)
obs_feature = self.state_backbone(obs)
input_tensor = th.cat((obs_feature, depth_feature), 1)
output = self.yopo_header(input_tensor)
# [batch, endstate+score, lattice_num] # [batch, endstate+score, lattice_num]
batch_size = obs.shape[0] batch_size = obs.shape[0]
@@ -93,7 +90,7 @@ class YopoPolicy(nn.Module):
endstate_score_predictions = th.zeros_like(output).to(self.device) endstate_score_predictions = th.zeros_like(output).to(self.device)
cost_labels = th.zeros((batch_size, self.lattice_space.horizon_num * self.lattice_space.vertical_num)).to(self.device) cost_labels = th.zeros((batch_size, self.lattice_space.horizon_num * self.lattice_space.vertical_num)).to(self.device)
for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num): for i in range(self.lattice_space.horizon_num * self.lattice_space.vertical_num):
id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i
ids = id * np.ones((batch_size, 1)) ids = id * np.ones((batch_size, 1))
endstate = self.pred_to_endstate(endstate_pred[:, :, i], id) endstate = self.pred_to_endstate(endstate_pred[:, :, i], id)
@@ -106,40 +103,30 @@ class YopoPolicy(nn.Module):
return endstate_score_predictions, cost_labels return endstate_score_predictions, cost_labels
# Testing Policy # Testing Policy
def predict(self, depth: th.Tensor, obs: th.Tensor, return_all_preds=False) -> th.Tensor: def predict(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor:
""" """
For network testing: For network testing:
(1) predicted the endstate(end_state) and score (1) predicted the endstate and score, and return the optimal
""" """
with th.no_grad(): with th.no_grad():
depth_feature = self.image_backbone(depth) output = self.forward(depth, obs.float())
obs_feature = self.state_backbone(obs.float())
input_tensor = th.cat((obs_feature, depth_feature), 1)
output = self.yopo_header(input_tensor)
batch_size = obs.shape[0] batch_size = obs.shape[0]
output = output.view(batch_size, 10, self.lattice_space.horizon_num * self.lattice_space.vertical_num) output = output.view(batch_size, 10, self.lattice_space.horizon_num * self.lattice_space.vertical_num)
endstate_pred = output[:, 0:9, :] endstate_pred = output[:, 0:9, :]
score_pred = output[:, 9, :] score_pred = output[:, 9, :]
if not return_all_preds: endstate_prediction = th.zeros(batch_size, self.action_dim)
endstate_prediction = th.zeros(batch_size, self.action_dim) score_prediction = th.zeros(batch_size, 1)
score_prediction = th.zeros(batch_size, 1) for i in range(batch_size):
for i in range(0, batch_size): action_id = th.argmin(score_pred[i]).item()
action_id = th.argmin(score_pred[i]).item() lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id
lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id)
endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id) score_prediction[i] = score_pred[i, action_id]
score_prediction[i] = score_pred[i, action_id]
else:
endstate_prediction = th.zeros_like(endstate_pred)
score_prediction = score_pred
for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num):
lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i
endstate = self.pred_to_endstate(endstate_pred[:, :, i], lattice_id)
endstate_prediction[:, :, i] = endstate
return endstate_prediction, score_prediction return endstate_prediction, score_prediction
def pred_to_endstate(self, endstate_pred: th.Tensor, id: int): def pred_to_endstate(self, endstate_pred: th.Tensor, id: int) -> th.Tensor:
""" """
Transform the predicted state to the body frame. Transform the predicted state to the body frame.
""" """
@@ -154,9 +141,9 @@ class YopoPolicy(nn.Module):
endstate_vp = endstate_pred[:, 3:6] * self.lattice_space.vel_max endstate_vp = endstate_pred[:, 3:6] * self.lattice_space.vel_max
endstate_ap = endstate_pred[:, 6:9] * self.lattice_space.acc_max endstate_ap = endstate_pred[:, 6:9] * self.lattice_space.acc_max
Rbp = self.lattice_primitive.getRotation(id) Rpb = th.tensor(self.lattice_primitive.getRotation(id).T).to(self.device)
endstate_vb = th.matmul(th.tensor(Rbp).to(self.device), endstate_vp.t()).t() endstate_vb = th.matmul(endstate_vp, Rpb)
endstate_ab = th.matmul(th.tensor(Rbp).to(self.device), endstate_ap.t()).t() endstate_ab = th.matmul(endstate_ap, Rpb)
endstate = th.cat((endstate_p, endstate_vb, endstate_ab), dim=1) endstate = th.cat((endstate_p, endstate_vb, endstate_ab), dim=1)
endstate[:, [0, 1, 2, 3, 4, 5, 6, 7, 8]] = endstate[:, [0, 3, 6, 1, 4, 7, 2, 5, 8]] endstate[:, [0, 1, 2, 3, 4, 5, 6, 7, 8]] = endstate[:, [0, 3, 6, 1, 4, 7, 2, 5, 8]]
return endstate return endstate
@@ -170,20 +157,18 @@ class YopoPolicy(nn.Module):
) -> nn.Sequential: ) -> nn.Sequential:
if len(net_arch) > 0: if len(net_arch) > 0:
modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0), modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0), activation_fn()]
activation_fn()]
else: else:
modules = [] modules = []
for idx in range(len(net_arch) - 1): for idx in range(len(net_arch) - 1):
modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1, modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1, padding=0))
padding=0))
modules.append(activation_fn()) modules.append(activation_fn())
if output_dim > 0: if output_dim > 0:
last_layer_dim = net_arch[-1] if len(net_arch) > 0 else input_dim last_layer_dim = net_arch[-1] if len(net_arch) > 0 else input_dim
modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1, modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1, padding=0))
padding=0))
if squash_output: if squash_output:
modules.append(nn.Tanh()) modules.append(nn.Tanh())
return nn.Sequential(*modules) return nn.Sequential(*modules)

View File

@@ -1,10 +1,11 @@
""" """
将yopo模型转换为Tensorrt 将yopo模型转换为Tensorrt
prepare: prepare:
1 pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com 0. make sure you install already install TensorRT
2 git clone https://github.com/NVIDIA-AI-IOT/torch2trt 1. pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com
cd torch2trt 2. git clone https://github.com/NVIDIA-AI-IOT/torch2trt
python setup.py install cd torch2trt
python setup.py install
""" """
import argparse import argparse
@@ -19,28 +20,6 @@ from flightpolicy.envs import vec_env_wrapper as wrapper
from flightpolicy.yopo.yopo_algorithm import YopoAlgorithm from flightpolicy.yopo.yopo_algorithm import YopoAlgorithm
def prapare_input_observation(obs, lattice_space, lattice_primitive):
obs_return = np.ones(
(obs.shape[0], lattice_space.vertical_num, lattice_space.horizon_num, obs.shape[1]),
dtype=np.float32)
id = 0
v_b = obs[:, 0:3]
a_b = obs[:, 3:6]
g_b = obs[:, 6:9]
for i in range(lattice_space.vertical_num - 1, -1, -1):
for j in range(lattice_space.horizon_num - 1, -1, -1):
Rbp = lattice_primitive.getRotation(id)
v_p = np.dot(Rbp.T, v_b.T).T
a_p = np.dot(Rbp.T, a_b.T).T
g_p = np.dot(Rbp.T, g_b.T).T
obs_return[:, i, j, 0:3] = v_p
obs_return[:, i, j, 3:6] = a_p
obs_return[:, i, j, 6:9] = g_p
id = id + 1
obs_return = np.transpose(obs_return, [0, 3, 1, 2])
return obs_return
def parser(): def parser():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--trial", type=int, default=1, help="trial number") parser.add_argument("--trial", type=int, default=1, help="trial number")
@@ -84,10 +63,9 @@ if __name__ == "__main__":
# The inputs should be consistent with training # The inputs should be consistent with training
print("TensorRT Transfer...") print("TensorRT Transfer...")
depth = np.zeros(shape=[1, 1, 96, 160], dtype=np.float32) depth = np.zeros(shape=[1, 1, 96, 160], dtype=np.float32)
obs = np.zeros(shape=[1, 9], dtype=np.float32) obs = np.zeros(shape=[1, 9, lattice_space.vertical_num, lattice_space.horizon_num], dtype=np.float32)
obs_input = prapare_input_observation(obs, lattice_space, lattice_primitive)
depth_in = torch.from_numpy(depth).cuda() depth_in = torch.from_numpy(depth).cuda()
obs_in = torch.from_numpy(obs_input).cuda() obs_in = torch.from_numpy(obs).cuda()
model_trt = torch2trt(model.policy, [depth_in, obs_in], fp16_mode=args.fp16_mode) model_trt = torch2trt(model.policy, [depth_in, obs_in], fp16_mode=args.fp16_mode)
torch.save(model_trt.state_dict(), args.filename) torch.save(model_trt.state_dict(), args.filename)
print("TensorRT Transfer Finish!") print("TensorRT Transfer Finish!")