From 98b6e6dc3be6f88abb72e351c8f2da2b23b8ab85 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 7 Jan 2021 19:55:00 +0800 Subject: Type hint for python version lower than 3.9 --- models/rgb_part_net.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'models/rgb_part_net.py') diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 3037da0..39cbed6 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -1,4 +1,5 @@ import random +from typing import Tuple, List import torch import torch.nn as nn @@ -16,14 +17,14 @@ class RGBPartNet(nn.Module): num_class: int = 74, ae_in_channels: int = 3, ae_feature_channels: int = 64, - f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64), - hpm_scales: tuple[int, ...] = (1, 2, 4), + f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64), + hpm_scales: Tuple[int, ...] = (1, 2, 4), hpm_use_avg_pool: bool = True, hpm_use_max_pool: bool = True, fpfe_feature_channels: int = 32, - fpfe_kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - fpfe_paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - fpfe_halving: tuple[int, ...] = (0, 2, 3), + fpfe_kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + fpfe_paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + fpfe_halving: Tuple[int, ...] = (0, 2, 3), tfa_squeeze_ratio: int = 4, tfa_num_parts: int = 16, embedding_dims: int = 256, @@ -142,8 +143,8 @@ class RGBPartNet(nn.Module): return (x_c_c1, x_p_c1), None @staticmethod - def _pose_sim_loss(f_p_c1: list[torch.Tensor], - f_p_c2: list[torch.Tensor]) -> torch.Tensor: + def _pose_sim_loss(f_p_c1: List[torch.Tensor], + f_p_c2: List[torch.Tensor]) -> torch.Tensor: f_p_c1_mean = torch.stack(f_p_c1).mean(dim=0) f_p_c2_mean = torch.stack(f_p_c2).mean(dim=0) return F.mse_loss(f_p_c1_mean, f_p_c2_mean) -- cgit v1.2.3 From 24b5968bfc5799e44c9bbbc00e3a9be00f4509ac Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Mon, 15 Feb 2021 11:08:52 +0800 Subject: Revert "Memory usage improvement" This reverts commit be508061 --- models/rgb_part_net.py | 117 ++++++++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 60 deletions(-) (limited to 'models/rgb_part_net.py') diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index c489ec6..260eabd 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -58,67 +58,64 @@ class RGBPartNet(nn.Module): def fc(self, x): return x @ self.fc_mat - def forward(self, x, y=None, is_c1=True): - # Step 1a: Disentangle condition 1 clips - if is_c1: - # n, t, c, h, w - ((x_c, x_p), xrecon_loss, images) = self._disentangle(x, is_c1) - - # Step 2.a: Static Gait Feature Aggregation & HPM - # n, c, h, w - x_c = self.hpm(x_c) - # p, n, c - - # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation) - # n, t, c, h, w - x_p = self.pn(x_p) - # p, n, c - - # Step 3: Cat feature map together and fc - x = torch.cat((x_c, x_p)) - x = self.fc(x) - - if self.training: - y = y.T - hpm_ba_trip = self.hpm_ba_trip( - x[:self.hpm_num_parts], y[:self.hpm_num_parts] - ) - pn_ba_trip = self.pn_ba_trip( - x[self.hpm_num_parts:], y[self.hpm_num_parts:] - ) - return (xrecon_loss, hpm_ba_trip, pn_ba_trip), images - else: # evaluating - return x.unsqueeze(1).view(-1) - else: # Step 1b: Disentangle condition 2 clips - return self._disentangle(x, is_c1) - - def _disentangle(self, x_t2, is_c1=True): - if is_c1: # condition 1 - n, t, *_ = x_size = x_t2.size() - device = x_t2.device - if self.training: - (f_a_, f_c_, f_p_), xrecon_loss = self.ae(x_t2, is_c1) - # Decode features - with torch.no_grad(): - x_c = self._decode_cano_feature(f_c_, n, t, device) - x_p = self._decode_pose_feature(f_p_, *x_size, device) - - i_a, i_c, i_p = None, None, None - if self.image_log_on: - i_a = self._decode_appr_feature(f_a_, *x_size, device) - # Continue decoding canonical features - i_c = self.ae.decoder.trans_conv3(x_c) - i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c)) - i_p = x_p - - return (x_c, x_p), xrecon_loss, (i_a, i_c, i_p) - else: # evaluating - f_c_, f_p_ = self.ae(x_t2) + def forward(self, x_c1, x_c2=None, y=None): + # Step 1: Disentanglement + # n, t, c, h, w + ((x_c, x_p), losses, images) = self._disentangle(x_c1, x_c2) + + # Step 2.a: Static Gait Feature Aggregation & HPM + # n, c, h, w + x_c = self.hpm(x_c) + # p, n, c + + # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation) + # n, t, c, h, w + x_p = self.pn(x_p) + # p, n, c + + # Step 3: Cat feature map together and fc + x = torch.cat((x_c, x_p)) + x = self.fc(x) + + if self.training: + y = y.T + hpm_ba_trip = self.hpm_ba_trip( + x[:self.hpm_num_parts], y[:self.hpm_num_parts] + ) + pn_ba_trip = self.pn_ba_trip( + x[self.hpm_num_parts:], y[self.hpm_num_parts:] + ) + losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip)) + return losses, images + else: + return x.unsqueeze(1).view(-1) + + def _disentangle(self, x_c1_t2, x_c2_t2=None): + n, t, c, h, w = x_c1_t2.size() + device = x_c1_t2.device + x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :] + if self.training: + ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2) + # Decode features + with torch.no_grad(): x_c = self._decode_cano_feature(f_c_, n, t, device) - x_p = self._decode_pose_feature(f_p_, *x_size, device) - return (x_c, x_p), None, None - else: # condition 2 - return self.ae(x_t2, is_c1) + x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device) + + i_a, i_c, i_p = None, None, None + if self.image_log_on: + i_a = self._decode_appr_feature(f_a_, n, t, c, h, w, device) + # Continue decoding canonical features + i_c = self.ae.decoder.trans_conv3(x_c) + i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c)) + i_p = x_p + + return (x_c, x_p), losses, (i_a, i_c, i_p) + + else: # evaluating + f_c_, f_p_ = self.ae(x_c1_t2) + x_c = self._decode_cano_feature(f_c_, n, t, device) + x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device) + return (x_c, x_p), None, None def _decode_appr_feature(self, f_a_, n, t, c, h, w, device): # Decode appearance features -- cgit v1.2.3