From 98b6e6dc3be6f88abb72e351c8f2da2b23b8ab85 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 7 Jan 2021 19:55:00 +0800 Subject: Type hint for python version lower than 3.9 --- models/auto_encoder.py | 8 +++++--- models/hpm.py | 4 +++- models/layers.py | 20 ++++++++++---------- models/model.py | 14 +++++++------- models/part_net.py | 13 +++++++------ models/rgb_part_net.py | 15 ++++++++------- 6 files changed, 40 insertions(+), 34 deletions(-) (limited to 'models') diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 7c1f7ef..1e7c323 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -1,3 +1,5 @@ +from typing import Tuple + import torch import torch.nn as nn import torch.nn.functional as F @@ -12,7 +14,7 @@ class Encoder(nn.Module): self, in_channels: int = 3, feature_channels: int = 64, - output_dims: tuple[int, int, int] = (128, 128, 64) + output_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() self.feature_channels = feature_channels @@ -67,7 +69,7 @@ class Decoder(nn.Module): def __init__( self, - input_dims: tuple[int, int, int] = (128, 128, 64), + input_dims: Tuple[int, int, int] = (128, 128, 64), feature_channels: int = 64, out_channels: int = 3, ): @@ -116,7 +118,7 @@ class AutoEncoder(nn.Module): num_class: int = 74, channels: int = 3, feature_channels: int = 64, - embedding_dims: tuple[int, int, int] = (128, 128, 64) + embedding_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() self.encoder = Encoder(channels, feature_channels, embedding_dims) diff --git a/models/hpm.py b/models/hpm.py index 66503e3..7505ed7 100644 --- a/models/hpm.py +++ b/models/hpm.py @@ -1,3 +1,5 @@ +from typing import Tuple + import torch import torch.nn as nn @@ -9,7 +11,7 @@ class HorizontalPyramidMatching(nn.Module): self, in_channels: int, out_channels: int = 128, - scales: tuple[int, ...] = (1, 2, 4), + scales: Tuple[int, ...] = (1, 2, 4), use_avg_pool: bool = True, use_max_pool: bool = True, **kwargs diff --git a/models/layers.py b/models/layers.py index a9f04b3..7f2ccec 100644 --- a/models/layers.py +++ b/models/layers.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import Union, Tuple import torch import torch.nn as nn @@ -10,7 +10,7 @@ class BasicConv2d(nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], **kwargs ): super().__init__() @@ -29,7 +29,7 @@ class VGGConv2d(BasicConv2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]] = 3, + kernel_size: Union[int, Tuple[int, int]] = 3, padding: int = 1, **kwargs ): @@ -47,7 +47,7 @@ class BasicConvTranspose2d(nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], **kwargs ): super().__init__() @@ -66,7 +66,7 @@ class DCGANConvTranspose2d(BasicConvTranspose2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]] = 4, + kernel_size: Union[int, Tuple[int, int]] = 4, stride: int = 2, padding: int = 1, is_last_layer: bool = False, @@ -104,7 +104,7 @@ class FocalConv2d(BasicConv2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], halving: int, **kwargs ): @@ -124,8 +124,8 @@ class FocalConv2dBlock(nn.Module): self, in_channels: int, out_channels: int, - kernel_sizes: tuple[int, int], - paddings: tuple[int, int], + kernel_sizes: Tuple[int, int], + paddings: Tuple[int, int], halving: int, use_pool: bool = True, **kwargs @@ -151,7 +151,7 @@ class BasicConv1d(nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int]], + kernel_size: Union[int, Tuple[int]], **kwargs ): super().__init__() @@ -167,7 +167,7 @@ class HorizontalPyramidPooling(BasicConv2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]] = 1, + kernel_size: Union[int, Tuple[int, int]] = 1, use_avg_pool: bool = True, use_max_pool: bool = True, **kwargs diff --git a/models/model.py b/models/model.py index 1dc0f23..4deced0 100644 --- a/models/model.py +++ b/models/model.py @@ -1,5 +1,5 @@ import os -from typing import Union, Optional +from typing import Union, Optional, Tuple, List import numpy as np import torch @@ -195,9 +195,9 @@ class Model: def _batch_splitter( self, - batch: list[dict[str, Union[np.int64, str, torch.Tensor]]] - ) -> tuple[dict[str, Union[list[str], torch.Tensor]], - dict[str, Union[list[str], torch.Tensor]]]: + batch: List[dict[str, Union[np.int64, str, torch.Tensor]]] + ) -> Tuple[dict[str, Union[List[str], torch.Tensor]], + dict[str, Union[List[str], torch.Tensor]]]: """ Disentanglement need two random conditions, this function will split pr * k * 2 samples to 2 dicts each containing pr * k @@ -212,7 +212,7 @@ class Model: def _make_signature(self, config: dict, - popped_keys: Optional[list] = None) -> str: + popped_keys: Optional[List] = None) -> str: _config = config.copy() if popped_keys: for key in popped_keys: @@ -220,12 +220,12 @@ class Model: return self._gen_sig(list(_config.values())) - def _gen_sig(self, values: Union[tuple, list, str, int, float]) -> str: + def _gen_sig(self, values: Union[Tuple, List, str, int, float]) -> str: strings = [] for v in values: if isinstance(v, str): strings.append(v) - elif isinstance(v, (tuple, list)): + elif isinstance(v, (Tuple, List)): strings.append(self._gen_sig(v)) else: strings.append(str(v)) diff --git a/models/part_net.py b/models/part_net.py index ac7c434..6d8d4e1 100644 --- a/models/part_net.py +++ b/models/part_net.py @@ -1,4 +1,5 @@ import copy +from typing import Tuple import torch import torch.nn as nn @@ -12,9 +13,9 @@ class FrameLevelPartFeatureExtractor(nn.Module): self, in_channels: int = 3, feature_channels: int = 32, - kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - halving: tuple[int, ...] = (0, 2, 3) + kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + halving: Tuple[int, ...] = (0, 2, 3) ): super().__init__() num_blocks = len(kernel_sizes) @@ -112,9 +113,9 @@ class PartNet(nn.Module): self, in_channels: int = 3, feature_channels: int = 32, - kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - halving: tuple[int, ...] = (0, 2, 3), + kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + halving: Tuple[int, ...] = (0, 2, 3), squeeze_ratio: int = 4, num_part: int = 16 ): diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 3037da0..39cbed6 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -1,4 +1,5 @@ import random +from typing import Tuple, List import torch import torch.nn as nn @@ -16,14 +17,14 @@ class RGBPartNet(nn.Module): num_class: int = 74, ae_in_channels: int = 3, ae_feature_channels: int = 64, - f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64), - hpm_scales: tuple[int, ...] = (1, 2, 4), + f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64), + hpm_scales: Tuple[int, ...] = (1, 2, 4), hpm_use_avg_pool: bool = True, hpm_use_max_pool: bool = True, fpfe_feature_channels: int = 32, - fpfe_kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - fpfe_paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - fpfe_halving: tuple[int, ...] = (0, 2, 3), + fpfe_kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + fpfe_paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + fpfe_halving: Tuple[int, ...] = (0, 2, 3), tfa_squeeze_ratio: int = 4, tfa_num_parts: int = 16, embedding_dims: int = 256, @@ -142,8 +143,8 @@ class RGBPartNet(nn.Module): return (x_c_c1, x_p_c1), None @staticmethod - def _pose_sim_loss(f_p_c1: list[torch.Tensor], - f_p_c2: list[torch.Tensor]) -> torch.Tensor: + def _pose_sim_loss(f_p_c1: List[torch.Tensor], + f_p_c2: List[torch.Tensor]) -> torch.Tensor: f_p_c1_mean = torch.stack(f_p_c1).mean(dim=0) f_p_c2_mean = torch.stack(f_p_c2).mean(dim=0) return F.mse_loss(f_p_c1_mean, f_p_c2_mean) -- cgit v1.2.3 From e5a73abd80578aa5e46d8d444466d1e6346ec6ec Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 7 Jan 2021 19:55:00 +0800 Subject: Type hint for python version lower than 3.9 --- models/model.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'models') diff --git a/models/model.py b/models/model.py index 4deced0..725988a 100644 --- a/models/model.py +++ b/models/model.py @@ -1,5 +1,5 @@ import os -from typing import Union, Optional, Tuple, List +from typing import Union, Optional, Tuple, List, Dict import numpy as np import torch @@ -166,7 +166,7 @@ class Model: popped_keys=['root_dir', 'cache_on'] ) self.log_name = '_'.join((self.log_name, self._dataset_sig)) - config: dict = dataset_config.copy() + config: Dict = dataset_config.copy() name = config.pop('name') if name == 'CASIA-B': return CASIAB(**config, is_train=self.is_train) @@ -180,7 +180,7 @@ class Model: dataset: Union[CASIAB], dataloader_config: DataloaderConfiguration ) -> DataLoader: - config: dict = dataloader_config.copy() + config: Dict = dataloader_config.copy() if self.is_train: (self.pr, self.k) = config.pop('batch_size') self.log_name = '_'.join((self.log_name, str(self.pr), str(self.k))) @@ -195,9 +195,9 @@ class Model: def _batch_splitter( self, - batch: List[dict[str, Union[np.int64, str, torch.Tensor]]] - ) -> Tuple[dict[str, Union[List[str], torch.Tensor]], - dict[str, Union[List[str], torch.Tensor]]]: + batch: List[Dict[str, Union[np.int64, str, torch.Tensor]]] + ) -> Tuple[Dict[str, Union[List[str], torch.Tensor]], + Dict[str, Union[List[str], torch.Tensor]]]: """ Disentanglement need two random conditions, this function will split pr * k * 2 samples to 2 dicts each containing pr * k @@ -211,7 +211,7 @@ class Model: return default_collate(_batch[0]), default_collate(_batch[1]) def _make_signature(self, - config: dict, + config: Dict, popped_keys: Optional[List] = None) -> str: _config = config.copy() if popped_keys: -- cgit v1.2.3 From d750dd9dafe3cda3b1331ad2bfecb53c8c2b1267 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 21 Jan 2021 23:47:11 +0800 Subject: A type hint fix --- models/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'models') diff --git a/models/model.py b/models/model.py index bed28a5..f4604c8 100644 --- a/models/model.py +++ b/models/model.py @@ -133,7 +133,7 @@ class Model: dataloader = self._parse_dataloader_config(dataset, dataloader_config) # Prepare for model, optimizer and scheduler model_hp = self.hp.get('model', {}) - optim_hp: dict = self.hp.get('optimizer', {}).copy() + optim_hp: Dict = self.hp.get('optimizer', {}).copy() ae_optim_hp = optim_hp.pop('auto_encoder', {}) pn_optim_hp = optim_hp.pop('part_net', {}) hpm_optim_hp = optim_hp.pop('hpm', {}) -- cgit v1.2.3 From a040400d7caa267d4bfbe8e5520568806f92b3d4 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Sat, 23 Jan 2021 00:43:20 +0800 Subject: Type hint fixes --- models/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'models') diff --git a/models/model.py b/models/model.py index 8992914..1dd195e 100644 --- a/models/model.py +++ b/models/model.py @@ -220,7 +220,7 @@ class Model: def predict_all( self, - iters: tuple[int], + iters: Tuple[int], dataset_config: DatasetConfiguration, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] @@ -266,7 +266,7 @@ class Model: return self._evaluate(gallery_samples, probe_samples) - def _get_eval_sample(self, sample: dict[str, Union[list, torch.Tensor]]): + def _get_eval_sample(self, sample: Dict[str, Union[List, torch.Tensor]]): label = sample.pop('label').item() clip = sample.pop('clip').to(self.device) feature = self.rgb_pn(clip).detach() @@ -326,7 +326,7 @@ class Model: def _load_pretrained( self, - iters: tuple[int], + iters: Tuple[int], dataset_config: DatasetConfiguration, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] -- cgit v1.2.3 From 24b5968bfc5799e44c9bbbc00e3a9be00f4509ac Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Mon, 15 Feb 2021 11:08:52 +0800 Subject: Revert "Memory usage improvement" This reverts commit be508061 --- models/auto_encoder.py | 70 +++++++++++++---------------- models/model.py | 21 +++------ models/rgb_part_net.py | 117 ++++++++++++++++++++++++------------------------- 3 files changed, 92 insertions(+), 116 deletions(-) (limited to 'models') diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 918a95c..7b9b29f 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -119,47 +119,32 @@ class AutoEncoder(nn.Module): embedding_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() - self.f_c_c1_t2_ = None - self.f_p_c1_t2_ = None - self.f_c_c1_t1_ = None self.encoder = Encoder(channels, feature_channels, embedding_dims) self.decoder = Decoder(embedding_dims, feature_channels, channels) - def forward(self, x_t2, is_c1=True): - n, t, c, h, w = x_t2.size() - if is_c1: # condition 1 - # x_c1_t2 is the frame for later module - x_c1_t2_ = x_t2.view(n * t, c, h, w) - (f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_) \ - = self.encoder(x_c1_t2_) - - if self.training: - # t1 is random time step - x_c1_t1 = x_t2[:, torch.randperm(t), :, :, :] - x_c1_t1_ = x_c1_t1.view(n * t, c, h, w) - (f_a_c1_t1_, self.f_c_c1_t1_, _) = self.encoder(x_c1_t1_) - - x_c1_t2_pred_ = self.decoder( - f_a_c1_t1_, self.f_c_c1_t1_, self.f_p_c1_t2_ - ) - x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) - - xrecon_loss = torch.stack([ - F.mse_loss(x_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) - for i in range(t) - ]).sum() - - return ((f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_), - xrecon_loss) - else: # evaluating - return self.f_c_c1_t2_, self.f_p_c1_t2_ - else: # condition 2 - # c2 is another condition - x_c2_t2_ = x_t2.view(n * t, c, h, w) - (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2_) - - f_c_c1_t1 = self.f_c_c1_t1_.view(n, t, -1) - f_c_c1_t2 = self.f_c_c1_t2_.view(n, t, -1) + def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None): + n, t, c, h, w = x_c1_t2.size() + # x_c1_t2 is the frame for later module + x_c1_t2_ = x_c1_t2.view(n * t, c, h, w) + (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_) + + if self.training: + # t1 is random time step, c2 is another condition + x_c1_t1 = x_c1_t1.view(n * t, c, h, w) + (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1) + x_c2_t2 = x_c2_t2.view(n * t, c, h, w) + (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2) + + x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_) + x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) + + xrecon_loss = torch.stack([ + F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) + for i in range(t) + ]).sum() + + f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1) + f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1) f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1) cano_cons_loss = torch.stack([ F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :]) @@ -167,8 +152,13 @@ class AutoEncoder(nn.Module): for i in range(t) ]).mean() - f_p_c1_t2 = self.f_p_c1_t2_.view(n, t, -1) + f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1) f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1) pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1)) - return cano_cons_loss, pose_sim_loss * 10 + return ( + (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_), + (xrecon_loss, cano_cons_loss, pose_sim_loss * 10) + ) + else: # evaluating + return f_c_c1_t2_, f_p_c1_t2_ diff --git a/models/model.py b/models/model.py index 3aeb754..9748e46 100644 --- a/models/model.py +++ b/models/model.py @@ -182,7 +182,7 @@ class Model: # Training start start_time = datetime.now() running_loss = torch.zeros(5, device=self.device) - print(f"{'Time':^8} {'Iter':^5} {'Loss':^5}", + print(f"{'Time':^8} {'Iter':^5} {'Loss':^6}", f"{'Xrecon':^8} {'CanoCons':^8} {'PoseSim':^8}", f"{'BATripH':^8} {'BATripP':^8} {'LRs':^19}") for (batch_c1, batch_c2) in dataloader: @@ -190,21 +190,12 @@ class Model: # Zero the parameter gradients self.optimizer.zero_grad() # forward + backward + optimize - # Feed data twice in order to reduce memory usage x_c1 = batch_c1['clip'].to(self.device) + x_c2 = batch_c2['clip'].to(self.device) y = batch_c1['label'].to(self.device) # Duplicate labels for each part y = y.unsqueeze(1).repeat(1, self.rgb_pn.num_total_parts) - # Feed condition 1 clips first - losses, images = self.rgb_pn(x_c1, y) - (xrecon_loss, hpm_ba_trip, pn_ba_trip) = losses - x_c2 = batch_c2['clip'].to(self.device) - # Then feed condition 2 clips - cano_cons_loss, pose_sim_loss = self.rgb_pn(x_c2, is_c1=False) - losses = torch.stack(( - xrecon_loss, cano_cons_loss, pose_sim_loss, - hpm_ba_trip, pn_ba_trip - )) + losses, images = self.rgb_pn(x_c1, x_c2, y) loss = losses.sum() loss.backward() self.optimizer.step() @@ -234,9 +225,7 @@ class Model: self.writer.add_images( 'Canonical image', i_c, self.curr_iter ) - for (i, (o, a, p)) in enumerate(zip( - batch_c1['clip'], i_a, i_p - )): + for (i, (o, a, p)) in enumerate(zip(x_c1, i_a, i_p)): self.writer.add_images( f'Original image/batch {i}', o, self.curr_iter ) @@ -250,7 +239,7 @@ class Model: remaining_minute, second = divmod(time_used.seconds, 60) hour, minute = divmod(remaining_minute, 60) print(f'{hour:02}:{minute:02}:{second:02}', - f'{self.curr_iter:5d} {running_loss.sum() / 100:5.3f}', + f'{self.curr_iter:5d} {running_loss.sum() / 100:6.3f}', '{:f} {:f} {:f} {:f} {:f}'.format(*running_loss / 100), '{:.3e} {:.3e}'.format(lrs[0], lrs[1])) running_loss.zero_() diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index c489ec6..260eabd 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -58,67 +58,64 @@ class RGBPartNet(nn.Module): def fc(self, x): return x @ self.fc_mat - def forward(self, x, y=None, is_c1=True): - # Step 1a: Disentangle condition 1 clips - if is_c1: - # n, t, c, h, w - ((x_c, x_p), xrecon_loss, images) = self._disentangle(x, is_c1) - - # Step 2.a: Static Gait Feature Aggregation & HPM - # n, c, h, w - x_c = self.hpm(x_c) - # p, n, c - - # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation) - # n, t, c, h, w - x_p = self.pn(x_p) - # p, n, c - - # Step 3: Cat feature map together and fc - x = torch.cat((x_c, x_p)) - x = self.fc(x) - - if self.training: - y = y.T - hpm_ba_trip = self.hpm_ba_trip( - x[:self.hpm_num_parts], y[:self.hpm_num_parts] - ) - pn_ba_trip = self.pn_ba_trip( - x[self.hpm_num_parts:], y[self.hpm_num_parts:] - ) - return (xrecon_loss, hpm_ba_trip, pn_ba_trip), images - else: # evaluating - return x.unsqueeze(1).view(-1) - else: # Step 1b: Disentangle condition 2 clips - return self._disentangle(x, is_c1) - - def _disentangle(self, x_t2, is_c1=True): - if is_c1: # condition 1 - n, t, *_ = x_size = x_t2.size() - device = x_t2.device - if self.training: - (f_a_, f_c_, f_p_), xrecon_loss = self.ae(x_t2, is_c1) - # Decode features - with torch.no_grad(): - x_c = self._decode_cano_feature(f_c_, n, t, device) - x_p = self._decode_pose_feature(f_p_, *x_size, device) - - i_a, i_c, i_p = None, None, None - if self.image_log_on: - i_a = self._decode_appr_feature(f_a_, *x_size, device) - # Continue decoding canonical features - i_c = self.ae.decoder.trans_conv3(x_c) - i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c)) - i_p = x_p - - return (x_c, x_p), xrecon_loss, (i_a, i_c, i_p) - else: # evaluating - f_c_, f_p_ = self.ae(x_t2) + def forward(self, x_c1, x_c2=None, y=None): + # Step 1: Disentanglement + # n, t, c, h, w + ((x_c, x_p), losses, images) = self._disentangle(x_c1, x_c2) + + # Step 2.a: Static Gait Feature Aggregation & HPM + # n, c, h, w + x_c = self.hpm(x_c) + # p, n, c + + # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation) + # n, t, c, h, w + x_p = self.pn(x_p) + # p, n, c + + # Step 3: Cat feature map together and fc + x = torch.cat((x_c, x_p)) + x = self.fc(x) + + if self.training: + y = y.T + hpm_ba_trip = self.hpm_ba_trip( + x[:self.hpm_num_parts], y[:self.hpm_num_parts] + ) + pn_ba_trip = self.pn_ba_trip( + x[self.hpm_num_parts:], y[self.hpm_num_parts:] + ) + losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip)) + return losses, images + else: + return x.unsqueeze(1).view(-1) + + def _disentangle(self, x_c1_t2, x_c2_t2=None): + n, t, c, h, w = x_c1_t2.size() + device = x_c1_t2.device + x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :] + if self.training: + ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2) + # Decode features + with torch.no_grad(): x_c = self._decode_cano_feature(f_c_, n, t, device) - x_p = self._decode_pose_feature(f_p_, *x_size, device) - return (x_c, x_p), None, None - else: # condition 2 - return self.ae(x_t2, is_c1) + x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device) + + i_a, i_c, i_p = None, None, None + if self.image_log_on: + i_a = self._decode_appr_feature(f_a_, n, t, c, h, w, device) + # Continue decoding canonical features + i_c = self.ae.decoder.trans_conv3(x_c) + i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c)) + i_p = x_p + + return (x_c, x_p), losses, (i_a, i_c, i_p) + + else: # evaluating + f_c_, f_p_ = self.ae(x_c1_t2) + x_c = self._decode_cano_feature(f_c_, n, t, device) + x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device) + return (x_c, x_p), None, None def _decode_appr_feature(self, f_a_, n, t, c, h, w, device): # Decode appearance features -- cgit v1.2.3