From db7edf86af56aa76eaaa2b30f3b6568e401c7ecd Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Tue, 12 Jan 2021 11:38:14 +0800 Subject: Remove TypeDict for python 3.7 --- config.py | 4 +--- models/model.py | 31 ++++++++++++-------------- utils/configuration.py | 59 -------------------------------------------------- utils/misc.py | 5 ++--- 4 files changed, 17 insertions(+), 82 deletions(-) delete mode 100644 utils/configuration.py diff --git a/config.py b/config.py index 47ded38..cbe5e07 100644 --- a/config.py +++ b/config.py @@ -1,6 +1,4 @@ -from utils.configuration import Configuration - -config: Configuration = { +config = { 'system': { # Disable accelerator 'disable_acc': False, diff --git a/models/model.py b/models/model.py index 1154d7f..7cf6ed0 100644 --- a/models/model.py +++ b/models/model.py @@ -13,9 +13,6 @@ from torch.utils.tensorboard import SummaryWriter from tqdm import tqdm from models.rgb_part_net import RGBPartNet -from utils.configuration import DataloaderConfiguration, \ - HyperparameterConfiguration, DatasetConfiguration, ModelConfiguration, \ - SystemConfiguration from utils.dataset import CASIAB, ClipConditions, ClipViews, ClipClasses from utils.sampler import TripletSampler @@ -23,9 +20,9 @@ from utils.sampler import TripletSampler class Model: def __init__( self, - system_config: SystemConfiguration, - model_config: ModelConfiguration, - hyperparameter_config: HyperparameterConfiguration + system_config: Dict, + model_config: Dict, + hyperparameter_config: Dict ): self.disable_acc = system_config['disable_acc'] if self.disable_acc: @@ -89,11 +86,11 @@ class Model: def fit_all( self, - dataset_config: DatasetConfiguration, + dataset_config: Dict, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], - dataloader_config: DataloaderConfiguration, + dataloader_config: Dict, ): for (condition, selector) in dataset_selectors.items(): print(f'Training model {condition} ...') @@ -104,8 +101,8 @@ class Model: def fit( self, - dataset_config: DatasetConfiguration, - dataloader_config: DataloaderConfiguration, + dataset_config: Dict, + dataloader_config: Dict, ): self.is_train = True dataset = self._parse_dataset_config(dataset_config) @@ -184,11 +181,11 @@ class Model: def predict_all( self, iter_: int, - dataset_config: DatasetConfiguration, + dataset_config: dict, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], - dataloader_config: DataloaderConfiguration, + dataloader_config: dict, ) -> Dict[str, torch.Tensor]: self.is_train = False # Split gallery and probe dataset @@ -296,7 +293,7 @@ class Model: def _load_pretrained( self, iter_: int, - dataset_config: DatasetConfiguration, + dataset_config: Dict, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ] @@ -313,8 +310,8 @@ class Model: def _split_gallery_probe( self, - dataset_config: DatasetConfiguration, - dataloader_config: DataloaderConfiguration, + dataset_config: Dict, + dataloader_config: Dict, ) -> Tuple[DataLoader, Dict[str: DataLoader]]: dataset_name = dataset_config.get('name', 'CASIA-B') if dataset_name == 'CASIA-B': @@ -364,7 +361,7 @@ class Model: def _parse_dataset_config( self, - dataset_config: DatasetConfiguration + dataset_config: Dict ) -> Union[CASIAB]: self.train_size = dataset_config.get('train_size', 74) self.in_channels = dataset_config.get('num_input_channels', 3) @@ -385,7 +382,7 @@ class Model: def _parse_dataloader_config( self, dataset: Union[CASIAB], - dataloader_config: DataloaderConfiguration + dataloader_config: Dict ) -> DataLoader: config: Dict = dataloader_config.copy() (self.pr, self.k) = config.pop('batch_size') diff --git a/utils/configuration.py b/utils/configuration.py deleted file mode 100644 index 455abe8..0000000 --- a/utils/configuration.py +++ /dev/null @@ -1,59 +0,0 @@ -from typing import TypedDict, Optional, Union, Tuple, Dict - -from utils.dataset import ClipClasses, ClipConditions, ClipViews - - -class SystemConfiguration(TypedDict): - disable_acc: bool - CUDA_VISIBLE_DEVICES: str - save_dir: str - - -class DatasetConfiguration(TypedDict): - name: str - root_dir: str - train_size: int - num_sampled_frames: int - discard_threshold: int - selector: Optional[Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]] - num_input_channels: int - frame_size: Tuple[int, int] - cache_on: bool - - -class DataloaderConfiguration(TypedDict): - batch_size: Tuple[int, int] - num_workers: int - pin_memory: bool - - -class HyperparameterConfiguration(TypedDict): - ae_feature_channels: int - f_a_c_p_dims: Tuple[int, int, int] - hpm_scales: Tuple[int, ...] - hpm_use_avg_pool: bool - hpm_use_max_pool: bool - fpfe_feature_channels: int - fpfe_kernel_sizes: Tuple[Tuple, ...] - fpfe_paddings: Tuple[Tuple, ...] - fpfe_halving: Tuple[int, ...] - tfa_squeeze_ratio: int - tfa_num_parts: int - embedding_dims: int - triplet_margin: float - lr: int - betas: Tuple[float, float] - - -class ModelConfiguration(TypedDict): - name: str - restore_iter: int - total_iter: int - - -class Configuration(TypedDict): - system: SystemConfiguration - dataset: DatasetConfiguration - dataloader: DataloaderConfiguration - hyperparameter: HyperparameterConfiguration - model: ModelConfiguration diff --git a/utils/misc.py b/utils/misc.py index b850830..c047618 100644 --- a/utils/misc.py +++ b/utils/misc.py @@ -1,9 +1,8 @@ import os +from typing import Dict -from utils.configuration import SystemConfiguration - -def set_visible_cuda(config: SystemConfiguration): +def set_visible_cuda(config: Dict): """Set environment variable CUDA device(s)""" CUDA_VISIBLE_DEVICES = config.get('CUDA_VISIBLE_DEVICES', None) if CUDA_VISIBLE_DEVICES: -- cgit v1.2.3 From 61f79339fd0168bc200117daf0459764b93d0acb Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Tue, 12 Jan 2021 14:26:03 +0800 Subject: Some type hint fixes --- models/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/model.py b/models/model.py index 617966f..af37bb2 100644 --- a/models/model.py +++ b/models/model.py @@ -182,11 +182,11 @@ class Model: def predict_all( self, iter_: int, - dataset_config: dict, + dataset_config: Dict, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], - dataloader_config: dict, + dataloader_config: Dict, ) -> Dict[str, torch.Tensor]: self.is_train = False # Split gallery and probe dataset -- cgit v1.2.3 From 2fa77264fb5002683da902577acea6451e4c2942 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Tue, 12 Jan 2021 20:39:39 +0800 Subject: Delete empty files --- utils/configuration.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 utils/configuration.py diff --git a/utils/configuration.py b/utils/configuration.py deleted file mode 100644 index e69de29..0000000 -- cgit v1.2.3 From 42847b721a99350e1eed423dce99574c584d97ef Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 14 Jan 2021 17:17:10 +0800 Subject: Update startup script --- startup | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/startup b/startup index c871f54..028f136 100644 --- a/startup +++ b/startup @@ -16,7 +16,7 @@ pip3 install scikit-learn tqdm tensorboard cd /root git clone https://git.jordangong.com/jordangong/gait-recognition.git -cd gait-recognition; git checkout python3.8; cd .. +cd gait-recognition; git checkout python3.7; cd .. mkdir -p gait-recognition/data/CASIA-B-MRCNN wget https://storage.googleapis.com/gait-dataset/CASIA-B-MRCNN-SEG.tar.zst tar -I zstd -xf CASIA-B-MRCNN-SEG.tar.zst -C gait-recognition/data/CASIA-B-MRCNN -- cgit v1.2.3 From 8ed6492faa7d04c19d15bf137ae4d7ef447fd334 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Mon, 15 Feb 2021 11:09:25 +0800 Subject: Revert "Memory usage improvement" This reverts commit be508061 --- models/auto_encoder.py | 70 +++++++++++++---------------- models/model.py | 21 +++------ models/rgb_part_net.py | 117 ++++++++++++++++++++++++------------------------- 3 files changed, 92 insertions(+), 116 deletions(-) diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 918a95c..7b9b29f 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -119,47 +119,32 @@ class AutoEncoder(nn.Module): embedding_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() - self.f_c_c1_t2_ = None - self.f_p_c1_t2_ = None - self.f_c_c1_t1_ = None self.encoder = Encoder(channels, feature_channels, embedding_dims) self.decoder = Decoder(embedding_dims, feature_channels, channels) - def forward(self, x_t2, is_c1=True): - n, t, c, h, w = x_t2.size() - if is_c1: # condition 1 - # x_c1_t2 is the frame for later module - x_c1_t2_ = x_t2.view(n * t, c, h, w) - (f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_) \ - = self.encoder(x_c1_t2_) - - if self.training: - # t1 is random time step - x_c1_t1 = x_t2[:, torch.randperm(t), :, :, :] - x_c1_t1_ = x_c1_t1.view(n * t, c, h, w) - (f_a_c1_t1_, self.f_c_c1_t1_, _) = self.encoder(x_c1_t1_) - - x_c1_t2_pred_ = self.decoder( - f_a_c1_t1_, self.f_c_c1_t1_, self.f_p_c1_t2_ - ) - x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) - - xrecon_loss = torch.stack([ - F.mse_loss(x_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) - for i in range(t) - ]).sum() - - return ((f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_), - xrecon_loss) - else: # evaluating - return self.f_c_c1_t2_, self.f_p_c1_t2_ - else: # condition 2 - # c2 is another condition - x_c2_t2_ = x_t2.view(n * t, c, h, w) - (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2_) - - f_c_c1_t1 = self.f_c_c1_t1_.view(n, t, -1) - f_c_c1_t2 = self.f_c_c1_t2_.view(n, t, -1) + def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None): + n, t, c, h, w = x_c1_t2.size() + # x_c1_t2 is the frame for later module + x_c1_t2_ = x_c1_t2.view(n * t, c, h, w) + (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_) + + if self.training: + # t1 is random time step, c2 is another condition + x_c1_t1 = x_c1_t1.view(n * t, c, h, w) + (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1) + x_c2_t2 = x_c2_t2.view(n * t, c, h, w) + (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2) + + x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_) + x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) + + xrecon_loss = torch.stack([ + F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) + for i in range(t) + ]).sum() + + f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1) + f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1) f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1) cano_cons_loss = torch.stack([ F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :]) @@ -167,8 +152,13 @@ class AutoEncoder(nn.Module): for i in range(t) ]).mean() - f_p_c1_t2 = self.f_p_c1_t2_.view(n, t, -1) + f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1) f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1) pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1)) - return cano_cons_loss, pose_sim_loss * 10 + return ( + (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_), + (xrecon_loss, cano_cons_loss, pose_sim_loss * 10) + ) + else: # evaluating + return f_c_c1_t2_, f_p_c1_t2_ diff --git a/models/model.py b/models/model.py index 199e371..4f5a234 100644 --- a/models/model.py +++ b/models/model.py @@ -179,7 +179,7 @@ class Model: # Training start start_time = datetime.now() running_loss = torch.zeros(5, device=self.device) - print(f"{'Time':^8} {'Iter':^5} {'Loss':^5}", + print(f"{'Time':^8} {'Iter':^5} {'Loss':^6}", f"{'Xrecon':^8} {'CanoCons':^8} {'PoseSim':^8}", f"{'BATripH':^8} {'BATripP':^8} {'LRs':^19}") for (batch_c1, batch_c2) in dataloader: @@ -187,21 +187,12 @@ class Model: # Zero the parameter gradients self.optimizer.zero_grad() # forward + backward + optimize - # Feed data twice in order to reduce memory usage x_c1 = batch_c1['clip'].to(self.device) + x_c2 = batch_c2['clip'].to(self.device) y = batch_c1['label'].to(self.device) # Duplicate labels for each part y = y.unsqueeze(1).repeat(1, self.rgb_pn.num_total_parts) - # Feed condition 1 clips first - losses, images = self.rgb_pn(x_c1, y) - (xrecon_loss, hpm_ba_trip, pn_ba_trip) = losses - x_c2 = batch_c2['clip'].to(self.device) - # Then feed condition 2 clips - cano_cons_loss, pose_sim_loss = self.rgb_pn(x_c2, is_c1=False) - losses = torch.stack(( - xrecon_loss, cano_cons_loss, pose_sim_loss, - hpm_ba_trip, pn_ba_trip - )) + losses, images = self.rgb_pn(x_c1, x_c2, y) loss = losses.sum() loss.backward() self.optimizer.step() @@ -231,9 +222,7 @@ class Model: self.writer.add_images( 'Canonical image', i_c, self.curr_iter ) - for (i, (o, a, p)) in enumerate(zip( - batch_c1['clip'], i_a, i_p - )): + for (i, (o, a, p)) in enumerate(zip(x_c1, i_a, i_p)): self.writer.add_images( f'Original image/batch {i}', o, self.curr_iter ) @@ -247,7 +236,7 @@ class Model: remaining_minute, second = divmod(time_used.seconds, 60) hour, minute = divmod(remaining_minute, 60) print(f'{hour:02}:{minute:02}:{second:02}', - f'{self.curr_iter:5d} {running_loss.sum() / 100:5.3f}', + f'{self.curr_iter:5d} {running_loss.sum() / 100:6.3f}', '{:f} {:f} {:f} {:f} {:f}'.format(*running_loss / 100), '{:.3e} {:.3e}'.format(lrs[0], lrs[1])) running_loss.zero_() diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index c489ec6..260eabd 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -58,67 +58,64 @@ class RGBPartNet(nn.Module): def fc(self, x): return x @ self.fc_mat - def forward(self, x, y=None, is_c1=True): - # Step 1a: Disentangle condition 1 clips - if is_c1: - # n, t, c, h, w - ((x_c, x_p), xrecon_loss, images) = self._disentangle(x, is_c1) - - # Step 2.a: Static Gait Feature Aggregation & HPM - # n, c, h, w - x_c = self.hpm(x_c) - # p, n, c - - # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation) - # n, t, c, h, w - x_p = self.pn(x_p) - # p, n, c - - # Step 3: Cat feature map together and fc - x = torch.cat((x_c, x_p)) - x = self.fc(x) - - if self.training: - y = y.T - hpm_ba_trip = self.hpm_ba_trip( - x[:self.hpm_num_parts], y[:self.hpm_num_parts] - ) - pn_ba_trip = self.pn_ba_trip( - x[self.hpm_num_parts:], y[self.hpm_num_parts:] - ) - return (xrecon_loss, hpm_ba_trip, pn_ba_trip), images - else: # evaluating - return x.unsqueeze(1).view(-1) - else: # Step 1b: Disentangle condition 2 clips - return self._disentangle(x, is_c1) - - def _disentangle(self, x_t2, is_c1=True): - if is_c1: # condition 1 - n, t, *_ = x_size = x_t2.size() - device = x_t2.device - if self.training: - (f_a_, f_c_, f_p_), xrecon_loss = self.ae(x_t2, is_c1) - # Decode features - with torch.no_grad(): - x_c = self._decode_cano_feature(f_c_, n, t, device) - x_p = self._decode_pose_feature(f_p_, *x_size, device) - - i_a, i_c, i_p = None, None, None - if self.image_log_on: - i_a = self._decode_appr_feature(f_a_, *x_size, device) - # Continue decoding canonical features - i_c = self.ae.decoder.trans_conv3(x_c) - i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c)) - i_p = x_p - - return (x_c, x_p), xrecon_loss, (i_a, i_c, i_p) - else: # evaluating - f_c_, f_p_ = self.ae(x_t2) + def forward(self, x_c1, x_c2=None, y=None): + # Step 1: Disentanglement + # n, t, c, h, w + ((x_c, x_p), losses, images) = self._disentangle(x_c1, x_c2) + + # Step 2.a: Static Gait Feature Aggregation & HPM + # n, c, h, w + x_c = self.hpm(x_c) + # p, n, c + + # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation) + # n, t, c, h, w + x_p = self.pn(x_p) + # p, n, c + + # Step 3: Cat feature map together and fc + x = torch.cat((x_c, x_p)) + x = self.fc(x) + + if self.training: + y = y.T + hpm_ba_trip = self.hpm_ba_trip( + x[:self.hpm_num_parts], y[:self.hpm_num_parts] + ) + pn_ba_trip = self.pn_ba_trip( + x[self.hpm_num_parts:], y[self.hpm_num_parts:] + ) + losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip)) + return losses, images + else: + return x.unsqueeze(1).view(-1) + + def _disentangle(self, x_c1_t2, x_c2_t2=None): + n, t, c, h, w = x_c1_t2.size() + device = x_c1_t2.device + x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :] + if self.training: + ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2) + # Decode features + with torch.no_grad(): x_c = self._decode_cano_feature(f_c_, n, t, device) - x_p = self._decode_pose_feature(f_p_, *x_size, device) - return (x_c, x_p), None, None - else: # condition 2 - return self.ae(x_t2, is_c1) + x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device) + + i_a, i_c, i_p = None, None, None + if self.image_log_on: + i_a = self._decode_appr_feature(f_a_, n, t, c, h, w, device) + # Continue decoding canonical features + i_c = self.ae.decoder.trans_conv3(x_c) + i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c)) + i_p = x_p + + return (x_c, x_p), losses, (i_a, i_c, i_p) + + else: # evaluating + f_c_, f_p_ = self.ae(x_c1_t2) + x_c = self._decode_cano_feature(f_c_, n, t, device) + x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device) + return (x_c, x_p), None, None def _decode_appr_feature(self, f_a_, n, t, c, h, w, device): # Decode appearance features -- cgit v1.2.3 From d0edaf066d2c7069bce9af4102ec4c75ee4e2911 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 18 Feb 2021 20:57:37 +0800 Subject: Type hint fixes --- models/model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/model.py b/models/model.py index a3b6d3a..4bf8dbc 100644 --- a/models/model.py +++ b/models/model.py @@ -279,11 +279,11 @@ class Model: def transform( self, iters: Tuple[int], - dataset_config: DatasetConfiguration, + dataset_config: Dict, dataset_selectors: Dict[ str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], - dataloader_config: DataloaderConfiguration + dataloader_config: Dict ): self.is_train = False # Split gallery and probe dataset -- cgit v1.2.3