diff options
-rw-r--r-- | config.py | 16 | ||||
-rw-r--r-- | models/auto_encoder.py | 19 | ||||
-rw-r--r-- | models/model.py | 33 | ||||
-rw-r--r-- | models/rgb_part_net.py | 5 | ||||
-rw-r--r-- | requirements.txt | 2 | ||||
-rw-r--r-- | test/model.py | 19 |
6 files changed, 59 insertions, 35 deletions
@@ -31,7 +31,7 @@ config = { # Batch size (pr, k) # `pr` denotes number of persons # `k` denotes number of sequences per person - 'batch_size': (8, 16), + 'batch_size': (4, 8), # Number of workers of Dataloader 'num_workers': 4, # Faster data transfer from RAM to GPU if enabled @@ -67,13 +67,23 @@ config = { 'triplet_margin': 0.2, }, 'optimizer': { + # Global parameters # Initial learning rate of Adam Optimizer 'lr': 1e-4, # Coefficients used for computing running averages of # gradient and its square - 'betas': (0.9, 0.999), + # 'betas': (0.9, 0.999), + # Term added to the denominator + # 'eps': 1e-8, # Weight decay (L2 penalty) - 'weight_decay': 0.001, + # 'weight_decay': 0, + # Use AMSGrad or not + # 'amsgrad': False, + + # Local parameters (override global ones) + 'auto_encoder': { + 'weight_decay': 0.001 + }, }, 'scheduler': { # Period of learning rate decay diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 64c52e3..befd2d3 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -134,15 +134,16 @@ class AutoEncoder(nn.Module): # x_c1_t2 is the frame for later module (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2) - # Decode canonical features for HPM - x_c_c1_t2 = self.decoder( - torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2), - no_trans_conv=True - ) - # Decode pose features for Part Net - x_p_c1_t2 = self.decoder( - torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2 - ) + with torch.no_grad(): + # Decode canonical features for HPM + x_c_c1_t2 = self.decoder( + torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2), + no_trans_conv=True + ) + # Decode pose features for Part Net + x_p_c1_t2 = self.decoder( + torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2 + ) if self.training: # t1 is random time step, c2 is another condition diff --git a/models/model.py b/models/model.py index aa45d66..5a8c0e8 100644 --- a/models/model.py +++ b/models/model.py @@ -130,12 +130,21 @@ class Model: dataloader = self._parse_dataloader_config(dataset, dataloader_config) # Prepare for model, optimizer and scheduler model_hp = self.hp.get('model', {}) - optim_hp = self.hp.get('optimizer', {}) + optim_hp: Dict = self.hp.get('optimizer', {}).copy() + ae_optim_hp = optim_hp.pop('auto_encoder', {}) + pn_optim_hp = optim_hp.pop('part_net', {}) + hpm_optim_hp = optim_hp.pop('hpm', {}) + fc_optim_hp = optim_hp.pop('fc', {}) sched_hp = self.hp.get('scheduler', {}) self.rgb_pn = RGBPartNet(self.train_size, self.in_channels, **model_hp) # Try to accelerate computation using CUDA or others self.rgb_pn = self.rgb_pn.to(self.device) - self.optimizer = optim.Adam(self.rgb_pn.parameters(), **optim_hp) + self.optimizer = optim.Adam([ + {'params': self.rgb_pn.ae.parameters(), **ae_optim_hp}, + {'params': self.rgb_pn.pn.parameters(), **pn_optim_hp}, + {'params': self.rgb_pn.hpm.parameters(), **hpm_optim_hp}, + {'params': self.rgb_pn.fc_mat, **fc_optim_hp}, + ], **optim_hp) self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, **sched_hp) self.writer = SummaryWriter(self._log_name) @@ -152,6 +161,9 @@ class Model: # Training start start_time = datetime.now() + running_loss = torch.zeros(4).to(self.device) + print(f"{'Iter':^5} {'Loss':^6} {'Xrecon':^8} {'PoseSim':^8}", + f"{'CanoCons':^8} {'BATrip':^8} {'LR':^9}") for (batch_c1, batch_c2) in dataloader: self.curr_iter += 1 # Zero the parameter gradients @@ -160,24 +172,27 @@ class Model: x_c1 = batch_c1['clip'].to(self.device) x_c2 = batch_c2['clip'].to(self.device) y = batch_c1['label'].to(self.device) - loss, metrics = self.rgb_pn(x_c1, x_c2, y) + losses = self.rgb_pn(x_c1, x_c2, y) + loss = losses.sum() loss.backward() self.optimizer.step() # Step scheduler self.scheduler.step() + # Statistics and checkpoint + running_loss += losses.detach() # Write losses to TensorBoard - self.writer.add_scalar('Loss/all', loss.item(), self.curr_iter) + self.writer.add_scalar('Loss/all', loss, self.curr_iter) self.writer.add_scalars('Loss/details', dict(zip([ 'Cross reconstruction loss', 'Pose similarity loss', 'Canonical consistency loss', 'Batch All triplet loss' - ], metrics)), self.curr_iter) + ], losses)), self.curr_iter) if self.curr_iter % 100 == 0: - print('{0:5d} loss: {1:6.3f}'.format(self.curr_iter, loss), - '(xrecon = {:f}, pose_sim = {:f},' - ' cano_cons = {:f}, ba_trip = {:f})'.format(*metrics), - 'lr:', self.scheduler.get_last_lr()[0]) + print(f'{self.curr_iter:5d} {running_loss.sum() / 100:6.3f}', + '{:f} {:f} {:f} {:f}'.format(*running_loss / 100), + f'{self.scheduler.get_last_lr()[0]:.3e}') + running_loss.zero_() if self.curr_iter % 1000 == 0: torch.save({ diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 95a3f2e..326ec81 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -81,9 +81,8 @@ class RGBPartNet(nn.Module): if self.training: batch_all_triplet_loss = self.ba_triplet_loss(x, y) - losses = (*losses, batch_all_triplet_loss) - loss = torch.sum(torch.stack(losses)) - return loss, [loss.item() for loss in losses] + losses = torch.stack((*losses, batch_all_triplet_loss)) + return losses else: return x.unsqueeze(1).view(-1) diff --git a/requirements.txt b/requirements.txt index deaa0d4..f64085a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ torchvision~=0.8.0a0+ecf4e9c numpy~=1.19.4 tqdm~=4.56.0 Pillow~=8.1.0 -scikit-learn~=0.23.2
\ No newline at end of file +scikit-learn~=0.24.0
\ No newline at end of file diff --git a/test/model.py b/test/model.py index f7fc57e..6fbcf00 100644 --- a/test/model.py +++ b/test/model.py @@ -14,12 +14,12 @@ def test_default_signature(): model._parse_dataloader_config(casiab, conf['dataloader']) assert model._log_name == os.path.join( 'runs', 'logs', 'RGB-GaitPart_80000_64_128_128_64_1_2_4_True_True_32_5_' - '3_3_3_3_3_2_1_1_1_1_1_0_2_3_4_16_256_0.2_0.0001_0.9_' - '0.999_0.001_500_0.9_CASIA-B_74_30_15_3_64_32_8_16') + '3_3_3_3_3_2_1_1_1_1_1_0_2_3_4_16_256_0.2_0.0001_0.001_' + '500_0.9_CASIA-B_74_30_15_3_64_32_8_16') assert model._checkpoint_sig == ('RGB-GaitPart_0_80000_64_128_128_64_1_2_4_' 'True_True_32_5_3_3_3_3_3_2_1_1_1_1_1_0_2_' - '3_4_16_256_0.2_0.0001_0.9_0.999_0.001_' - '500_0.9_CASIA-B_74_30_15_3_64_32_8_16') + '3_4_16_256_0.2_0.0001_0.001_500_0.9_' + 'CASIA-B_74_30_15_3_64_32_8_16') def test_default_signature_with_selector(): @@ -31,11 +31,10 @@ def test_default_signature_with_selector(): model._parse_dataloader_config(casiab, conf['dataloader']) assert model._log_name == os.path.join( 'runs', 'logs', 'RGB-GaitPart_80000_64_128_128_64_1_2_4_True_True_32_5_' - '3_3_3_3_3_2_1_1_1_1_1_0_2_3_4_16_256_0.2_0.0001_0.9_' - '0.999_0.001_500_0.9_CASIA-B_74_30_15_3_64_32_bg-0\\d_' - 'nm-0\\d_8_16') + '3_3_3_3_3_2_1_1_1_1_1_0_2_3_4_16_256_0.2_0.0001_0.001_' + '500_0.9_CASIA-B_74_30_15_3_64_32_bg-0\\d_nm-0\\d_8_16') assert model._checkpoint_sig == ('RGB-GaitPart_0_80000_64_128_128_64_1_2_4_' 'True_True_32_5_3_3_3_3_3_2_1_1_1_1_1_0_2_' - '3_4_16_256_0.2_0.0001_0.9_0.999_0.001_' - '500_0.9_CASIA-B_74_30_15_3_64_32_bg-0\\d_' - 'nm-0\\d_8_16') + '3_4_16_256_0.2_0.0001_0.001_500_0.9_' + 'CASIA-B_74_30_15_3_64_32_bg-0\\d_nm-0\\d_' + '8_16') |