diff options
-rw-r--r-- | .idea/csv-plugin.xml | 16 | ||||
-rw-r--r-- | config.py | 6 | ||||
-rw-r--r-- | models/auto_encoder.py | 15 | ||||
-rw-r--r-- | models/model.py | 50 | ||||
-rw-r--r-- | models/rgb_part_net.py | 8 |
5 files changed, 67 insertions, 28 deletions
diff --git a/.idea/csv-plugin.xml b/.idea/csv-plugin.xml new file mode 100644 index 0000000..5e5cec1 --- /dev/null +++ b/.idea/csv-plugin.xml @@ -0,0 +1,16 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="CsvFileAttributes"> + <option name="attributeMap"> + <map> + <entry key="/models/model.py"> + <value> + <Attribute> + <option name="separator" value="," /> + </Attribute> + </value> + </entry> + </map> + </option> + </component> +</project>
\ No newline at end of file @@ -5,7 +5,7 @@ config: Configuration = { # Disable accelerator 'disable_acc': False, # GPU(s) used in training or testing if available - 'CUDA_VISIBLE_DEVICES': '0', + 'CUDA_VISIBLE_DEVICES': '0,1', # Directory used in training or testing for temporary storage 'save_dir': 'runs', # Recorde disentangled image or not @@ -32,14 +32,14 @@ config: Configuration = { # Resolution after resize, can be divided 16 'frame_size': (64, 48), # Cache dataset or not - 'cache_on': False, + 'cache_on': True, }, # Dataloader settings 'dataloader': { # Batch size (pr, k) # `pr` denotes number of persons # `k` denotes number of sequences per person - 'batch_size': (4, 6), + 'batch_size': (6, 8), # Number of workers of Dataloader 'num_workers': 4, # Faster data transfer from RAM to GPU if enabled diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 4fece69..0694ff1 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -151,27 +151,18 @@ class AutoEncoder(nn.Module): x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_) x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) - xrecon_loss = torch.stack([ - F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) - for i in range(t) - ]).sum() - f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1) f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1) f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1) - cano_cons_loss = torch.stack([ - F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :]) - + F.mse_loss(f_c_c1_t2[:, i, :], f_c_c2_t2[:, i, :]) - for i in range(t) - ]).mean() f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1) f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1) - pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1)) return ( (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_), - (xrecon_loss, cano_cons_loss, pose_sim_loss * 10) + (x_c1_t2_pred, + (f_c_c1_t1, f_c_c1_t2, f_c_c2_t2), + (f_p_c1_t2, f_p_c2_t2)) ) else: # evaluating return f_c_c1_t2_, f_p_c1_t2_ diff --git a/models/model.py b/models/model.py index ceadb92..9cac5e5 100644 --- a/models/model.py +++ b/models/model.py @@ -196,18 +196,21 @@ class Model: triplet_is_hard, triplet_is_mean, None ) + num_sampled_frames = dataset_config.get('num_sampled_frames', 30) self.num_pairs = (self.pr*self.k-1) * (self.pr*self.k) // 2 self.num_pos_pairs = (self.k*(self.k-1)//2) * self.pr # Try to accelerate computation using CUDA or others + self.rgb_pn = nn.DataParallel(self.rgb_pn) self.rgb_pn = self.rgb_pn.to(self.device) + self.triplet_loss_hpm = nn.DataParallel(self.triplet_loss_hpm) self.triplet_loss_hpm = self.triplet_loss_hpm.to(self.device) + self.triplet_loss_pn = nn.DataParallel(self.triplet_loss_pn) self.triplet_loss_pn = self.triplet_loss_pn.to(self.device) - self.optimizer = optim.Adam([ - {'params': self.rgb_pn.ae.parameters(), **ae_optim_hp}, - {'params': self.rgb_pn.hpm.parameters(), **hpm_optim_hp}, - {'params': self.rgb_pn.pn.parameters(), **pn_optim_hp}, + {'params': self.rgb_pn.module.ae.parameters(), **ae_optim_hp}, + {'params': self.rgb_pn.module.hpm.parameters(), **hpm_optim_hp}, + {'params': self.rgb_pn.module.pn.parameters(), **pn_optim_hp}, ], **optim_hp) # Scheduler @@ -259,7 +262,11 @@ class Model: # forward + backward + optimize x_c1 = batch_c1['clip'].to(self.device) x_c2 = batch_c2['clip'].to(self.device) - embed_c, embed_p, ae_losses, images = self.rgb_pn(x_c1, x_c2) + embed_c, embed_p, images, f_loss = self.rgb_pn(x_c1, x_c2) + ae_losses = self._disentangling_loss( + x_c1, f_loss, num_sampled_frames + ) + embed_c, embed_p = embed_c.transpose(0, 1), embed_p.transpose(0, 1) y = batch_c1['label'].to(self.device) losses, hpm_result, pn_result = self._classification_loss( embed_c, embed_p, ae_losses, y @@ -307,7 +314,12 @@ class Model: x_c1 = batch_c1['clip'].to(self.device) x_c2 = batch_c2['clip'].to(self.device) with torch.no_grad(): - embed_c, embed_p, ae_losses, _ = self.rgb_pn(x_c1, x_c2) + embed_c, embed_p, _, f_loss = self.rgb_pn(x_c1, x_c2) + ae_losses = self._disentangling_loss( + x_c1, f_loss, num_sampled_frames + ) + embed_c = embed_c.transpose(0, 1) + embed_p = embed_p.transpose(0, 1) y = batch_c1['label'].to(self.device) losses, hpm_result, pn_result = self._classification_loss( embed_c, embed_p, ae_losses, y @@ -333,14 +345,33 @@ class Model: self.writer.close() + @staticmethod + def _disentangling_loss(x_c1, feature_for_loss, num_sampled_frames): + x_c1_pred = feature_for_loss[0] + xrecon_loss = torch.stack([ + F.mse_loss(x_c1_pred[:, i, :, :, :], x_c1[:, i, :, :, :]) + for i in range(num_sampled_frames) + ]).sum() + f_c_c1_t1, f_c_c1_t2, f_c_c2_t2 = feature_for_loss[1] + cano_cons_loss = torch.stack([ + F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :]) + + F.mse_loss(f_c_c1_t2[:, i, :], f_c_c2_t2[:, i, :]) + for i in range(num_sampled_frames) + ]).mean() + f_p_c1_t2, f_p_c2_t2 = feature_for_loss[2] + pose_sim_loss = F.mse_loss( + f_p_c1_t2.mean(1), f_p_c2_t2.mean(1) + ) * 10 + return xrecon_loss, cano_cons_loss, pose_sim_loss + def _classification_loss(self, embed_c, embed_p, ae_losses, y): # Duplicate labels for each part - y_triplet = y.repeat(self.rgb_pn.num_parts, 1) + y_triplet = y.repeat(self.rgb_pn.module.num_parts, 1) hpm_result = self.triplet_loss_hpm( - embed_c, y_triplet[:self.rgb_pn.hpm.num_parts] + embed_c, y_triplet[:self.rgb_pn.module.hpm.num_parts] ) pn_result = self.triplet_loss_pn( - embed_p, y_triplet[self.rgb_pn.hpm.num_parts:] + embed_p, y_triplet[self.rgb_pn.module.hpm.num_parts:] ) losses = torch.stack(( *ae_losses, @@ -471,6 +502,7 @@ class Model: model_hp.pop('triplet_margins', None) self.rgb_pn = RGBPartNet(self.in_channels, self.in_size, **model_hp) # Try to accelerate computation using CUDA or others + self.rgb_pn = nn.DataParallel(self.rgb_pn) self.rgb_pn = self.rgb_pn.to(self.device) self.rgb_pn.eval() diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 4a82da3..5d2c142 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -42,7 +42,7 @@ class RGBPartNet(nn.Module): def forward(self, x_c1, x_c2=None): # Step 1: Disentanglement # n, t, c, h, w - ((x_c, x_p), ae_losses, images) = self._disentangle(x_c1, x_c2) + ((x_c, x_p), images, f_loss) = self._disentangle(x_c1, x_c2) # Step 2.a: Static Gait Feature Aggregation & HPM # n, c, h, w @@ -55,7 +55,7 @@ class RGBPartNet(nn.Module): # p, n, d if self.training: - return x_c, x_p, ae_losses, images + return x_c.transpose(0, 1), x_p.transpose(0, 1), images, f_loss else: return x_c, x_p @@ -64,7 +64,7 @@ class RGBPartNet(nn.Module): device = x_c1_t2.device if self.training: x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :] - ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2) + (f_a_, f_c_, f_p_), f_loss = self.ae(x_c1_t2, x_c1_t1, x_c2_t2) # Decode features x_c = self._decode_cano_feature(f_c_, n, t, device) x_p_ = self._decode_pose_feature(f_p_, n, t, device) @@ -81,7 +81,7 @@ class RGBPartNet(nn.Module): i_p_ = torch.sigmoid(self.ae.decoder.trans_conv4(i_p_)) i_p = i_p_.view(n, t, c, h, w) - return (x_c, x_p), losses, (i_a, i_c, i_p) + return (x_c, x_p), (i_a, i_c, i_p), f_loss else: # evaluating f_c_, f_p_ = self.ae(x_c1_t2) |