diff options
author | Jordan Gong <jordan.gong@protonmail.com> | 2021-02-09 21:28:38 +0800 |
---|---|---|
committer | Jordan Gong <jordan.gong@protonmail.com> | 2021-02-09 21:28:38 +0800 |
commit | 58ef39d75098bce92654492e09edf1e83033d0c8 (patch) | |
tree | 8af7fe4fb5adfe1b189353dcff4efc38f62cd0c4 /models/auto_encoder.py | |
parent | d380e04df37593e414bd5641db100613fb2ad882 (diff) | |
parent | 916cf90d04e57fee23092c966740fbe94fd92cff (diff) |
Merge branch 'master' into python3.8
# Conflicts:
# models/rgb_part_net.py
Diffstat (limited to 'models/auto_encoder.py')
-rw-r--r-- | models/auto_encoder.py | 43 |
1 files changed, 31 insertions, 12 deletions
```diff
diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 69dae4e..7b9b29f 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -123,23 +123,42 @@ class AutoEncoder(nn.Module):
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
 
     def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
+        n, t, c, h, w = x_c1_t2.size()
         # x_c1_t2 is the frame for later module
-        (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
+        x_c1_t2_ = x_c1_t2.view(n * t, c, h, w)
+        (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_)
 
         if self.training:
             # t1 is random time step, c2 is another condition
-            (f_a_c1_t1, f_c_c1_t1, _) = self.encoder(x_c1_t1)
-            (_, f_c_c2_t2, f_p_c2_t2) = self.encoder(x_c2_t2)
-
-            x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2)
-            xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_)
-            cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
-                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
+            x_c1_t1 = x_c1_t1.view(n * t, c, h, w)
+            (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1)
+            x_c2_t2 = x_c2_t2.view(n * t, c, h, w)
+            (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2)
+
+            x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_)
+            x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w)
+
+            xrecon_loss = torch.stack([
+                F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :])
+                for i in range(t)
+            ]).sum()
+
+            f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1)
+            f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1)
+            f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1)
+            cano_cons_loss = torch.stack([
+                F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :])
+                + F.mse_loss(f_c_c1_t2[:, i, :], f_c_c2_t2[:, i, :])
+                for i in range(t)
+            ]).mean()
+
+            f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1)
+            f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1)
+            pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1))
 
             return (
-                (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2),
-                (f_p_c1_t2, f_p_c2_t2),
-                (xrecon_loss_t2, cano_cons_loss_t2)
+                (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_),
+                (xrecon_loss, cano_cons_loss, pose_sim_loss * 10)
             )
         else:  # evaluating
-            return f_c_c1_t2, f_p_c1_t2
+            return f_c_c1_t2_, f_p_c1_t2_
```