summary | refs | log | tree | commit | diff
path: root/models/auto_encoder.py
diff options
context:
space:
mode:
author: Jordan Gong <jordan.gong@protonmail.com>, 2021-02-09 21:29:06 +0800
committer: Jordan Gong <jordan.gong@protonmail.com>, 2021-02-09 21:29:06 +0800
commit: 045fdb1d8f381ef1dafdec33e87fc2b6736615e4 (patch)
tree: 644053b57c152e3eb8e7e885d87890991740834e /models/auto_encoder.py
parent: 31e0294cdb2ffd5241c7e85a6e1e98a4ee20ae28 (diff)
parent: 58ef39d75098bce92654492e09edf1e83033d0c8 (diff)
Merge branch 'python3.8' into python3.7
Diffstat (limited to 'models/auto_encoder.py')
-rw-r--r-- models/auto_encoder.py 43
1 files changed, 31 insertions, 12 deletions
diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 69dae4e..7b9b29f 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -123,23 +123,42 @@ class AutoEncoder(nn.Module):
self.decoder = Decoder(embedding_dims, feature_channels, channels)
def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
+ n, t, c, h, w = x_c1_t2.size()
# x_c1_t2 is the frame for later module
- (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
+ x_c1_t2_ = x_c1_t2.view(n * t, c, h, w)
+ (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_)
if self.training:
# t1 is random time step, c2 is another condition
- (f_a_c1_t1, f_c_c1_t1, _) = self.encoder(x_c1_t1)
- (_, f_c_c2_t2, f_p_c2_t2) = self.encoder(x_c2_t2)
-
- x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2)
- xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_)
- cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
- + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
+ x_c1_t1 = x_c1_t1.view(n * t, c, h, w)
+ (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1)
+ x_c2_t2 = x_c2_t2.view(n * t, c, h, w)
+ (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2)
+
+ x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_)
+ x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w)
+
+ xrecon_loss = torch.stack([
+ F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :])
+ for i in range(t)
+ ]).sum()
+
+ f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1)
+ f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1)
+ f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1)
+ cano_cons_loss = torch.stack([
+ F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :])
+ + F.mse_loss(f_c_c1_t2[:, i, :], f_c_c2_t2[:, i, :])
+ for i in range(t)
+ ]).mean()
+
+ f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1)
+ f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1)
+ pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1))
return (
- (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2),
- (f_p_c1_t2, f_p_c2_t2),
- (xrecon_loss_t2, cano_cons_loss_t2)
+ (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_),
+ (xrecon_loss, cano_cons_loss, pose_sim_loss * 10)
)
else: # evaluating
- return f_c_c1_t2, f_p_c1_t2
+ return f_c_c1_t2_, f_p_c1_t2_