From 507e1d163aaa6ea4be23e7f08ff6ce0ef58c830b Mon Sep 17 00:00:00 2001
From: Jordan Gong
Date: Sat, 23 Jan 2021 22:19:51 +0800
Subject: Remove the third term in canonical consistency loss

---
 models/auto_encoder.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'models/auto_encoder.py')

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 36be868..35cb629 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -113,7 +113,6 @@ class Decoder(nn.Module):
 class AutoEncoder(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             channels: int = 3,
             feature_channels: int = 64,
             embedding_dims: tuple[int, int, int] = (128, 128, 64)
@@ -122,25 +121,23 @@ class AutoEncoder(nn.Module):
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
 
-        f_c_dim = embedding_dims[1]
-        self.classifier = nn.Sequential(
-            nn.LeakyReLU(0.2, inplace=True),
-            BasicLinear(f_c_dim, num_class)
-        )
-
-    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None, y=None):
+    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
         # x_c1_t2 is the frame for later module
         (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
 
         with torch.no_grad():
             # Decode canonical features for HPM
             x_c_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2),
+                torch.zeros_like(f_a_c1_t2),
+                f_c_c1_t2,
+                torch.zeros_like(f_p_c1_t2),
                 no_trans_conv=True
             )
             # Decode pose features for Part Net
             x_p_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2
+                torch.zeros_like(f_a_c1_t2),
+                torch.zeros_like(f_c_c1_t2),
+                f_p_c1_t2
             )
 
         if self.training:
@@ -150,11 +147,8 @@ class AutoEncoder(nn.Module):
 
             x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2)
             xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_)
-
-            y_ = self.classifier(f_c_c1_t2.contiguous())
             cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
-                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2)
-                                 + F.cross_entropy(y_, y))
+                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
 
             return (
                 (x_c_c1_t2, x_p_c1_t2),
-- cgit v1.2.3
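Note: after this commit the canonical consistency term keeps only the two MSE penalties between
canonical encodings; the classifier branch and its cross-entropy term are gone. A minimal sketch
of the resulting loss as a standalone function (the helper name `canonical_consistency_loss` is
illustrative and not part of the patch):

    import torch
    import torch.nn.functional as F

    def canonical_consistency_loss(f_c_c1_t1: torch.Tensor,
                                   f_c_c1_t2: torch.Tensor,
                                   f_c_c2_t2: torch.Tensor) -> torch.Tensor:
        # Consistency across time steps under the same condition (c1: t1 vs t2)
        # plus consistency across conditions at the same time step (t2: c1 vs c2).
        return (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
                + F.mse_loss(f_c_c1_t2, f_c_c2_t2))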
From 99ddd7c142a4ec97cb8bd14b204651790b3cf4ee Mon Sep 17 00:00:00 2001
From: Jordan Gong
Date: Mon, 8 Feb 2021 18:11:25 +0800
Subject: Code refactoring, modifications and new features

1. Decode features outside of auto-encoder
2. Turn off HPM 1x1 conv by default
3. Change canonical feature map size from `feature_channels * 8 x 4 x 2` to `feature_channels * 2 x 16 x 8`
4. Use mean of canonical embeddings instead of mean of static features
5. Calculate static and dynamic loss separately
6. Calculate mean of parts in triplet loss instead of sum of parts
7. Add switch to log disentangled images
8. Change default configuration
---
 models/auto_encoder.py | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

(limited to 'models/auto_encoder.py')

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 35cb629..f04ffdb 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -95,15 +95,14 @@ class Decoder(nn.Module):
         self.trans_conv4 = DCGANConvTranspose2d(feature_channels, out_channels,
                                                  is_last_layer=True)
 
-    def forward(self, f_appearance, f_canonical, f_pose, no_trans_conv=False):
+    def forward(self, f_appearance, f_canonical, f_pose, cano_only=False):
         x = torch.cat((f_appearance, f_canonical, f_pose), dim=1)
         x = self.fc(x)
         x = F.relu(x.view(-1, self.feature_channels * 8, 4, 2), inplace=True)
-        # Decode canonical features without transpose convolutions
-        if no_trans_conv:
-            return x
         x = self.trans_conv1(x)
         x = self.trans_conv2(x)
+        if cano_only:
+            return x
         x = self.trans_conv3(x)
         x = torch.sigmoid(self.trans_conv4(x))
 
@@ -125,21 +124,6 @@ class AutoEncoder(nn.Module):
         # x_c1_t2 is the frame for later module
         (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
 
-        with torch.no_grad():
-            # Decode canonical features for HPM
-            x_c_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2),
-                f_c_c1_t2,
-                torch.zeros_like(f_p_c1_t2),
-                no_trans_conv=True
-            )
-            # Decode pose features for Part Net
-            x_p_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2),
-                torch.zeros_like(f_c_c1_t2),
-                f_p_c1_t2
-            )
-
         if self.training:
             # t1 is random time step, c2 is another condition
             (f_a_c1_t1, f_c_c1_t1, _) = self.encoder(x_c1_t1)
@@ -151,9 +135,9 @@ class AutoEncoder(nn.Module):
                                  + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
 
             return (
-                (x_c_c1_t2, x_p_c1_t2),
+                (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2),
                 (f_p_c1_t2, f_p_c2_t2),
                 (xrecon_loss_t2, cano_cons_loss_t2)
             )
         else:  # evaluating
-            return x_c_c1_t2, x_p_c1_t2
+            return f_c_c1_t2, f_p_c1_t2
-- cgit v1.2.3
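Note: after this refactoring the auto-encoder returns raw embeddings ((f_a_c1_t2, f_c_c1_t2, f_p_c1_t2)
during training, (f_c_c1_t2, f_p_c1_t2) during evaluation) and the decoding for HPM and PartNet happens
in the caller. A rough caller-side sketch of that external decoding under the new `cano_only` switch;
the helper name `decode_views`, the `torch.no_grad()` wrapper, and the zeroing of unused features mirror
the removed code but are assumptions, since the calling module is not shown in this patch:

    import torch
    from torch import nn

    def decode_views(decoder: nn.Module,
                     f_a: torch.Tensor,
                     f_c: torch.Tensor,
                     f_p: torch.Tensor):
        with torch.no_grad():
            # Canonical feature map for HPM: cano_only=True stops after trans_conv2,
            # i.e. a feature_channels * 2 x 16 x 8 map (see item 3 in the message above).
            x_c = decoder(torch.zeros_like(f_a), f_c, torch.zeros_like(f_p),
                          cano_only=True)
            # Pose image for PartNet: full decode through all transpose convolutions.
            x_p = decoder(torch.zeros_like(f_a), torch.zeros_like(f_c), f_p)
        return x_c, x_p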