From 507e1d163aaa6ea4be23e7f08ff6ce0ef58c830b Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Sat, 23 Jan 2021 22:19:51 +0800
Subject: Remove the third (cross-entropy) term in canonical consistency loss

---
 models/auto_encoder.py | 22 ++++++++--------------
 models/model.py        |  4 +---
 models/rgb_part_net.py |  9 ++++-----
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 36be868..35cb629 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -113,7 +113,6 @@ class Decoder(nn.Module):
 class AutoEncoder(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             channels: int = 3,
             feature_channels: int = 64,
             embedding_dims: tuple[int, int, int] = (128, 128, 64)
@@ -122,25 +121,23 @@ class AutoEncoder(nn.Module):
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
 
-        f_c_dim = embedding_dims[1]
-        self.classifier = nn.Sequential(
-            nn.LeakyReLU(0.2, inplace=True),
-            BasicLinear(f_c_dim, num_class)
-        )
-
-    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None, y=None):
+    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
         # x_c1_t2 is the frame for later module
         (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
 
         with torch.no_grad():
             # Decode canonical features for HPM
             x_c_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2),
+                torch.zeros_like(f_a_c1_t2),
+                f_c_c1_t2,
+                torch.zeros_like(f_p_c1_t2),
                 no_trans_conv=True
             )
             # Decode pose features for Part Net
             x_p_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2
+                torch.zeros_like(f_a_c1_t2),
+                torch.zeros_like(f_c_c1_t2),
+                f_p_c1_t2
             )
 
         if self.training:
@@ -150,11 +147,8 @@ class AutoEncoder(nn.Module):
 
             x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2)
             xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_)
-
-            y_ = self.classifier(f_c_c1_t2.contiguous())
             cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
-                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2)
-                                 + F.cross_entropy(y_, y))
+                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
 
             return (
                 (x_c_c1_t2, x_p_c1_t2),
diff --git a/models/model.py b/models/model.py
index cccb6c4..ddb715d 100644
--- a/models/model.py
+++ b/models/model.py
@@ -54,7 +54,6 @@ class Model:
         self.total_iters = self.meta.get('total_iters', (80000, 80000, 80000))
 
         self.is_train: bool = True
-        self.train_size: int = 74
         self.in_channels: int = 3
         self.pr: Optional[int] = None
         self.k: Optional[int] = None
@@ -147,7 +146,7 @@ class Model:
         hpm_optim_hp = optim_hp.pop('hpm', {})
         fc_optim_hp = optim_hp.pop('fc', {})
         sched_hp = self.hp.get('scheduler', {})
-        self.rgb_pn = RGBPartNet(self.train_size, self.in_channels, **model_hp)
+        self.rgb_pn = RGBPartNet(self.in_channels, **model_hp)
         # Try to accelerate computation using CUDA or others
         self.rgb_pn = self.rgb_pn.to(self.device)
         self.optimizer = optim.Adam([
@@ -409,7 +408,6 @@ class Model:
             self,
             dataset_config: DatasetConfiguration
     ) -> Union[CASIAB]:
-        self.train_size = dataset_config.get('train_size', 74)
         self.in_channels = dataset_config.get('num_input_channels', 3)
         self._dataset_sig = self._make_signature(
             dataset_config,
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index 2cc0958..755d5dc 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -13,7 +13,6 @@ from utils.triplet_loss import BatchAllTripletLoss
 class RGBPartNet(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             ae_in_channels: int = 3,
             ae_feature_channels: int = 64,
             f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
@@ -31,7 +30,7 @@ class RGBPartNet(nn.Module):
     ):
         super().__init__()
         self.ae = AutoEncoder(
-            num_class, ae_in_channels, ae_feature_channels, f_a_c_p_dims
+            ae_in_channels, ae_feature_channels, f_a_c_p_dims
         )
         self.pn = PartNet(
             ae_in_channels, fpfe_feature_channels, fpfe_kernel_sizes,
@@ -60,7 +59,7 @@ class RGBPartNet(nn.Module):
 
         # Step 1: Disentanglement
         # t, n, c, h, w
-        ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2, y)
+        ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2)
 
         # Step 2.a: HPM & Static Gait Feature Aggregation
         # t, n, c, h, w
@@ -85,7 +84,7 @@ class RGBPartNet(nn.Module):
         else:
             return x.unsqueeze(1).view(-1)
 
-    def _disentangle(self, x_c1, x_c2=None, y=None):
+    def _disentangle(self, x_c1, x_c2=None):
         t, n, c, h, w = x_c1.size()
         if self.training:
             # Decoded canonical features and Pose images
@@ -95,7 +94,7 @@ class RGBPartNet(nn.Module):
             xrecon_loss, cano_cons_loss = [], []
             for t2 in range(t):
                 t1 = random.randrange(t)
-                output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2], y)
+                output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2])
                 (x_c1_t2, f_p_t2, losses) = output
 
                 # Decoded features or image
-- 
cgit v1.2.3