From 98b6e6dc3be6f88abb72e351c8f2da2b23b8ab85 Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Thu, 7 Jan 2021 19:55:00 +0800
Subject: Use typing.Tuple/List in type hints for Python versions lower than 3.9

---
 models/rgb_part_net.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'models/rgb_part_net.py')

diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index 3037da0..39cbed6 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -1,4 +1,5 @@
 import random
+from typing import Tuple, List
 
 import torch
 import torch.nn as nn
@@ -16,14 +17,14 @@ class RGBPartNet(nn.Module):
             num_class: int = 74,
             ae_in_channels: int = 3,
             ae_feature_channels: int = 64,
-            f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
-            hpm_scales: tuple[int, ...] = (1, 2, 4),
+            f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64),
+            hpm_scales: Tuple[int, ...] = (1, 2, 4),
             hpm_use_avg_pool: bool = True,
             hpm_use_max_pool: bool = True,
             fpfe_feature_channels: int = 32,
-            fpfe_kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            fpfe_paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            fpfe_halving: tuple[int, ...] = (0, 2, 3),
+            fpfe_kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            fpfe_paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            fpfe_halving: Tuple[int, ...] = (0, 2, 3),
             tfa_squeeze_ratio: int = 4,
             tfa_num_parts: int = 16,
             embedding_dims: int = 256,
@@ -142,8 +143,8 @@ class RGBPartNet(nn.Module):
             return (x_c_c1, x_p_c1), None
 
     @staticmethod
-    def _pose_sim_loss(f_p_c1: list[torch.Tensor],
-                       f_p_c2: list[torch.Tensor]) -> torch.Tensor:
+    def _pose_sim_loss(f_p_c1: List[torch.Tensor],
+                       f_p_c2: List[torch.Tensor]) -> torch.Tensor:
         f_p_c1_mean = torch.stack(f_p_c1).mean(dim=0)
         f_p_c2_mean = torch.stack(f_p_c2).mean(dim=0)
         return F.mse_loss(f_p_c1_mean, f_p_c2_mean)
-- 
cgit v1.2.3


From 24b5968bfc5799e44c9bbbc00e3a9be00f4509ac Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Mon, 15 Feb 2021 11:08:52 +0800
Subject: Revert "Memory usage improvement"

This reverts commit be508061.
---
 models/rgb_part_net.py | 117 ++++++++++++++++++++++++-------------------------
 1 file changed, 57 insertions(+), 60 deletions(-)

(limited to 'models/rgb_part_net.py')

diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index c489ec6..260eabd 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -58,67 +58,64 @@ class RGBPartNet(nn.Module):
     def fc(self, x):
         return x @ self.fc_mat
 
-    def forward(self, x, y=None, is_c1=True):
-        # Step 1a: Disentangle condition 1 clips
-        if is_c1:
-            # n, t, c, h, w
-            ((x_c, x_p), xrecon_loss, images) = self._disentangle(x, is_c1)
-
-            # Step 2.a: Static Gait Feature Aggregation & HPM
-            # n, c, h, w
-            x_c = self.hpm(x_c)
-            # p, n, c
-
-            # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation)
-            # n, t, c, h, w
-            x_p = self.pn(x_p)
-            # p, n, c
-
-            # Step 3: Cat feature map together and fc
-            x = torch.cat((x_c, x_p))
-            x = self.fc(x)
-
-            if self.training:
-                y = y.T
-                hpm_ba_trip = self.hpm_ba_trip(
-                    x[:self.hpm_num_parts], y[:self.hpm_num_parts]
-                )
-                pn_ba_trip = self.pn_ba_trip(
-                    x[self.hpm_num_parts:], y[self.hpm_num_parts:]
-                )
-                return (xrecon_loss, hpm_ba_trip, pn_ba_trip), images
-            else:  # evaluating
-                return x.unsqueeze(1).view(-1)
-        else:  # Step 1b: Disentangle condition 2 clips
-            return self._disentangle(x, is_c1)
-
-    def _disentangle(self, x_t2, is_c1=True):
-        if is_c1:  # condition 1
-            n, t, *_ = x_size = x_t2.size()
-            device = x_t2.device
-            if self.training:
-                (f_a_, f_c_, f_p_), xrecon_loss = self.ae(x_t2, is_c1)
-                # Decode features
-                with torch.no_grad():
-                    x_c = self._decode_cano_feature(f_c_, n, t, device)
-                    x_p = self._decode_pose_feature(f_p_, *x_size, device)
-
-                    i_a, i_c, i_p = None, None, None
-                    if self.image_log_on:
-                        i_a = self._decode_appr_feature(f_a_, *x_size, device)
-                        # Continue decoding canonical features
-                        i_c = self.ae.decoder.trans_conv3(x_c)
-                        i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c))
-                        i_p = x_p
-
-                return (x_c, x_p), xrecon_loss, (i_a, i_c, i_p)
-            else:  # evaluating
-                f_c_, f_p_ = self.ae(x_t2)
+    def forward(self, x_c1, x_c2=None, y=None):
+        # Step 1: Disentanglement
+        # n, t, c, h, w
+        ((x_c, x_p), losses, images) = self._disentangle(x_c1, x_c2)
+
+        # Step 2.a: Static Gait Feature Aggregation & HPM
+        # n, c, h, w
+        x_c = self.hpm(x_c)
+        # p, n, c
+
+        # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation)
+        # n, t, c, h, w
+        x_p = self.pn(x_p)
+        # p, n, c
+
+        # Step 3: Cat feature map together and fc
+        x = torch.cat((x_c, x_p))
+        x = self.fc(x)
+
+        if self.training:
+            y = y.T
+            hpm_ba_trip = self.hpm_ba_trip(
+                x[:self.hpm_num_parts], y[:self.hpm_num_parts]
+            )
+            pn_ba_trip = self.pn_ba_trip(
+                x[self.hpm_num_parts:], y[self.hpm_num_parts:]
+            )
+            losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip))
+            return losses, images
+        else:
+            return x.unsqueeze(1).view(-1)
+
+    def _disentangle(self, x_c1_t2, x_c2_t2=None):
+        n, t, c, h, w = x_c1_t2.size()
+        device = x_c1_t2.device
+        x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :]
+        if self.training:
+            ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2)
+            # Decode features
+            with torch.no_grad():
                 x_c = self._decode_cano_feature(f_c_, n, t, device)
-                x_p = self._decode_pose_feature(f_p_, *x_size, device)
-                return (x_c, x_p), None, None
-        else:  # condition 2
-            return self.ae(x_t2, is_c1)
+                x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device)
+
+                i_a, i_c, i_p = None, None, None
+                if self.image_log_on:
+                    i_a = self._decode_appr_feature(f_a_, n, t, c, h, w, device)
+                    # Continue decoding canonical features
+                    i_c = self.ae.decoder.trans_conv3(x_c)
+                    i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c))
+                    i_p = x_p
+
+            return (x_c, x_p), losses, (i_a, i_c, i_p)
+
+        else:  # evaluating
+            f_c_, f_p_ = self.ae(x_c1_t2)
+            x_c = self._decode_cano_feature(f_c_, n, t, device)
+            x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device)
+            return (x_c, x_p), None, None
 
     def _decode_appr_feature(self, f_a_, n, t, c, h, w, device):
         # Decode appearance features
-- 
cgit v1.2.3