From 98b6e6dc3be6f88abb72e351c8f2da2b23b8ab85 Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Thu, 7 Jan 2021 19:55:00 +0800
Subject: Type hint for python version lower than 3.9

---
 models/auto_encoder.py |  8 +++++---
 models/hpm.py          |  4 +++-
 models/layers.py       | 20 ++++++++++----------
 models/model.py        | 14 +++++++-------
 models/part_net.py     | 13 +++++++------
 models/rgb_part_net.py | 15 ++++++++-------
 6 files changed, 40 insertions(+), 34 deletions(-)

(limited to 'models')

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 7c1f7ef..1e7c323 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -12,7 +14,7 @@ class Encoder(nn.Module):
             self,
             in_channels: int = 3,
             feature_channels: int = 64,
-            output_dims: tuple[int, int, int] = (128, 128, 64)
+            output_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.feature_channels = feature_channels
@@ -67,7 +69,7 @@ class Decoder(nn.Module):
 
     def __init__(
             self,
-            input_dims: tuple[int, int, int] = (128, 128, 64),
+            input_dims: Tuple[int, int, int] = (128, 128, 64),
             feature_channels: int = 64,
             out_channels: int = 3,
     ):
@@ -116,7 +118,7 @@ class AutoEncoder(nn.Module):
             num_class: int = 74,
             channels: int = 3,
             feature_channels: int = 64,
-            embedding_dims: tuple[int, int, int] = (128, 128, 64)
+            embedding_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
diff --git a/models/hpm.py b/models/hpm.py
index 66503e3..7505ed7 100644
--- a/models/hpm.py
+++ b/models/hpm.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 
@@ -9,7 +11,7 @@ class HorizontalPyramidMatching(nn.Module):
             self,
             in_channels: int,
             out_channels: int = 128,
-            scales: tuple[int, ...] = (1, 2, 4),
+            scales: Tuple[int, ...] = (1, 2, 4),
             use_avg_pool: bool = True,
             use_max_pool: bool = True,
             **kwargs
diff --git a/models/layers.py b/models/layers.py
index a9f04b3..7f2ccec 100644
--- a/models/layers.py
+++ b/models/layers.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Tuple
 
 import torch
 import torch.nn as nn
@@ -10,7 +10,7 @@ class BasicConv2d(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             **kwargs
     ):
         super().__init__()
@@ -29,7 +29,7 @@ class VGGConv2d(BasicConv2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 3,
+            kernel_size: Union[int, Tuple[int, int]] = 3,
             padding: int = 1,
             **kwargs
     ):
@@ -47,7 +47,7 @@ class BasicConvTranspose2d(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             **kwargs
     ):
         super().__init__()
@@ -66,7 +66,7 @@ class DCGANConvTranspose2d(BasicConvTranspose2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 4,
+            kernel_size: Union[int, Tuple[int, int]] = 4,
             stride: int = 2,
             padding: int = 1,
             is_last_layer: bool = False,
@@ -104,7 +104,7 @@ class FocalConv2d(BasicConv2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             halving: int,
             **kwargs
     ):
@@ -124,8 +124,8 @@ class FocalConv2dBlock(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_sizes: tuple[int, int],
-            paddings: tuple[int, int],
+            kernel_sizes: Tuple[int, int],
+            paddings: Tuple[int, int],
             halving: int,
             use_pool: bool = True,
             **kwargs
@@ -151,7 +151,7 @@ class BasicConv1d(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int]],
+            kernel_size: Union[int, Tuple[int]],
             **kwargs
     ):
         super().__init__()
@@ -167,7 +167,7 @@ class HorizontalPyramidPooling(BasicConv2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 1,
+            kernel_size: Union[int, Tuple[int, int]] = 1,
             use_avg_pool: bool = True,
             use_max_pool: bool = True,
             **kwargs
diff --git a/models/model.py b/models/model.py
index 1dc0f23..4deced0 100644
--- a/models/model.py
+++ b/models/model.py
@@ -1,5 +1,5 @@
 import os
-from typing import Union, Optional
+from typing import Union, Optional, Tuple, List
 
 import numpy as np
 import torch
@@ -195,9 +195,9 @@ class Model:
 
     def _batch_splitter(
             self,
-            batch: list[dict[str, Union[np.int64, str, torch.Tensor]]]
-    ) -> tuple[dict[str, Union[list[str], torch.Tensor]],
-               dict[str, Union[list[str], torch.Tensor]]]:
+            batch: List[dict[str, Union[np.int64, str, torch.Tensor]]]
+    ) -> Tuple[dict[str, Union[List[str], torch.Tensor]],
+               dict[str, Union[List[str], torch.Tensor]]]:
         """
         Disentanglement need two random conditions, this function will
         split pr * k * 2 samples to 2 dicts each containing pr * k
@@ -212,7 +212,7 @@ class Model:
 
     def _make_signature(self,
                         config: dict,
-                        popped_keys: Optional[list] = None) -> str:
+                        popped_keys: Optional[List] = None) -> str:
         _config = config.copy()
         if popped_keys:
             for key in popped_keys:
@@ -220,12 +220,12 @@ class Model:
 
         return self._gen_sig(list(_config.values()))
 
-    def _gen_sig(self, values: Union[tuple, list, str, int, float]) -> str:
+    def _gen_sig(self, values: Union[Tuple, List, str, int, float]) -> str:
         strings = []
         for v in values:
             if isinstance(v, str):
                 strings.append(v)
-            elif isinstance(v, (tuple, list)):
+            elif isinstance(v, (tuple, list)):
                 strings.append(self._gen_sig(v))
             else:
                 strings.append(str(v))
diff --git a/models/part_net.py b/models/part_net.py
index ac7c434..6d8d4e1 100644
--- a/models/part_net.py
+++ b/models/part_net.py
@@ -1,4 +1,5 @@
 import copy
+from typing import Tuple
 
 import torch
 import torch.nn as nn
@@ -12,9 +13,9 @@ class FrameLevelPartFeatureExtractor(nn.Module):
             self,
             in_channels: int = 3,
             feature_channels: int = 32,
-            kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            halving: tuple[int, ...] = (0, 2, 3)
+            kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            halving: Tuple[int, ...] = (0, 2, 3)
     ):
         super().__init__()
         num_blocks = len(kernel_sizes)
@@ -112,9 +113,9 @@ class PartNet(nn.Module):
             self,
             in_channels: int = 3,
             feature_channels: int = 32,
-            kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            halving: tuple[int, ...] = (0, 2, 3),
+            kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            halving: Tuple[int, ...] = (0, 2, 3),
             squeeze_ratio: int = 4,
             num_part: int = 16
     ):
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index 3037da0..39cbed6 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -1,4 +1,5 @@
 import random
+from typing import Tuple, List
 
 import torch
 import torch.nn as nn
@@ -16,14 +17,14 @@ class RGBPartNet(nn.Module):
             num_class: int = 74,
             ae_in_channels: int = 3,
             ae_feature_channels: int = 64,
-            f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
-            hpm_scales: tuple[int, ...] = (1, 2, 4),
+            f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64),
+            hpm_scales: Tuple[int, ...] = (1, 2, 4),
             hpm_use_avg_pool: bool = True,
             hpm_use_max_pool: bool = True,
             fpfe_feature_channels: int = 32,
-            fpfe_kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            fpfe_paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            fpfe_halving: tuple[int, ...] = (0, 2, 3),
+            fpfe_kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            fpfe_paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            fpfe_halving: Tuple[int, ...] = (0, 2, 3),
             tfa_squeeze_ratio: int = 4,
             tfa_num_parts: int = 16,
             embedding_dims: int = 256,
@@ -142,8 +143,8 @@ class RGBPartNet(nn.Module):
             return (x_c_c1, x_p_c1), None
 
     @staticmethod
-    def _pose_sim_loss(f_p_c1: list[torch.Tensor],
-                       f_p_c2: list[torch.Tensor]) -> torch.Tensor:
+    def _pose_sim_loss(f_p_c1: List[torch.Tensor],
+                       f_p_c2: List[torch.Tensor]) -> torch.Tensor:
         f_p_c1_mean = torch.stack(f_p_c1).mean(dim=0)
         f_p_c2_mean = torch.stack(f_p_c2).mean(dim=0)
         return F.mse_loss(f_p_c1_mean, f_p_c2_mean)
-- 
cgit v1.2.3


From e5a73abd80578aa5e46d8d444466d1e6346ec6ec Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Thu, 7 Jan 2021 19:55:00 +0800
Subject: Type hint for python version lower than 3.9

---
 models/model.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'models')

diff --git a/models/model.py b/models/model.py
index 4deced0..725988a 100644
--- a/models/model.py
+++ b/models/model.py
@@ -1,5 +1,5 @@
 import os
-from typing import Union, Optional, Tuple, List
+from typing import Union, Optional, Tuple, List, Dict
 
 import numpy as np
 import torch
@@ -166,7 +166,7 @@ class Model:
             popped_keys=['root_dir', 'cache_on']
         )
         self.log_name = '_'.join((self.log_name, self._dataset_sig))
-        config: dict = dataset_config.copy()
+        config: Dict = dataset_config.copy()
         name = config.pop('name')
         if name == 'CASIA-B':
             return CASIAB(**config, is_train=self.is_train)
@@ -180,7 +180,7 @@ class Model:
             dataset: Union[CASIAB],
             dataloader_config: DataloaderConfiguration
     ) -> DataLoader:
-        config: dict = dataloader_config.copy()
+        config: Dict = dataloader_config.copy()
         if self.is_train:
             (self.pr, self.k) = config.pop('batch_size')
             self.log_name = '_'.join((self.log_name, str(self.pr), str(self.k)))
@@ -195,9 +195,9 @@ class Model:
 
     def _batch_splitter(
             self,
-            batch: List[dict[str, Union[np.int64, str, torch.Tensor]]]
-    ) -> Tuple[dict[str, Union[List[str], torch.Tensor]],
-               dict[str, Union[List[str], torch.Tensor]]]:
+            batch: List[Dict[str, Union[np.int64, str, torch.Tensor]]]
+    ) -> Tuple[Dict[str, Union[List[str], torch.Tensor]],
+               Dict[str, Union[List[str], torch.Tensor]]]:
         """
         Disentanglement need two random conditions, this function will
         split pr * k * 2 samples to 2 dicts each containing pr * k
@@ -211,7 +211,7 @@ class Model:
         return default_collate(_batch[0]), default_collate(_batch[1])
 
     def _make_signature(self,
-                        config: dict,
+                        config: Dict,
                         popped_keys: Optional[List] = None) -> str:
         _config = config.copy()
         if popped_keys:
-- 
cgit v1.2.3


From d750dd9dafe3cda3b1331ad2bfecb53c8c2b1267 Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Thu, 21 Jan 2021 23:47:11 +0800
Subject: A type hint fix

---
 models/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'models')

diff --git a/models/model.py b/models/model.py
index bed28a5..f4604c8 100644
--- a/models/model.py
+++ b/models/model.py
@@ -133,7 +133,7 @@ class Model:
         dataloader = self._parse_dataloader_config(dataset, dataloader_config)
         # Prepare for model, optimizer and scheduler
         model_hp = self.hp.get('model', {})
-        optim_hp: dict = self.hp.get('optimizer', {}).copy()
+        optim_hp: Dict = self.hp.get('optimizer', {}).copy()
         ae_optim_hp = optim_hp.pop('auto_encoder', {})
         pn_optim_hp = optim_hp.pop('part_net', {})
         hpm_optim_hp = optim_hp.pop('hpm', {})
-- 
cgit v1.2.3


From a040400d7caa267d4bfbe8e5520568806f92b3d4 Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Sat, 23 Jan 2021 00:43:20 +0800
Subject: Type hint fixes

---
 models/model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'models')

diff --git a/models/model.py b/models/model.py
index 8992914..1dd195e 100644
--- a/models/model.py
+++ b/models/model.py
@@ -220,7 +220,7 @@ class Model:
 
     def predict_all(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
             dataset_selectors: Dict[
                 str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
@@ -266,7 +266,7 @@ class Model:
 
         return self._evaluate(gallery_samples, probe_samples)
 
-    def _get_eval_sample(self, sample: dict[str, Union[list, torch.Tensor]]):
+    def _get_eval_sample(self, sample: Dict[str, Union[List, torch.Tensor]]):
         label = sample.pop('label').item()
         clip = sample.pop('clip').to(self.device)
         feature = self.rgb_pn(clip).detach()
@@ -326,7 +326,7 @@ class Model:
 
     def _load_pretrained(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
             dataset_selectors: Dict[
                 str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
-- 
cgit v1.2.3


From 24b5968bfc5799e44c9bbbc00e3a9be00f4509ac Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Mon, 15 Feb 2021 11:08:52 +0800
Subject: Revert "Memory usage improvement"

This reverts commit be508061
---
 models/auto_encoder.py |  70 +++++++++++++----------------
 models/model.py        |  21 +++------
 models/rgb_part_net.py | 117 ++++++++++++++++++++++++-------------------------
 3 files changed, 92 insertions(+), 116 deletions(-)

(limited to 'models')

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 918a95c..7b9b29f 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -119,47 +119,32 @@ class AutoEncoder(nn.Module):
             embedding_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
-        self.f_c_c1_t2_ = None
-        self.f_p_c1_t2_ = None
-        self.f_c_c1_t1_ = None
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
 
-    def forward(self, x_t2, is_c1=True):
-        n, t, c, h, w = x_t2.size()
-        if is_c1:  # condition 1
-            # x_c1_t2 is the frame for later module
-            x_c1_t2_ = x_t2.view(n * t, c, h, w)
-            (f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_) \
-                = self.encoder(x_c1_t2_)
-
-            if self.training:
-                # t1 is random time step
-                x_c1_t1 = x_t2[:, torch.randperm(t), :, :, :]
-                x_c1_t1_ = x_c1_t1.view(n * t, c, h, w)
-                (f_a_c1_t1_, self.f_c_c1_t1_, _) = self.encoder(x_c1_t1_)
-
-                x_c1_t2_pred_ = self.decoder(
-                    f_a_c1_t1_, self.f_c_c1_t1_, self.f_p_c1_t2_
-                )
-                x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w)
-
-                xrecon_loss = torch.stack([
-                    F.mse_loss(x_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :])
-                    for i in range(t)
-                ]).sum()
-
-                return ((f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_),
-                        xrecon_loss)
-            else:  # evaluating
-                return self.f_c_c1_t2_, self.f_p_c1_t2_
-        else:  # condition 2
-            # c2 is another condition
-            x_c2_t2_ = x_t2.view(n * t, c, h, w)
-            (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2_)
-
-            f_c_c1_t1 = self.f_c_c1_t1_.view(n, t, -1)
-            f_c_c1_t2 = self.f_c_c1_t2_.view(n, t, -1)
+    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
+        n, t, c, h, w = x_c1_t2.size()
+        # x_c1_t2 is the frame for later module
+        x_c1_t2_ = x_c1_t2.view(n * t, c, h, w)
+        (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_)
+
+        if self.training:
+            # t1 is random time step, c2 is another condition
+            x_c1_t1 = x_c1_t1.view(n * t, c, h, w)
+            (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1)
+            x_c2_t2 = x_c2_t2.view(n * t, c, h, w)
+            (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2)
+
+            x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_)
+            x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w)
+
+            xrecon_loss = torch.stack([
+                F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :])
+                for i in range(t)
+            ]).sum()
+
+            f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1)
+            f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1)
             f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1)
             cano_cons_loss = torch.stack([
                 F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :])
@@ -167,8 +152,13 @@ class AutoEncoder(nn.Module):
                 for i in range(t)
             ]).mean()
 
-            f_p_c1_t2 = self.f_p_c1_t2_.view(n, t, -1)
+            f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1)
             f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1)
             pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1))
 
-            return cano_cons_loss, pose_sim_loss * 10
+            return (
+                (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_),
+                (xrecon_loss, cano_cons_loss, pose_sim_loss * 10)
+            )
+        else:  # evaluating
+            return f_c_c1_t2_, f_p_c1_t2_
diff --git a/models/model.py b/models/model.py
index 3aeb754..9748e46 100644
--- a/models/model.py
+++ b/models/model.py
@@ -182,7 +182,7 @@ class Model:
         # Training start
         start_time = datetime.now()
         running_loss = torch.zeros(5, device=self.device)
-        print(f"{'Time':^8} {'Iter':^5} {'Loss':^5}",
+        print(f"{'Time':^8} {'Iter':^5} {'Loss':^6}",
               f"{'Xrecon':^8} {'CanoCons':^8} {'PoseSim':^8}",
               f"{'BATripH':^8} {'BATripP':^8} {'LRs':^19}")
         for (batch_c1, batch_c2) in dataloader:
@@ -190,21 +190,12 @@ class Model:
             # Zero the parameter gradients
             self.optimizer.zero_grad()
             # forward + backward + optimize
-            # Feed data twice in order to reduce memory usage
             x_c1 = batch_c1['clip'].to(self.device)
+            x_c2 = batch_c2['clip'].to(self.device)
             y = batch_c1['label'].to(self.device)
             # Duplicate labels for each part
             y = y.unsqueeze(1).repeat(1, self.rgb_pn.num_total_parts)
-            # Feed condition 1 clips first
-            losses, images = self.rgb_pn(x_c1, y)
-            (xrecon_loss, hpm_ba_trip, pn_ba_trip) = losses
-            x_c2 = batch_c2['clip'].to(self.device)
-            # Then feed condition 2 clips
-            cano_cons_loss, pose_sim_loss = self.rgb_pn(x_c2, is_c1=False)
-            losses = torch.stack((
-                xrecon_loss, cano_cons_loss, pose_sim_loss,
-                hpm_ba_trip, pn_ba_trip
-            ))
+            losses, images = self.rgb_pn(x_c1, x_c2, y)
             loss = losses.sum()
             loss.backward()
             self.optimizer.step()
@@ -234,9 +225,7 @@ class Model:
                     self.writer.add_images(
                         'Canonical image', i_c, self.curr_iter
                     )
-                    for (i, (o, a, p)) in enumerate(zip(
-                            batch_c1['clip'], i_a, i_p
-                    )):
+                    for (i, (o, a, p)) in enumerate(zip(x_c1, i_a, i_p)):
                         self.writer.add_images(
                             f'Original image/batch {i}', o, self.curr_iter
                         )
@@ -250,7 +239,7 @@ class Model:
                 remaining_minute, second = divmod(time_used.seconds, 60)
                 hour, minute = divmod(remaining_minute, 60)
                 print(f'{hour:02}:{minute:02}:{second:02}',
-                      f'{self.curr_iter:5d} {running_loss.sum() / 100:5.3f}',
+                      f'{self.curr_iter:5d} {running_loss.sum() / 100:6.3f}',
                       '{:f} {:f} {:f} {:f} {:f}'.format(*running_loss / 100),
                       '{:.3e} {:.3e}'.format(lrs[0], lrs[1]))
                 running_loss.zero_()
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index c489ec6..260eabd 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -58,67 +58,64 @@ class RGBPartNet(nn.Module):
     def fc(self, x):
         return x @ self.fc_mat
 
-    def forward(self, x, y=None, is_c1=True):
-        # Step 1a: Disentangle condition 1 clips
-        if is_c1:
-            # n, t, c, h, w
-            ((x_c, x_p), xrecon_loss, images) = self._disentangle(x, is_c1)
-
-            # Step 2.a: Static Gait Feature Aggregation & HPM
-            # n, c, h, w
-            x_c = self.hpm(x_c)
-            # p, n, c
-
-            # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation)
-            # n, t, c, h, w
-            x_p = self.pn(x_p)
-            # p, n, c
-
-            # Step 3: Cat feature map together and fc
-            x = torch.cat((x_c, x_p))
-            x = self.fc(x)
-
-            if self.training:
-                y = y.T
-                hpm_ba_trip = self.hpm_ba_trip(
-                    x[:self.hpm_num_parts], y[:self.hpm_num_parts]
-                )
-                pn_ba_trip = self.pn_ba_trip(
-                    x[self.hpm_num_parts:], y[self.hpm_num_parts:]
-                )
-                return (xrecon_loss, hpm_ba_trip, pn_ba_trip), images
-            else:  # evaluating
-                return x.unsqueeze(1).view(-1)
-        else:  # Step 1b: Disentangle condition 2 clips
-            return self._disentangle(x, is_c1)
-
-    def _disentangle(self, x_t2, is_c1=True):
-        if is_c1:  # condition 1
-            n, t, *_ = x_size = x_t2.size()
-            device = x_t2.device
-            if self.training:
-                (f_a_, f_c_, f_p_), xrecon_loss = self.ae(x_t2, is_c1)
-                # Decode features
-                with torch.no_grad():
-                    x_c = self._decode_cano_feature(f_c_, n, t, device)
-                    x_p = self._decode_pose_feature(f_p_, *x_size, device)
-
-                    i_a, i_c, i_p = None, None, None
-                    if self.image_log_on:
-                        i_a = self._decode_appr_feature(f_a_, *x_size, device)
-                        # Continue decoding canonical features
-                        i_c = self.ae.decoder.trans_conv3(x_c)
-                        i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c))
-                        i_p = x_p
-
-                return (x_c, x_p), xrecon_loss, (i_a, i_c, i_p)
-            else:  # evaluating
-                f_c_, f_p_ = self.ae(x_t2)
+    def forward(self, x_c1, x_c2=None, y=None):
+        # Step 1: Disentanglement
+        # n, t, c, h, w
+        ((x_c, x_p), losses, images) = self._disentangle(x_c1, x_c2)
+
+        # Step 2.a: Static Gait Feature Aggregation & HPM
+        # n, c, h, w
+        x_c = self.hpm(x_c)
+        # p, n, c
+
+        # Step 2.b: FPFE & TFA (Dynamic Gait Feature Aggregation)
+        # n, t, c, h, w
+        x_p = self.pn(x_p)
+        # p, n, c
+
+        # Step 3: Cat feature map together and fc
+        x = torch.cat((x_c, x_p))
+        x = self.fc(x)
+
+        if self.training:
+            y = y.T
+            hpm_ba_trip = self.hpm_ba_trip(
+                x[:self.hpm_num_parts], y[:self.hpm_num_parts]
+            )
+            pn_ba_trip = self.pn_ba_trip(
+                x[self.hpm_num_parts:], y[self.hpm_num_parts:]
+            )
+            losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip))
+            return losses, images
+        else:
+            return x.unsqueeze(1).view(-1)
+
+    def _disentangle(self, x_c1_t2, x_c2_t2=None):
+        n, t, c, h, w = x_c1_t2.size()
+        device = x_c1_t2.device
+        x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :]
+        if self.training:
+            ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2)
+            # Decode features
+            with torch.no_grad():
                 x_c = self._decode_cano_feature(f_c_, n, t, device)
-                x_p = self._decode_pose_feature(f_p_, *x_size, device)
-                return (x_c, x_p), None, None
-        else:  # condition 2
-            return self.ae(x_t2, is_c1)
+                x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device)
+
+                i_a, i_c, i_p = None, None, None
+                if self.image_log_on:
+                    i_a = self._decode_appr_feature(f_a_, n, t, c, h, w, device)
+                    # Continue decoding canonical features
+                    i_c = self.ae.decoder.trans_conv3(x_c)
+                    i_c = torch.sigmoid(self.ae.decoder.trans_conv4(i_c))
+                    i_p = x_p
+
+            return (x_c, x_p), losses, (i_a, i_c, i_p)
+
+        else:  # evaluating
+            f_c_, f_p_ = self.ae(x_c1_t2)
+            x_c = self._decode_cano_feature(f_c_, n, t, device)
+            x_p = self._decode_pose_feature(f_p_, n, t, c, h, w, device)
+            return (x_c, x_p), None, None
 
     def _decode_appr_feature(self, f_a_, n, t, c, h, w, device):
         # Decode appearance features
-- 
cgit v1.2.3