From 98b6e6dc3be6f88abb72e351c8f2da2b23b8ab85 Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Thu, 7 Jan 2021 19:55:00 +0800
Subject: Use typing.Tuple type hints for Python versions lower than 3.9

---
 models/auto_encoder.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'models/auto_encoder.py')

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 7c1f7ef..1e7c323 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -12,7 +14,7 @@ class Encoder(nn.Module):
             self,
             in_channels: int = 3,
             feature_channels: int = 64,
-            output_dims: tuple[int, int, int] = (128, 128, 64)
+            output_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.feature_channels = feature_channels
@@ -67,7 +69,7 @@ class Decoder(nn.Module):
 
     def __init__(
             self,
-            input_dims: tuple[int, int, int] = (128, 128, 64),
+            input_dims: Tuple[int, int, int] = (128, 128, 64),
             feature_channels: int = 64,
             out_channels: int = 3,
     ):
@@ -116,7 +118,7 @@ class AutoEncoder(nn.Module):
             num_class: int = 74,
             channels: int = 3,
             feature_channels: int = 64,
-            embedding_dims: tuple[int, int, int] = (128, 128, 64)
+            embedding_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
-- 
cgit v1.2.3


From 24b5968bfc5799e44c9bbbc00e3a9be00f4509ac Mon Sep 17 00:00:00 2001
From: Jordan Gong <jordan.gong@protonmail.com>
Date: Mon, 15 Feb 2021 11:08:52 +0800
Subject: Revert "Memory usage improvement"

This reverts commit be508061.
---
 models/auto_encoder.py | 70 ++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

(limited to 'models/auto_encoder.py')

diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 918a95c..7b9b29f 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -119,47 +119,32 @@ class AutoEncoder(nn.Module):
             embedding_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
-        self.f_c_c1_t2_ = None
-        self.f_p_c1_t2_ = None
-        self.f_c_c1_t1_ = None
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
 
-    def forward(self, x_t2, is_c1=True):
-        n, t, c, h, w = x_t2.size()
-        if is_c1:  # condition 1
-            # x_c1_t2 is the frame for later module
-            x_c1_t2_ = x_t2.view(n * t, c, h, w)
-            (f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_) \
-                = self.encoder(x_c1_t2_)
-
-            if self.training:
-                # t1 is random time step
-                x_c1_t1 = x_t2[:, torch.randperm(t), :, :, :]
-                x_c1_t1_ = x_c1_t1.view(n * t, c, h, w)
-                (f_a_c1_t1_, self.f_c_c1_t1_, _) = self.encoder(x_c1_t1_)
-
-                x_c1_t2_pred_ = self.decoder(
-                    f_a_c1_t1_, self.f_c_c1_t1_, self.f_p_c1_t2_
-                )
-                x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w)
-
-                xrecon_loss = torch.stack([
-                    F.mse_loss(x_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :])
-                    for i in range(t)
-                ]).sum()
-
-                return ((f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_),
-                        xrecon_loss)
-            else:  # evaluating
-                return self.f_c_c1_t2_, self.f_p_c1_t2_
-        else:  # condition 2
-            # c2 is another condition
-            x_c2_t2_ = x_t2.view(n * t, c, h, w)
-            (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2_)
-
-            f_c_c1_t1 = self.f_c_c1_t1_.view(n, t, -1)
-            f_c_c1_t2 = self.f_c_c1_t2_.view(n, t, -1)
+    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
+        n, t, c, h, w = x_c1_t2.size()
+        # x_c1_t2 is the frame for later module
+        x_c1_t2_ = x_c1_t2.view(n * t, c, h, w)
+        (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_)
+
+        if self.training:
+            # t1 is random time step, c2 is another condition
+            x_c1_t1 = x_c1_t1.view(n * t, c, h, w)
+            (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1)
+            x_c2_t2 = x_c2_t2.view(n * t, c, h, w)
+            (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2)
+
+            x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_)
+            x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w)
+
+            xrecon_loss = torch.stack([
+                F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :])
+                for i in range(t)
+            ]).sum()
+
+            f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1)
+            f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1)
             f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1)
             cano_cons_loss = torch.stack([
                 F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :])
@@ -167,8 +152,13 @@ class AutoEncoder(nn.Module):
                 for i in range(t)
             ]).mean()
 
-            f_p_c1_t2 = self.f_p_c1_t2_.view(n, t, -1)
+            f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1)
             f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1)
             pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1))
 
-            return cano_cons_loss, pose_sim_loss * 10
+            return (
+                (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_),
+                (xrecon_loss, cano_cons_loss, pose_sim_loss * 10)
+            )
+        else:  # evaluating
+            return f_c_c1_t2_, f_p_c1_t2_
-- 
cgit v1.2.3