From 98b6e6dc3be6f88abb72e351c8f2da2b23b8ab85 Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Thu, 7 Jan 2021 19:55:00 +0800 Subject: Type hint for python version lower than 3.9 --- models/auto_encoder.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'models/auto_encoder.py') diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 7c1f7ef..1e7c323 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -1,3 +1,5 @@ +from typing import Tuple + import torch import torch.nn as nn import torch.nn.functional as F @@ -12,7 +14,7 @@ class Encoder(nn.Module): self, in_channels: int = 3, feature_channels: int = 64, - output_dims: tuple[int, int, int] = (128, 128, 64) + output_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() self.feature_channels = feature_channels @@ -67,7 +69,7 @@ class Decoder(nn.Module): def __init__( self, - input_dims: tuple[int, int, int] = (128, 128, 64), + input_dims: Tuple[int, int, int] = (128, 128, 64), feature_channels: int = 64, out_channels: int = 3, ): @@ -116,7 +118,7 @@ class AutoEncoder(nn.Module): num_class: int = 74, channels: int = 3, feature_channels: int = 64, - embedding_dims: tuple[int, int, int] = (128, 128, 64) + embedding_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() self.encoder = Encoder(channels, feature_channels, embedding_dims) -- cgit v1.2.3 From 24b5968bfc5799e44c9bbbc00e3a9be00f4509ac Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Mon, 15 Feb 2021 11:08:52 +0800 Subject: Revert "Memory usage improvement" This reverts commit be508061 --- models/auto_encoder.py | 70 ++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 40 deletions(-) (limited to 'models/auto_encoder.py') diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 918a95c..7b9b29f 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -119,47 +119,32 @@ class AutoEncoder(nn.Module): embedding_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() - self.f_c_c1_t2_ = None - self.f_p_c1_t2_ = None - self.f_c_c1_t1_ = None self.encoder = Encoder(channels, feature_channels, embedding_dims) self.decoder = Decoder(embedding_dims, feature_channels, channels) - def forward(self, x_t2, is_c1=True): - n, t, c, h, w = x_t2.size() - if is_c1: # condition 1 - # x_c1_t2 is the frame for later module - x_c1_t2_ = x_t2.view(n * t, c, h, w) - (f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_) \ - = self.encoder(x_c1_t2_) - - if self.training: - # t1 is random time step - x_c1_t1 = x_t2[:, torch.randperm(t), :, :, :] - x_c1_t1_ = x_c1_t1.view(n * t, c, h, w) - (f_a_c1_t1_, self.f_c_c1_t1_, _) = self.encoder(x_c1_t1_) - - x_c1_t2_pred_ = self.decoder( - f_a_c1_t1_, self.f_c_c1_t1_, self.f_p_c1_t2_ - ) - x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) - - xrecon_loss = torch.stack([ - F.mse_loss(x_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) - for i in range(t) - ]).sum() - - return ((f_a_c1_t2_, self.f_c_c1_t2_, self.f_p_c1_t2_), - xrecon_loss) - else: # evaluating - return self.f_c_c1_t2_, self.f_p_c1_t2_ - else: # condition 2 - # c2 is another condition - x_c2_t2_ = x_t2.view(n * t, c, h, w) - (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2_) - - f_c_c1_t1 = self.f_c_c1_t1_.view(n, t, -1) - f_c_c1_t2 = self.f_c_c1_t2_.view(n, t, -1) + def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None): + n, t, c, h, w = x_c1_t2.size() + # x_c1_t2 is the frame for later module + x_c1_t2_ = x_c1_t2.view(n * t, c, h, w) + (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_) = self.encoder(x_c1_t2_) + + if self.training: + # t1 is random time step, c2 is another condition + x_c1_t1 = x_c1_t1.view(n * t, c, h, w) + (f_a_c1_t1_, f_c_c1_t1_, _) = self.encoder(x_c1_t1) + x_c2_t2 = x_c2_t2.view(n * t, c, h, w) + (_, f_c_c2_t2_, f_p_c2_t2_) = self.encoder(x_c2_t2) + + x_c1_t2_pred_ = self.decoder(f_a_c1_t1_, f_c_c1_t1_, f_p_c1_t2_) + x_c1_t2_pred = x_c1_t2_pred_.view(n, t, c, h, w) + + xrecon_loss = torch.stack([ + F.mse_loss(x_c1_t2[:, i, :, :, :], x_c1_t2_pred[:, i, :, :, :]) + for i in range(t) + ]).sum() + + f_c_c1_t1 = f_c_c1_t1_.view(n, t, -1) + f_c_c1_t2 = f_c_c1_t2_.view(n, t, -1) f_c_c2_t2 = f_c_c2_t2_.view(n, t, -1) cano_cons_loss = torch.stack([ F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :]) @@ -167,8 +152,13 @@ class AutoEncoder(nn.Module): for i in range(t) ]).mean() - f_p_c1_t2 = self.f_p_c1_t2_.view(n, t, -1) + f_p_c1_t2 = f_p_c1_t2_.view(n, t, -1) f_p_c2_t2 = f_p_c2_t2_.view(n, t, -1) pose_sim_loss = F.mse_loss(f_p_c1_t2.mean(1), f_p_c2_t2.mean(1)) - return cano_cons_loss, pose_sim_loss * 10 + return ( + (f_a_c1_t2_, f_c_c1_t2_, f_p_c1_t2_), + (xrecon_loss, cano_cons_loss, pose_sim_loss * 10) + ) + else: # evaluating + return f_c_c1_t2_, f_p_c1_t2_ -- cgit v1.2.3