From 507e1d163aaa6ea4be23e7f08ff6ce0ef58c830b Mon Sep 17 00:00:00 2001 From: Jordan Gong Date: Sat, 23 Jan 2021 22:19:51 +0800 Subject: Remove the third term in canonical consistency loss --- models/auto_encoder.py | 22 ++++++++-------------- models/model.py | 4 +--- models/rgb_part_net.py | 9 ++++----- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 36be868..35cb629 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -113,7 +113,6 @@ class Decoder(nn.Module): class AutoEncoder(nn.Module): def __init__( self, - num_class: int = 74, channels: int = 3, feature_channels: int = 64, embedding_dims: tuple[int, int, int] = (128, 128, 64) @@ -122,25 +121,23 @@ class AutoEncoder(nn.Module): self.encoder = Encoder(channels, feature_channels, embedding_dims) self.decoder = Decoder(embedding_dims, feature_channels, channels) - f_c_dim = embedding_dims[1] - self.classifier = nn.Sequential( - nn.LeakyReLU(0.2, inplace=True), - BasicLinear(f_c_dim, num_class) - ) - - def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None, y=None): + def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None): # x_c1_t2 is the frame for later module (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2) with torch.no_grad(): # Decode canonical features for HPM x_c_c1_t2 = self.decoder( - torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2), + torch.zeros_like(f_a_c1_t2), + f_c_c1_t2, + torch.zeros_like(f_p_c1_t2), no_trans_conv=True ) # Decode pose features for Part Net x_p_c1_t2 = self.decoder( - torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2 + torch.zeros_like(f_a_c1_t2), + torch.zeros_like(f_c_c1_t2), + f_p_c1_t2 ) if self.training: @@ -150,11 +147,8 @@ class AutoEncoder(nn.Module): x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2) xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_) - - y_ = self.classifier(f_c_c1_t2.contiguous()) cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2) - + F.mse_loss(f_c_c1_t2, f_c_c2_t2) - + F.cross_entropy(y_, y)) + + F.mse_loss(f_c_c1_t2, f_c_c2_t2)) return ( (x_c_c1_t2, x_p_c1_t2), diff --git a/models/model.py b/models/model.py index cccb6c4..ddb715d 100644 --- a/models/model.py +++ b/models/model.py @@ -54,7 +54,6 @@ class Model: self.total_iters = self.meta.get('total_iters', (80000, 80000, 80000)) self.is_train: bool = True - self.train_size: int = 74 self.in_channels: int = 3 self.pr: Optional[int] = None self.k: Optional[int] = None @@ -147,7 +146,7 @@ class Model: hpm_optim_hp = optim_hp.pop('hpm', {}) fc_optim_hp = optim_hp.pop('fc', {}) sched_hp = self.hp.get('scheduler', {}) - self.rgb_pn = RGBPartNet(self.train_size, self.in_channels, **model_hp) + self.rgb_pn = RGBPartNet(self.in_channels, **model_hp) # Try to accelerate computation using CUDA or others self.rgb_pn = self.rgb_pn.to(self.device) self.optimizer = optim.Adam([ @@ -409,7 +408,6 @@ class Model: self, dataset_config: DatasetConfiguration ) -> Union[CASIAB]: - self.train_size = dataset_config.get('train_size', 74) self.in_channels = dataset_config.get('num_input_channels', 3) self._dataset_sig = self._make_signature( dataset_config, diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 2cc0958..755d5dc 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -13,7 +13,6 @@ from utils.triplet_loss import BatchAllTripletLoss class RGBPartNet(nn.Module): def __init__( self, - num_class: int = 74, ae_in_channels: int = 3, ae_feature_channels: int = 64, f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64), @@ -31,7 +30,7 @@ class RGBPartNet(nn.Module): ): super().__init__() self.ae = AutoEncoder( - num_class, ae_in_channels, ae_feature_channels, f_a_c_p_dims + ae_in_channels, ae_feature_channels, f_a_c_p_dims ) self.pn = PartNet( ae_in_channels, fpfe_feature_channels, fpfe_kernel_sizes, @@ -60,7 +59,7 @@ class RGBPartNet(nn.Module): # Step 1: Disentanglement # t, n, c, h, w - ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2, y) + ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2) # Step 2.a: HPM & Static Gait Feature Aggregation # t, n, c, h, w @@ -85,7 +84,7 @@ class RGBPartNet(nn.Module): else: return x.unsqueeze(1).view(-1) - def _disentangle(self, x_c1, x_c2=None, y=None): + def _disentangle(self, x_c1, x_c2=None): t, n, c, h, w = x_c1.size() if self.training: # Decoded canonical features and Pose images @@ -95,7 +94,7 @@ class RGBPartNet(nn.Module): xrecon_loss, cano_cons_loss = [], [] for t2 in range(t): t1 = random.randrange(t) - output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2], y) + output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2]) (x_c1_t2, f_p_t2, losses) = output # Decoded features or image -- cgit v1.2.3