diff options
author | Jordan Gong <jordan.gong@protonmail.com> | 2021-04-04 17:44:23 +0800 |
---|---|---|
committer | Jordan Gong <jordan.gong@protonmail.com> | 2021-04-04 17:44:23 +0800 |
commit | 6f3dd9109b8ae7b37e3373d844a6c406d83c2b35 (patch) | |
tree | a530221dfef3100a236c4091c3d0c15ea636d9e5 /models | |
parent | 6a8824e4fb8bdd1f3e763b78b765830788415cfb (diff) | |
parent | 85627d4cfb495453a7c28b3f131b84b1038af674 (diff) |
Merge branch 'disentangling_only' into disentangling_only_py3.8 (branch: disentangling_only_py3.8)
Diffstat (limited to 'models')
-rw-r--r-- | models/auto_encoder.py | 9 | ||||
-rw-r--r-- | models/model.py | 24 | ||||
-rw-r--r-- | models/rgb_part_net.py | 15 |
3 files changed, 36 insertions, 12 deletions
diff --git a/models/auto_encoder.py b/models/auto_encoder.py index b1d51ef..61622eb 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -112,7 +112,7 @@ class Decoder(nn.Module): x = torch.cat((f_appearance, f_canonical, f_pose), dim=1) x = self.fc(x) x = x.view(-1, self.feature_channels * 8, self.h_0, self.w_0) - x = F.relu(x, inplace=True) + x = F.leaky_relu(x, 0.2, inplace=True) x = self.trans_conv1(x) x = self.trans_conv2(x) x = self.trans_conv3(x) @@ -124,6 +124,7 @@ class Decoder(nn.Module): class AutoEncoder(nn.Module): def __init__( self, + num_class: int, channels: int = 3, frame_size: Tuple[int, int] = (64, 48), feature_channels: int = 64, @@ -134,8 +135,9 @@ class AutoEncoder(nn.Module): feature_channels, embedding_dims) self.decoder = Decoder(embedding_dims, feature_channels, self.encoder.feature_size, channels) + self.classifier = BasicLinear(embedding_dims[1], num_class) - def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None): + def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None, y=None): n, t, c, h, w = x_c1_t2.size() # x_c1_t2 is the frame for later module x_c1_t2_ = x_c1_t2.view(n * t, c, h, w) @@ -162,6 +164,9 @@ class AutoEncoder(nn.Module): cano_cons_loss = torch.stack([ F.mse_loss(f_c_c1_t1[:, i, :], f_c_c1_t2[:, i, :]) + F.mse_loss(f_c_c1_t2[:, i, :], f_c_c2_t2[:, i, :]) + + F.cross_entropy(self.classifier( + F.leaky_relu(f_c_c1_t2[:, i, :], 0.2) + ), y) for i in range(t) ]).mean() diff --git a/models/model.py b/models/model.py index 46987ca..ebaaaf1 100644 --- a/models/model.py +++ b/models/model.py @@ -54,6 +54,7 @@ class Model: self.total_iters = self.meta.get('total_iters', (self.total_iter,)) self.is_train: bool = True + self.num_class: Optional[int] = None self.in_channels: int = 3 self.in_size: Tuple[int, int] = (64, 48) self.batch_size: Optional[int] = None @@ -160,8 +161,13 @@ class Model: optim_hp: Dict = self.hp.get('optimizer', {}).copy() sched_hp = self.hp.get('scheduler', {}) - self.rgb_pn = 
RGBPartNet(self.in_channels, self.in_size, **model_hp, - image_log_on=self.image_log_on) + self.rgb_pn = RGBPartNet( + self.num_class, + self.in_channels, + self.in_size, + **model_hp, + image_log_on=self.image_log_on + ) # Try to accelerate computation using CUDA or others self.rgb_pn = self.rgb_pn.to(self.device) @@ -202,7 +208,8 @@ class Model: # forward + backward + optimize x_c1 = batch_c1['clip'].to(self.device) x_c2 = batch_c2['clip'].to(self.device) - losses, features, images = self.rgb_pn(x_c1, x_c2) + y = batch_c1['label'].to(self.device) + losses, features, images = self.rgb_pn(x_c1, x_c2, y) loss = losses.sum() loss.backward() self.optimizer.step() @@ -254,8 +261,9 @@ class Model: batch_c1, batch_c2 = next(val_dataloader) x_c1 = batch_c1['clip'].to(self.device) x_c2 = batch_c2['clip'].to(self.device) + y = batch_c1['label'].to(self.device) with torch.no_grad(): - losses, _, _ = self.rgb_pn(x_c1, x_c2) + losses, _, _ = self.rgb_pn(x_c1, x_c2, y) loss = losses.sum() self._write_stat('Val', loss, losses) @@ -302,7 +310,12 @@ class Model: # Init models model_hp: dict = self.hp.get('model', {}).copy() - self.rgb_pn = RGBPartNet(self.in_channels, self.in_size, **model_hp) + self.rgb_pn = RGBPartNet( + self.num_class, + self.in_channels, + self.in_size, + **model_hp + ) # Try to accelerate computation using CUDA or others self.rgb_pn = self.rgb_pn.to(self.device) self.rgb_pn.eval() @@ -419,6 +432,7 @@ class Model: self, dataset_config: DatasetConfiguration ) -> Union[CASIAB]: + self.num_class = dataset_config.get('train_size', 74) self.in_channels = dataset_config.get('num_input_channels', 3) self.in_size = dataset_config.get('frame_size', (64, 48)) self._dataset_sig = self._make_signature( diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 1c7a1a2..6be6b0a 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -10,6 +10,7 @@ from models.auto_encoder import AutoEncoder class RGBPartNet(nn.Module): def __init__( self, + num_class: 
int, ae_in_channels: int = 3, ae_in_size: Tuple[int, int] = (64, 48), ae_feature_channels: int = 64, @@ -22,11 +23,15 @@ class RGBPartNet(nn.Module): self.image_log_on = image_log_on self.ae = AutoEncoder( - ae_in_channels, ae_in_size, ae_feature_channels, f_a_c_p_dims + num_class, + ae_in_channels, + ae_in_size, + ae_feature_channels, + f_a_c_p_dims ) - def forward(self, x_c1, x_c2=None): - losses, features, images = self._disentangle(x_c1, x_c2) + def forward(self, x_c1, x_c2=None, y=None): + losses, features, images = self._disentangle(x_c1, x_c2, y) if self.training: losses = torch.stack(losses) @@ -34,11 +39,11 @@ class RGBPartNet(nn.Module): else: return features - def _disentangle(self, x_c1_t2, x_c2_t2=None): + def _disentangle(self, x_c1_t2, x_c2_t2=None, y=None): n, t, c, h, w = x_c1_t2.size() if self.training: x_c1_t1 = x_c1_t2[:, torch.randperm(t), :, :, :] - ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2) + ((f_a_, f_c_, f_p_), losses) = self.ae(x_c1_t2, x_c1_t1, x_c2_t2, y) f_a = f_a_.view(n, t, -1) f_c = f_c_.view(n, t, -1) f_p = f_p_.view(n, t, -1) |