 models/auto_encoder.py | 22 ++++++++--------------
 models/model.py        |  4 +---
 models/rgb_part_net.py |  9 ++++-----
 3 files changed, 13 insertions(+), 22 deletions(-)
diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 36be868..35cb629 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -113,7 +113,6 @@ class Decoder(nn.Module):
 class AutoEncoder(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             channels: int = 3,
             feature_channels: int = 64,
             embedding_dims: tuple[int, int, int] = (128, 128, 64)
@@ -122,25 +121,23 @@ class AutoEncoder(nn.Module):
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
 
-        f_c_dim = embedding_dims[1]
-        self.classifier = nn.Sequential(
-            nn.LeakyReLU(0.2, inplace=True),
-            BasicLinear(f_c_dim, num_class)
-        )
-
-    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None, y=None):
+    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
         # x_c1_t2 is the frame for later module
         (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
 
         with torch.no_grad():
             # Decode canonical features for HPM
             x_c_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2),
+                torch.zeros_like(f_a_c1_t2),
+                f_c_c1_t2,
+                torch.zeros_like(f_p_c1_t2),
                 no_trans_conv=True
             )
             # Decode pose features for Part Net
             x_p_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2
+                torch.zeros_like(f_a_c1_t2),
+                torch.zeros_like(f_c_c1_t2),
+                f_p_c1_t2
             )
 
         if self.training:
@@ -150,11 +147,8 @@ class AutoEncoder(nn.Module):
 
             x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2)
             xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_)
-
-            y_ = self.classifier(f_c_c1_t2.contiguous())
             cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
-                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2)
-                                 + F.cross_entropy(y_, y))
+                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
 
             return (
                 (x_c_c1_t2, x_p_c1_t2),
diff --git a/models/model.py b/models/model.py
index cccb6c4..ddb715d 100644
--- a/models/model.py
+++ b/models/model.py
@@ -54,7 +54,6 @@ class Model:
         self.total_iters = self.meta.get('total_iters', (80000, 80000, 80000))
 
         self.is_train: bool = True
-        self.train_size: int = 74
         self.in_channels: int = 3
         self.pr: Optional[int] = None
         self.k: Optional[int] = None
@@ -147,7 +146,7 @@ class Model:
         hpm_optim_hp = optim_hp.pop('hpm', {})
         fc_optim_hp = optim_hp.pop('fc', {})
         sched_hp = self.hp.get('scheduler', {})
-        self.rgb_pn = RGBPartNet(self.train_size, self.in_channels, **model_hp)
+        self.rgb_pn = RGBPartNet(self.in_channels, **model_hp)
         # Try to accelerate computation using CUDA or others
         self.rgb_pn = self.rgb_pn.to(self.device)
         self.optimizer = optim.Adam([
@@ -409,7 +408,6 @@ class Model:
             self,
             dataset_config: DatasetConfiguration
     ) -> Union[CASIAB]:
-        self.train_size = dataset_config.get('train_size', 74)
         self.in_channels = dataset_config.get('num_input_channels', 3)
         self._dataset_sig = self._make_signature(
             dataset_config,
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index 2cc0958..755d5dc 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -13,7 +13,6 @@ from utils.triplet_loss import BatchAllTripletLoss
 class RGBPartNet(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             ae_in_channels: int = 3,
             ae_feature_channels: int = 64,
             f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
@@ -31,7 +30,7 @@ class RGBPartNet(nn.Module):
     ):
         super().__init__()
         self.ae = AutoEncoder(
-            num_class, ae_in_channels, ae_feature_channels, f_a_c_p_dims
+            ae_in_channels, ae_feature_channels, f_a_c_p_dims
        )
         self.pn = PartNet(
             ae_in_channels, fpfe_feature_channels, fpfe_kernel_sizes,
@@ -60,7 +59,7 @@ class RGBPartNet(nn.Module):
 
         # Step 1: Disentanglement
         # t, n, c, h, w
-        ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2, y)
+        ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2)
 
         # Step 2.a: HPM & Static Gait Feature Aggregation
         # t, n, c, h, w
@@ -85,7 +84,7 @@ class RGBPartNet(nn.Module):
         else:
             return x.unsqueeze(1).view(-1)
 
-    def _disentangle(self, x_c1, x_c2=None, y=None):
+    def _disentangle(self, x_c1, x_c2=None):
         t, n, c, h, w = x_c1.size()
         if self.training:
             # Decoded canonical features and Pose images
@@ -95,7 +94,7 @@ class RGBPartNet(nn.Module):
             xrecon_loss, cano_cons_loss = [], []
             for t2 in range(t):
                 t1 = random.randrange(t)
-                output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2], y)
+                output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2])
                 (x_c1_t2, f_p_t2, losses) = output
                 # Decoded features or image
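Net effect of this commit, as a minimal runnable sketch (tensor sizes below are hypothetical, chosen only for illustration): the AutoEncoder no longer owns an identification classifier, so its forward pass takes no label argument y, and the canonical consistency loss reduces to the two MSE terms between canonical features that remain in the diff above. Callers correspondingly invoke self.ae(x_c1[t2], x_c1[t1], x_c2[t2]) without y.

import torch
import torch.nn.functional as F

# Hypothetical sizes for illustration only: a batch of 4 canonical
# feature vectors with the default f_c dimension of 128.
n, f_c_dim = 4, 128
f_c_c1_t1 = torch.randn(n, f_c_dim)  # canonical feature, condition 1, frame t1
f_c_c1_t2 = torch.randn(n, f_c_dim)  # canonical feature, condition 1, frame t2
f_c_c2_t2 = torch.randn(n, f_c_dim)  # canonical feature, condition 2, frame t2

# After this commit, only the feature-consistency terms remain; the
# cross-entropy term over the removed classifier is gone.
cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
                     + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
print(cano_cons_loss_t2)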
