6 files changed, 79 insertions, 64 deletions
diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 2d715db..c6bc52f 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -13,7 +15,7 @@ class Encoder(nn.Module):
             in_channels: int = 3,
-            frame_size: tuple[int, int] = (64, 48),
+            frame_size: Tuple[int, int] = (64, 48),
             feature_channels: int = 64,
-            output_dims: tuple[int, int, int] = (128, 128, 64)
+            output_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.feature_channels = feature_channels
@@ -74,7 +76,7 @@ class Decoder(nn.Module):
 
     def __init__(
             self,
-            input_dims: tuple[int, int, int] = (128, 128, 64),
+            input_dims: Tuple[int, int, int] = (128, 128, 64),
             feature_channels: int = 64,
-            feature_size: tuple[int, int] = (4, 3),
+            feature_size: Tuple[int, int] = (4, 3),
             out_channels: int = 3,
@@ -127,7 +129,7 @@ class AutoEncoder(nn.Module):
             channels: int = 3,
-            frame_size: tuple[int, int] = (64, 48),
+            frame_size: Tuple[int, int] = (64, 48),
             feature_channels: int = 64,
-            embedding_dims: tuple[int, int, int] = (128, 128, 64)
+            embedding_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.encoder = Encoder(channels, frame_size,
diff --git a/models/hpm.py b/models/hpm.py
index 9879cfb..b49be3a 100644
--- a/models/hpm.py
+++ b/models/hpm.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 
@@ -10,7 +12,7 @@ class HorizontalPyramidMatching(nn.Module):
             in_channels: int,
             out_channels: int = 128,
             use_1x1conv: bool = False,
-            scales: tuple[int, ...] = (1, 2, 4),
+            scales: Tuple[int, ...] = (1, 2, 4),
             use_avg_pool: bool = True,
             use_max_pool: bool = False,
             **kwargs
diff --git a/models/layers.py b/models/layers.py
index ef53a95..ae61583 100644
--- a/models/layers.py
+++ b/models/layers.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Tuple
 
 import torch
 import torch.nn as nn
@@ -10,7 +10,7 @@ class BasicConv2d(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             **kwargs
     ):
         super().__init__()
@@ -29,7 +29,7 @@ class VGGConv2d(BasicConv2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 3,
+            kernel_size: Union[int, Tuple[int, int]] = 3,
             padding: int = 1,
             **kwargs
     ):
@@ -47,7 +47,7 @@ class BasicConvTranspose2d(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             **kwargs
     ):
         super().__init__()
@@ -66,7 +66,7 @@ class DCGANConvTranspose2d(BasicConvTranspose2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 4,
+            kernel_size: Union[int, Tuple[int, int]] = 4,
             stride: int = 2,
             padding: int = 1,
             is_last_layer: bool = False,
@@ -104,7 +104,7 @@ class FocalConv2d(BasicConv2d):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             halving: int,
             **kwargs
     ):
@@ -124,8 +124,8 @@ class FocalConv2dBlock(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_sizes: tuple[int, int],
-            paddings: tuple[int, int],
+            kernel_sizes: Tuple[int, int],
+            paddings: Tuple[int, int],
             halving: int,
             use_pool: bool = True,
             **kwargs
@@ -151,7 +151,7 @@ class BasicConv1d(nn.Module):
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int]],
+            kernel_size: Union[int, Tuple[int]],
             **kwargs
     ):
         super().__init__()
diff --git a/models/model.py b/models/model.py
index 3f5d283..3d619fe 100644
--- a/models/model.py
+++ b/models/model.py
@@ -1,6 +1,6 @@
 import os
 from datetime import datetime
-from typing import Union, Optional
+from typing import Union, Optional, Tuple, List, Dict, Set
 
 import numpy as np
 import torch
@@ -59,8 +59,8 @@ class Model:
         self.pr: Optional[int] = None
         self.k: Optional[int] = None
 
-        self._gallery_dataset_meta: Optional[dict[str, list]] = None
-        self._probe_datasets_meta: Optional[dict[str, dict[str, list]]] = None
+        self._gallery_dataset_meta: Optional[Dict[str, List]] = None
+        self._probe_datasets_meta: Optional[Dict[str, Dict[str, List]]] = None
 
         self._model_name: str = self.meta.get('name', 'RGB-GaitPart')
         self._hp_sig: str = self._make_signature(self.hp)
@@ -108,8 +108,8 @@ class Model:
     def fit_all(
             self,
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ],
             dataloader_config: DataloaderConfiguration,
     ):
@@ -141,7 +141,7 @@ class Model:
         dataloader = self._parse_dataloader_config(dataset, dataloader_config)
         # Prepare for model, optimizer and scheduler
         model_hp = self.hp.get('model', {})
-        optim_hp: dict = self.hp.get('optimizer', {}).copy()
+        optim_hp: Dict = self.hp.get('optimizer', {}).copy()
         start_iter = optim_hp.pop('start_iter', 0)
         ae_optim_hp = optim_hp.pop('auto_encoder', {})
         pn_optim_hp = optim_hp.pop('part_net', {})
@@ -151,12 +151,13 @@ class Model:
         self.rgb_pn = RGBPartNet(self.in_channels, self.in_size, **model_hp,
                                  image_log_on=self.image_log_on)
         # Try to accelerate computation using CUDA or others
+        self.rgb_pn = nn.DataParallel(self.rgb_pn)
         self.rgb_pn = self.rgb_pn.to(self.device)
         self.optimizer = optim.Adam([
-            {'params': self.rgb_pn.ae.parameters(), **ae_optim_hp},
-            {'params': self.rgb_pn.pn.parameters(), **pn_optim_hp},
-            {'params': self.rgb_pn.hpm.parameters(), **hpm_optim_hp},
-            {'params': self.rgb_pn.fc_mat, **fc_optim_hp}
+            {'params': self.rgb_pn.module.ae.parameters(), **ae_optim_hp},
+            {'params': self.rgb_pn.module.pn.parameters(), **pn_optim_hp},
+            {'params': self.rgb_pn.module.hpm.parameters(), **hpm_optim_hp},
+            {'params': self.rgb_pn.module.fc_mat, **fc_optim_hp}
         ], **optim_hp)
         sched_gamma = sched_hp.get('gamma', 0.9)
         sched_step_size = sched_hp.get('step_size', 500)
@@ -195,8 +196,14 @@ class Model:
             x_c2 = batch_c2['clip'].to(self.device)
             y = batch_c1['label'].to(self.device)
             # Duplicate labels for each part
-            y = y.unsqueeze(1).repeat(1, self.rgb_pn.num_total_parts)
+            y = y.unsqueeze(1).repeat(1, self.rgb_pn.module.num_total_parts)
             losses, images = self.rgb_pn(x_c1, x_c2, y)
+            losses = torch.stack((
+                # xrecon           cano_cons         pose_sim
+                losses[0].sum(), losses[1].mean(), losses[2].mean(),
+                # hpm_ba_trip       pn_ba_trip
+                losses[3].mean(), losses[4].mean()
+            ))
             loss = losses.sum()
             loss.backward()
             self.optimizer.step()
@@ -263,13 +270,13 @@ class Model:
 
     def predict_all(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ],
             dataloader_config: DataloaderConfiguration,
-    ) -> dict[str, torch.Tensor]:
+    ) -> Dict[str, torch.Tensor]:
         # Transform data to features
         gallery_samples, probe_samples = self.transform(
             iters, dataset_config, dataset_selectors, dataloader_config
@@ -281,10 +288,10 @@ class Model:
 
     def transform(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ],
             dataloader_config: DataloaderConfiguration
     ):
@@ -302,6 +309,7 @@ class Model:
         model_hp = self.hp.get('model', {})
         self.rgb_pn = RGBPartNet(self.in_channels, self.in_size, **model_hp)
         # Try to accelerate computation using CUDA or others
+        self.rgb_pn = nn.DataParallel(self.rgb_pn)
         self.rgb_pn = self.rgb_pn.to(self.device)
         self.rgb_pn.eval()
 
@@ -326,7 +334,7 @@ class Model:
 
         return gallery_samples, probe_samples
 
-    def _get_eval_sample(self, sample: dict[str, Union[list, torch.Tensor]]):
+    def _get_eval_sample(self, sample: Dict[str, Union[List, torch.Tensor]]):
         label = sample.pop('label').item()
         clip = sample.pop('clip').to(self.device)
         feature = self.rgb_pn(clip).detach()
@@ -338,10 +346,10 @@ class Model:
 
     def evaluate(
             self,
-            gallery_samples: dict[str, Union[list[str], torch.Tensor]],
-            probe_samples: dict[str, dict[str, Union[list[str], torch.Tensor]]],
+            gallery_samples: Dict[str, Union[List[str], torch.Tensor]],
+            probe_samples: Dict[str, Dict[str, Union[List[str], torch.Tensor]]],
             num_ranks: int = 5
-    ) -> dict[str, torch.Tensor]:
+    ) -> Dict[str, torch.Tensor]:
         probe_conditions = self._probe_datasets_meta.keys()
         gallery_views_meta = self._gallery_dataset_meta['views']
         probe_views_meta = list(self._probe_datasets_meta.values())[0]['views']
@@ -386,12 +394,12 @@ class Model:
 
     def _load_pretrained(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ]
-    ) -> dict[str, str]:
+    ) -> Dict[str, str]:
         checkpoints = {}
         for (iter_, (condition, selector)) in zip(
                 iters, dataset_selectors.items()
@@ -408,7 +416,7 @@ class Model:
             self,
             dataset_config: DatasetConfiguration,
             dataloader_config: DataloaderConfiguration,
-    ) -> tuple[DataLoader, dict[str, DataLoader]]:
+    ) -> Tuple[DataLoader, Dict[str, DataLoader]]:
         dataset_name = dataset_config.get('name', 'CASIA-B')
         if dataset_name == 'CASIA-B':
             gallery_dataset = self._parse_dataset_config(
@@ -465,7 +473,7 @@ class Model:
             dataset_config,
             popped_keys=['root_dir', 'cache_on']
         )
-        config: dict = dataset_config.copy()
+        config: Dict = dataset_config.copy()
         name = config.pop('name', 'CASIA-B')
         if name == 'CASIA-B':
             return CASIAB(**config, is_train=self.is_train)
@@ -479,7 +487,7 @@ class Model:
             dataset: Union[CASIAB],
             dataloader_config: DataloaderConfiguration
     ) -> DataLoader:
-        config: dict = dataloader_config.copy()
+        config: Dict = dataloader_config.copy()
         (self.pr, self.k) = config.pop('batch_size', (8, 16))
         if self.is_train:
             triplet_sampler = TripletSampler(dataset, (self.pr, self.k))
@@ -492,9 +500,9 @@ class Model:
 
     def _batch_splitter(
             self,
-            batch: list[dict[str, Union[np.int64, str, torch.Tensor]]]
-    ) -> tuple[dict[str, Union[list[str], torch.Tensor]],
-               dict[str, Union[list[str], torch.Tensor]]]:
+            batch: List[Dict[str, Union[np.int64, str, torch.Tensor]]]
+    ) -> Tuple[Dict[str, Union[List[str], torch.Tensor]],
+               Dict[str, Union[List[str], torch.Tensor]]]:
         """
         Disentanglement need two random conditions, this function will
         split pr * k * 2 samples to 2 dicts each containing pr * k
@@ -508,8 +516,8 @@ class Model:
         return default_collate(_batch[0]), default_collate(_batch[1])
 
     def _make_signature(self,
-                        config: dict,
-                        popped_keys: Optional[list] = None) -> str:
+                        config: Dict,
+                        popped_keys: Optional[List] = None) -> str:
         _config = config.copy()
         if popped_keys:
             for key in popped_keys:
@@ -517,16 +525,16 @@ class Model:
 
         return self._gen_sig(list(_config.values()))
 
-    def _gen_sig(self, values: Union[tuple, list, set, str, int, float]) -> str:
+    def _gen_sig(self, values: Union[Tuple, List, Set, str, int, float]) -> str:
         strings = []
         for v in values:
             if isinstance(v, str):
                 strings.append(v)
             elif isinstance(v, (tuple, list)):
                 strings.append(self._gen_sig(v))
             elif isinstance(v, set):
                 strings.append(self._gen_sig(sorted(list(v))))
             elif isinstance(v, dict):
                 strings.append(self._gen_sig(list(v.values())))
             else:
                 strings.append(str(v))
diff --git a/models/part_net.py b/models/part_net.py
index 62a2bac..f34f993 100644
--- a/models/part_net.py
+++ b/models/part_net.py
@@ -1,4 +1,5 @@
 import copy
+from typing import Tuple
 
 import torch
 import torch.nn as nn
@@ -12,9 +13,9 @@ class FrameLevelPartFeatureExtractor(nn.Module):
             self,
             in_channels: int = 3,
             feature_channels: int = 32,
-            kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            halving: tuple[int, ...] = (0, 2, 3)
+            kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            halving: Tuple[int, ...] = (0, 2, 3)
     ):
         super().__init__()
         num_blocks = len(kernel_sizes)
@@ -112,9 +113,9 @@ class PartNet(nn.Module):
             self,
             in_channels: int = 3,
             feature_channels: int = 32,
-            kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            halving: tuple[int, ...] = (0, 2, 3),
+            kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            halving: Tuple[int, ...] = (0, 2, 3),
             squeeze_ratio: int = 4,
             num_part: int = 16
     ):
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index 67acac3..80b3e17 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 
@@ -13,19 +15,19 @@ class RGBPartNet(nn.Module):
             ae_in_channels: int = 3,
-            ae_in_size: tuple[int, int] = (64, 48),
+            ae_in_size: Tuple[int, int] = (64, 48),
             ae_feature_channels: int = 64,
-            f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
+            f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64),
             hpm_use_1x1conv: bool = False,
-            hpm_scales: tuple[int, ...] = (1, 2, 4),
+            hpm_scales: Tuple[int, ...] = (1, 2, 4),
             hpm_use_avg_pool: bool = True,
             hpm_use_max_pool: bool = True,
             fpfe_feature_channels: int = 32,
-            fpfe_kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            fpfe_paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            fpfe_halving: tuple[int, ...] = (0, 2, 3),
+            fpfe_kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            fpfe_paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            fpfe_halving: Tuple[int, ...] = (0, 2, 3),
             tfa_squeeze_ratio: int = 4,
             tfa_num_parts: int = 16,
             embedding_dims: int = 256,
-            triplet_margins: tuple[float, float] = (0.2, 0.2),
+            triplet_margins: Tuple[float, float] = (0.2, 0.2),
             image_log_on: bool = False
     ):
         super().__init__()
@@ -84,7 +86,7 @@ class RGBPartNet(nn.Module):
             pn_ba_trip = self.pn_ba_trip(
                 x[self.hpm_num_parts:], y[self.hpm_num_parts:]
             )
-            losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip))
+            losses = (*losses, hpm_ba_trip, pn_ba_trip)
             return losses, images
         else:
             return x.unsqueeze(1).view(-1)