-rw-r--r-- | models/auto_encoder.py | 14
-rw-r--r-- | models/hpm.py          |  4
-rw-r--r-- | models/layers.py       | 18
-rw-r--r-- | models/model.py        | 70
-rw-r--r-- | models/part_net.py     |  7
-rw-r--r-- | models/rgb_part_net.py | 10
-rw-r--r-- | preprocess.py          |  5
-rw-r--r-- | startup                |  1
-rw-r--r-- | utils/configuration.py | 24
-rw-r--r-- | utils/dataset.py       | 28
-rw-r--r-- | utils/sampler.py       |  5
11 files changed, 97 insertions, 89 deletions
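This commit swaps the PEP 585 built-in generics (`tuple[...]`, `list[...]`, `dict[...]`, `set[...]`) used in annotations for their `typing` equivalents, since the built-in forms only became subscriptable at runtime in Python 3.9. A minimal sketch of the failure mode and the fix (the function below is illustrative, not taken from this repository):

    # Python 3.8 evaluates annotations at definition time, so builtin
    # generics raise immediately:
    #     def resize(size: tuple[int, int]): ...
    #     TypeError: 'type' object is not subscriptable
    from typing import Tuple

    def resize(size: Tuple[int, int] = (64, 48)) -> Tuple[int, int]:
        # typing.Tuple has been subscriptable since Python 3.5
        return size

An alternative would have been `from __future__ import annotations` (PEP 563, available from 3.7), which defers annotation evaluation; the `typing` aliases chosen here keep the hints usable at runtime as well.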
diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 4fece69..7f0eb6c 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -11,9 +13,9 @@ class Encoder(nn.Module):
     def __init__(
             self,
             in_channels: int = 3,
-            frame_size: tuple[int, int] = (64, 48),
+            frame_size: Tuple[int, int] = (64, 48),
             feature_channels: int = 64,
-            output_dims: tuple[int, int, int] = (128, 128, 64)
+            output_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.feature_channels = feature_channels
@@ -74,9 +76,9 @@ class Decoder(nn.Module):
     def __init__(
             self,
-            input_dims: tuple[int, int, int] = (128, 128, 64),
+            input_dims: Tuple[int, int, int] = (128, 128, 64),
             feature_channels: int = 64,
-            feature_size: tuple[int, int] = (4, 3),
+            feature_size: Tuple[int, int] = (4, 3),
             out_channels: int = 3,
     ):
         super().__init__()
@@ -125,9 +127,9 @@ class AutoEncoder(nn.Module):
     def __init__(
             self,
             channels: int = 3,
-            frame_size: tuple[int, int] = (64, 48),
+            frame_size: Tuple[int, int] = (64, 48),
             feature_channels: int = 64,
-            embedding_dims: tuple[int, int, int] = (128, 128, 64)
+            embedding_dims: Tuple[int, int, int] = (128, 128, 64)
     ):
         super().__init__()
         self.encoder = Encoder(channels, frame_size,
diff --git a/models/hpm.py b/models/hpm.py
index 8186b20..8320569 100644
--- a/models/hpm.py
+++ b/models/hpm.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
@@ -9,7 +11,7 @@ class HorizontalPyramidMatching(nn.Module):
             self,
             in_channels: int,
             out_channels: int = 128,
-            scales: tuple[int, ...] = (1, 2, 4),
+            scales: Tuple[int, ...] = (1, 2, 4),
             use_avg_pool: bool = True,
             use_max_pool: bool = False,
     ):
diff --git a/models/layers.py b/models/layers.py
index c609698..a0933e8 100644
--- a/models/layers.py
+++ b/models/layers.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Tuple
 
 import torch
 import torch.nn as nn
@@ -10,7 +10,7 @@ class BasicConv2d(nn.Module):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             **kwargs
     ):
         super().__init__()
@@ -29,7 +29,7 @@ class VGGConv2d(BasicConv2d):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 3,
+            kernel_size: Union[int, Tuple[int, int]] = 3,
             padding: int = 1,
             **kwargs
     ):
@@ -47,7 +47,7 @@ class BasicConvTranspose2d(nn.Module):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             **kwargs
     ):
         super().__init__()
@@ -66,7 +66,7 @@ class DCGANConvTranspose2d(BasicConvTranspose2d):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]] = 4,
+            kernel_size: Union[int, Tuple[int, int]] = 4,
             stride: int = 2,
             padding: int = 1,
             is_last_layer: bool = False,
@@ -106,7 +106,7 @@ class FocalConv2d(BasicConv2d):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int, int]],
+            kernel_size: Union[int, Tuple[int, int]],
             halving: int,
             **kwargs
     ):
@@ -126,8 +126,8 @@ class FocalConv2dBlock(nn.Module):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_sizes: tuple[int, int],
-            paddings: tuple[int, int],
+            kernel_sizes: Tuple[int, int],
+            paddings: Tuple[int, int],
             halving: int,
             use_pool: bool = True,
             **kwargs
@@ -153,7 +153,7 @@ class BasicConv1d(nn.Module):
     def __init__(
             self,
             in_channels: int,
             out_channels: int,
-            kernel_size: Union[int, tuple[int]],
+            kernel_size: Union[int, Tuple[int]],
             **kwargs
     ):
         super().__init__()
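The `Union[int, Tuple[int, int]]` kernel-size annotations in layers.py mirror the `torch.nn.Conv2d` convention of accepting either a single int or an (h, w) pair. A small sketch of the normalization this implies (the helper name is hypothetical, not part of layers.py):

    from typing import Tuple, Union

    def as_pair(kernel_size: Union[int, Tuple[int, int]]) -> Tuple[int, int]:
        # A bare int means a square kernel, as in nn.Conv2d
        if isinstance(kernel_size, int):
            return kernel_size, kernel_size
        return kernel_size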
diff --git a/models/model.py b/models/model.py
index 36a4f7f..573b1e6 100644
--- a/models/model.py
+++ b/models/model.py
@@ -1,7 +1,7 @@
 import os
 import random
 from datetime import datetime
-from typing import Union, Optional
+from typing import Union, Optional, Tuple, List, Dict, Set
 
 import numpy as np
 import torch
@@ -59,12 +59,12 @@ class Model:
         self.is_train: bool = True
         self.in_channels: int = 3
-        self.in_size: tuple[int, int] = (64, 48)
+        self.in_size: Tuple[int, int] = (64, 48)
         self.pr: Optional[int] = None
         self.k: Optional[int] = None
 
-        self._gallery_dataset_meta: Optional[dict[str, list]] = None
-        self._probe_datasets_meta: Optional[dict[str, dict[str, list]]] = None
+        self._gallery_dataset_meta: Optional[Dict[str, List]] = None
+        self._probe_datasets_meta: Optional[Dict[str, Dict[str, List]]] = None
 
         self._model_name: str = self.meta.get('name', 'RGB-GaitPart')
         self._hp_sig: str = self._make_signature(self.hp)
@@ -114,8 +114,8 @@ class Model:
     def fit_all(
             self,
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ],
             dataloader_config: DataloaderConfiguration,
     ):
@@ -146,11 +146,11 @@ class Model:
         dataset = self._parse_dataset_config(dataset_config)
         dataloader = self._parse_dataloader_config(dataset, dataloader_config)
         # Prepare for model, optimizer and scheduler
-        model_hp: dict = self.hp.get('model', {}).copy()
+        model_hp: Dict = self.hp.get('model', {}).copy()
         triplet_is_hard = model_hp.pop('triplet_is_hard', True)
         triplet_is_mean = model_hp.pop('triplet_is_mean', True)
         triplet_margins = model_hp.pop('triplet_margins', None)
-        optim_hp: dict = self.hp.get('optimizer', {}).copy()
+        optim_hp: Dict = self.hp.get('optimizer', {}).copy()
         ae_optim_hp = optim_hp.pop('auto_encoder', {})
         hpm_optim_hp = optim_hp.pop('hpm', {})
         pn_optim_hp = optim_hp.pop('part_net', {})
@@ -369,13 +369,13 @@ class Model:
     def predict_all(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ],
             dataloader_config: DataloaderConfiguration,
-    ) -> dict[str, torch.Tensor]:
+    ) -> Dict[str, torch.Tensor]:
         # Transform data to features
         gallery_samples, probe_samples = self.transform(
             iters, dataset_config, dataset_selectors, dataloader_config
@@ -387,10 +387,10 @@ class Model:
     def transform(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ],
             dataloader_config: DataloaderConfiguration,
             is_train: bool = False
@@ -438,7 +438,7 @@ class Model:
         return gallery_samples, probe_samples
 
-    def _get_eval_sample(self, sample: dict[str, Union[list, torch.Tensor]]):
+    def _get_eval_sample(self, sample: Dict[str, Union[List, torch.Tensor]]):
         label, condition, view, clip = sample.values()
         with torch.no_grad():
             feature = self.rgb_pn(clip.to(self.device))
@@ -451,10 +451,10 @@ class Model:
     @staticmethod
     def evaluate(
-            gallery_samples: dict[str, dict[str, Union[list, torch.Tensor]]],
-            probe_samples: dict[str, dict[str, Union[list, torch.Tensor]]],
+            gallery_samples: Dict[str, Dict[str, Union[List, torch.Tensor]]],
+            probe_samples: Dict[str, Dict[str, Union[List, torch.Tensor]]],
             num_ranks: int = 5
-    ) -> dict[str, torch.Tensor]:
+    ) -> Dict[str, torch.Tensor]:
         conditions = list(probe_samples.keys())
         gallery_views_meta = gallery_samples['meta']['views']
         probe_views_meta = probe_samples[conditions[0]]['meta']['views']
@@ -499,12 +499,12 @@ class Model:
     def _load_pretrained(
             self,
-            iters: tuple[int],
+            iters: Tuple[int],
             dataset_config: DatasetConfiguration,
-            dataset_selectors: dict[
-                str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
+            dataset_selectors: Dict[
+                str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]
             ]
-    ) -> dict[str, str]:
+    ) -> Dict[str, str]:
         checkpoints = {}
         for (iter_, total_iter, (condition, selector)) in zip(
                 iters, self.total_iters, dataset_selectors.items()
@@ -523,7 +523,7 @@ class Model:
             dataset_config: DatasetConfiguration,
             dataloader_config: DataloaderConfiguration,
             is_train: bool = False
-    ) -> tuple[DataLoader, dict[str, DataLoader]]:
+    ) -> Tuple[DataLoader, Dict[str, DataLoader]]:
         dataset_name = dataset_config.get('name', 'CASIA-B')
         if dataset_name == 'CASIA-B':
             self.is_train = is_train
@@ -582,7 +582,7 @@ class Model:
             dataset_config, popped_keys=['root_dir', 'cache_on']
         )
-        config: dict = dataset_config.copy()
+        config: Dict = dataset_config.copy()
         name = config.pop('name', 'CASIA-B')
         if name == 'CASIA-B':
             return CASIAB(**config, is_train=self.is_train)
@@ -596,7 +596,7 @@ class Model:
             dataset: Union[CASIAB],
             dataloader_config: DataloaderConfiguration
     ) -> DataLoader:
-        config: dict = dataloader_config.copy()
+        config: Dict = dataloader_config.copy()
         (self.pr, self.k) = config.pop('batch_size', (8, 16))
         if self.is_train:
             triplet_sampler = TripletSampler(dataset, (self.pr, self.k))
@@ -609,9 +609,9 @@ class Model:
     def _batch_splitter(
             self,
-            batch: list[dict[str, Union[np.int64, str, torch.Tensor]]]
-    ) -> tuple[dict[str, Union[list[str], torch.Tensor]],
-               dict[str, Union[list[str], torch.Tensor]]]:
+            batch: List[Dict[str, Union[np.int64, str, torch.Tensor]]]
+    ) -> Tuple[Dict[str, Union[List[str], torch.Tensor]],
+               Dict[str, Union[List[str], torch.Tensor]]]:
         """
        Disentanglement need two random conditions, this function will
        split pr * k * 2 samples to 2 dicts each containing pr * k
@@ -625,8 +625,8 @@ class Model:
         return default_collate(_batch[0]), default_collate(_batch[1])
 
     def _make_signature(self,
-                        config: dict,
-                        popped_keys: Optional[list] = None) -> str:
+                        config: Dict,
+                        popped_keys: Optional[List] = None) -> str:
         _config = config.copy()
         if popped_keys:
             for key in popped_keys:
@@ -634,16 +634,16 @@ class Model:
 
         return self._gen_sig(list(_config.values()))
 
-    def _gen_sig(self, values: Union[tuple, list, set, str, int, float]) -> str:
+    def _gen_sig(self, values: Union[Tuple, List, Set, str, int, float]) -> str:
         strings = []
         for v in values:
             if isinstance(v, str):
                 strings.append(v)
-            elif isinstance(v, (tuple, list)):
+            elif isinstance(v, (Tuple, List)):
                 strings.append(self._gen_sig(v))
-            elif isinstance(v, set):
+            elif isinstance(v, Set):
                 strings.append(self._gen_sig(sorted(list(v))))
-            elif isinstance(v, dict):
+            elif isinstance(v, Dict):
                 strings.append(self._gen_sig(list(v.values())))
             else:
                 strings.append(str(v))
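Note that `_gen_sig` now runs `isinstance` against `Tuple`, `List`, `Set` and `Dict`. On Python 3.8 this should be behavior-preserving: unsubscripted `typing` aliases delegate `isinstance` checks to the underlying builtins, whereas subscripted forms such as `List[int]` are rejected. A quick check of that equivalence, as I understand the `typing` semantics:

    from typing import Dict, List, Set, Tuple

    # Unsubscripted aliases pass isinstance through to tuple/list/set/dict
    assert isinstance((1, 2), Tuple) and isinstance([1], List)
    assert isinstance({1}, Set) and isinstance({'a': 1}, Dict)
    # Subscripted generics are not valid isinstance targets:
    # isinstance([1], List[int])  # TypeError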
diff --git a/models/part_net.py b/models/part_net.py
index f2236bf..de19c8c 100644
--- a/models/part_net.py
+++ b/models/part_net.py
@@ -1,4 +1,5 @@
 import copy
+from typing import Tuple
 
 import torch
 import torch.nn as nn
@@ -12,9 +13,9 @@ class FrameLevelPartFeatureExtractor(nn.Module):
     def __init__(
             self,
             in_channels: int = 3,
             feature_channels: int = 32,
-            kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)),
-            paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)),
-            halving: tuple[int, ...] = (0, 2, 3)
+            kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
+            paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
+            halving: Tuple[int, ...] = (0, 2, 3)
     ):
         super().__init__()
         num_blocks = len(kernel_sizes)
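In part_net.py the three tuples are aligned index-wise: block i is built from `kernel_sizes[i]`, `paddings[i]` and `halving[i]`, which is why `num_blocks = len(kernel_sizes)`. A hedged sketch of that pairing (the helper is illustrative, not from the file):

    from typing import Tuple

    def block_configs(
            kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)),
            paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)),
            halving: Tuple[int, ...] = (0, 2, 3)
    ):
        # One (kernel_size, padding, halving) triple per conv block
        return list(zip(kernel_sizes, paddings, halving))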
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index c136040..fba3485 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 import torch
 import torch.nn as nn
 
@@ -10,15 +12,15 @@ class RGBPartNet(nn.Module):
     def __init__(
             self,
             ae_in_channels: int = 3,
-            ae_in_size: tuple[int, int] = (64, 48),
+            ae_in_size: Tuple[int, int] = (64, 48),
             ae_feature_channels: int = 64,
-            f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
-            hpm_scales: tuple[int, ...] = (1, 2, 4),
+            f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64),
+            hpm_scales: Tuple[int, ...] = (1, 2, 4),
             hpm_use_avg_pool: bool = True,
             hpm_use_max_pool: bool = True,
             tfa_squeeze_ratio: int = 4,
             tfa_num_parts: int = 16,
-            embedding_dims: tuple[int] = (256, 256),
+            embedding_dims: Tuple[int] = (256, 256),
             image_log_on: bool = False
     ):
         super().__init__()
diff --git a/preprocess.py b/preprocess.py
index 91fa8c2..eef59ba 100644
--- a/preprocess.py
+++ b/preprocess.py
@@ -1,5 +1,6 @@
 import glob
 import os
+from typing import Tuple
 
 import torch
 import torchvision
@@ -23,7 +24,7 @@ class CASIABClip(Dataset):
         video, *_ = torchvision.io.read_video(filename, pts_unit='sec')
         self.frames = video.permute(0, 3, 1, 2) / 255
 
-    def __getitem__(self, index) -> tuple[int, torch.Tensor]:
+    def __getitem__(self, index) -> Tuple[int, torch.Tensor]:
         return index, self.frames[index]
 
     def __len__(self) -> int:
@@ -35,7 +36,7 @@ model = model.to(DEVICE)
 model.eval()
 
-def result_handler(frame_: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+def result_handler(frame_: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
     for (box, label, score, mask) in zip(*result.values()):
         x0, y0, x1, y1 = box
         height, width = y1 - y0, x1 - x0
diff --git a/startup b/startup
--- a/startup
+++ b/startup
@@ -16,6 +16,7 @@ pip3 install scikit-learn tqdm tensorboard
 cd /root
 git clone https://git.jordangong.com/jordangong/gait-recognition.git
+cd gait-recognition; git checkout python3.8; cd ..
 mkdir -p gait-recognition/data/CASIA-B-MRCNN
 wget https://storage.googleapis.com/gait-dataset/CASIA-B-MRCNN-SEG.tar.zst
 tar -I zstd -xf CASIA-B-MRCNN-SEG.tar.zst -C gait-recognition/data/CASIA-B-MRCNN
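One subtlety this commit carries over unchanged: in rgb_part_net.py above, `embedding_dims: Tuple[int] = (256, 256)` annotates a two-element default with a one-element tuple type, since `Tuple[int]` means exactly one int. The annotation is not enforced at runtime, but a static checker (mypy is assumed here; it is not part of this repository) would flag it:

    from typing import Tuple

    dims_flagged: Tuple[int] = (256, 256)     # checker error, runs fine
    dims_variadic: Tuple[int, ...] = (256, 256)  # any number of ints
    dims_exact: Tuple[int, int] = (256, 256)     # exactly two ints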
diff --git a/utils/configuration.py b/utils/configuration.py
index f6ac182..8ee08f2 100644
--- a/utils/configuration.py
+++ b/utils/configuration.py
@@ -1,4 +1,4 @@
-from typing import TypedDict, Optional, Union
+from typing import TypedDict, Optional, Union, Tuple, Dict
 
 from utils.dataset import ClipClasses, ClipConditions, ClipViews
 
@@ -17,35 +17,35 @@ class DatasetConfiguration(TypedDict):
     num_sampled_frames: int
     truncate_threshold: int
     discard_threshold: int
-    selector: Optional[dict[str, Union[ClipClasses, ClipConditions, ClipViews]]]
+    selector: Optional[Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]]
     num_input_channels: int
-    frame_size: tuple[int, int]
+    frame_size: Tuple[int, int]
     cache_on: bool
 
 
 class DataloaderConfiguration(TypedDict):
-    batch_size: tuple[int, int]
+    batch_size: Tuple[int, int]
     num_workers: int
     pin_memory: bool
 
 
 class ModelHPConfiguration(TypedDict):
     ae_feature_channels: int
-    f_a_c_p_dims: tuple[int, int, int]
-    hpm_scales: tuple[int, ...]
+    f_a_c_p_dims: Tuple[int, int, int]
+    hpm_scales: Tuple[int, ...]
     hpm_use_avg_pool: bool
     hpm_use_max_pool: bool
     tfa_num_parts: int
     tfa_squeeze_ratio: int
-    embedding_dims: tuple[int]
+    embedding_dims: Tuple[int]
     triplet_is_hard: bool
     triplet_is_mean: bool
-    triplet_margins: tuple[float, float]
+    triplet_margins: Tuple[float, float]
 
 
 class SubOptimizerHPConfiguration(TypedDict):
     lr: int
-    betas: tuple[float, float]
+    betas: Tuple[float, float]
     eps: float
     weight_decay: float
     amsgrad: bool
@@ -53,7 +53,7 @@ class OptimizerHPConfiguration(TypedDict):
     lr: int
-    betas: tuple[float, float]
+    betas: Tuple[float, float]
     eps: float
     weight_decay: float
     amsgrad: bool
@@ -85,8 +85,8 @@ class ModelConfiguration(TypedDict):
     name: str
     restore_iter: int
     total_iter: int
-    restore_iters: tuple[int, ...]
-    total_iters: tuple[int, ...]
+    restore_iters: Tuple[int, ...]
+    total_iters: Tuple[int, ...]
 
 
 class Configuration(TypedDict):
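The `TypedDict` classes in utils/configuration.py (available from the standard `typing` module since Python 3.8) only describe the shape of plain dicts; instances are ordinary dictionaries. A minimal usage sketch with made-up values:

    from typing import Tuple, TypedDict

    class DataloaderConfiguration(TypedDict):
        batch_size: Tuple[int, int]
        num_workers: int
        pin_memory: bool

    config: DataloaderConfiguration = {
        'batch_size': (8, 16),  # (pr, k), the default used in model.py
        'num_workers': 4,       # illustrative value
        'pin_memory': True,     # illustrative value
    }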
diff --git a/utils/dataset.py b/utils/dataset.py
index 387c211..41e2f1e 100644
--- a/utils/dataset.py
+++ b/utils/dataset.py
@@ -1,7 +1,7 @@
 import os
 import random
 import re
-from typing import Optional, NewType, Union
+from typing import Optional, NewType, Union, List, Tuple, Set, Dict
 
 import numpy as np
 import torch
@@ -11,9 +11,9 @@
 from sklearn.preprocessing import LabelEncoder
 from torch.utils import data
 from tqdm import tqdm
 
-ClipClasses = NewType('ClipClasses', set[str])
-ClipConditions = NewType('ClipConditions', set[str])
-ClipViews = NewType('ClipViews', set[str])
+ClipClasses = NewType('ClipClasses', Set[str])
+ClipConditions = NewType('ClipConditions', Set[str])
+ClipViews = NewType('ClipViews', Set[str])
 
 
 class CASIAB(data.Dataset):
@@ -27,11 +27,11 @@ class CASIAB(data.Dataset):
             num_sampled_frames: int = 30,
             truncate_threshold: int = 40,
             discard_threshold: int = 15,
-            selector: Optional[dict[
+            selector: Optional[Dict[
                 str, Union[ClipClasses, ClipConditions, ClipViews]
             ]] = None,
             num_input_channels: int = 3,
-            frame_size: tuple[int, int] = (64, 32),
+            frame_size: Tuple[int, int] = (64, 32),
             cache_on: bool = False
     ):
         """
@@ -79,15 +79,15 @@ class CASIAB(data.Dataset):
         self.views: np.ndarray[np.str_]
         # Labels, classes, conditions and views in dataset,
         #   set of three attributes above
-        self.metadata: dict[str, list[np.int64, str]]
+        self.metadata: Dict[str, List[np.int64, str]]
 
         # Dictionaries for indexing frames and frame names by clip name
         #   and chip path when cache is on
-        self._cached_clips_frame_names: Optional[dict[str, list[str]]] = None
-        self._cached_clips: Optional[dict[str, torch.Tensor]] = None
+        self._cached_clips_frame_names: Optional[Dict[str, List[str]]] = None
+        self._cached_clips: Optional[Dict[str, torch.Tensor]] = None
 
         # Video clip directory names
-        self._clip_names: list[str] = []
+        self._clip_names: List[str] = []
         clip_names = sorted(os.listdir(self._root_dir))
 
         if self._is_train:
@@ -174,7 +174,7 @@ class CASIAB(data.Dataset):
     def __getitem__(
             self,
             index: int
-    ) -> dict[str, Union[np.int64, str, torch.Tensor]]:
+    ) -> Dict[str, Union[np.int64, str, torch.Tensor]]:
         label = self.labels[index]
         condition = self.conditions[index]
         view = self.views[index]
@@ -222,8 +222,8 @@ class CASIAB(data.Dataset):
     def _load_cached_video(
             self,
             clip: torch.Tensor,
-            frame_names: list[str],
-            sampled_frame_names: list[str]
+            frame_names: List[str],
+            sampled_frame_names: List[str]
     ) -> torch.Tensor:
         # Mask the original clip when it is long enough
         if len(frame_names) >= self._num_sampled_frames:
@@ -253,7 +253,7 @@ class CASIAB(data.Dataset):
         return clip
 
     def _sample_frames(self, clip_path: str,
-                       is_caching: bool = False) -> list[str]:
+                       is_caching: bool = False) -> List[str]:
         if self._cache_on:
             if is_caching:
                 # Sort frame in advance for loading convenience
diff --git a/utils/sampler.py b/utils/sampler.py
index 0c9872c..581d7a2 100644
--- a/utils/sampler.py
+++ b/utils/sampler.py
@@ -1,6 +1,5 @@
 import random
-from collections.abc import Iterator
-from typing import Union
+from typing import Union, Tuple, Iterator
 
 import numpy as np
 from torch.utils import data
@@ -12,7 +11,7 @@ class TripletSampler(data.Sampler):
     def __init__(
             self,
             data_source: Union[CASIAB],
-            batch_size: tuple[int, int]
+            batch_size: Tuple[int, int]
     ):
         super().__init__(data_source)
         self.metadata_labels = data_source.metadata['labels']
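For reference, the `NewType` aliases in utils/dataset.py are zero-cost at runtime: `ClipConditions(...)` simply returns the set it is given, and only static checkers treat it as a distinct type. A short sketch (the condition strings are illustrative CASIA-B-style names, not taken from this diff):

    from typing import NewType, Set

    ClipConditions = NewType('ClipConditions', Set[str])

    # Identity at runtime; a distinct type only for static checkers
    selector = {'conditions': ClipConditions({'nm-01', 'nm-02'})}
    assert selector['conditions'] == {'nm-01', 'nm-02'}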