diff options
-rw-r--r-- | models/auto_encoder.py | 8 | ||||
-rw-r--r-- | models/hpm.py | 4 | ||||
-rw-r--r-- | models/layers.py | 18 | ||||
-rw-r--r-- | models/model.py | 84 | ||||
-rw-r--r-- | models/part_net.py | 13 | ||||
-rw-r--r-- | models/rgb_part_net.py | 16 | ||||
-rw-r--r-- | preprocess.py | 5 | ||||
-rw-r--r-- | startup | 1 | ||||
-rw-r--r-- | utils/configuration.py | 28 | ||||
-rw-r--r-- | utils/dataset.py | 28 | ||||
-rw-r--r-- | utils/sampler.py | 5 |
11 files changed, 113 insertions, 97 deletions
diff --git a/models/auto_encoder.py b/models/auto_encoder.py index 2d715db..c6bc52f 100644 --- a/models/auto_encoder.py +++ b/models/auto_encoder.py @@ -1,3 +1,5 @@ +from typing import Tuple + import torch import torch.nn as nn import torch.nn.functional as F @@ -13,7 +15,7 @@ class Encoder(nn.Module): in_channels: int = 3, frame_size: tuple[int, int] = (64, 48), feature_channels: int = 64, - output_dims: tuple[int, int, int] = (128, 128, 64) + output_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() self.feature_channels = feature_channels @@ -74,7 +76,7 @@ class Decoder(nn.Module): def __init__( self, - input_dims: tuple[int, int, int] = (128, 128, 64), + input_dims: Tuple[int, int, int] = (128, 128, 64), feature_channels: int = 64, feature_size: tuple[int, int] = (4, 3), out_channels: int = 3, @@ -127,7 +129,7 @@ class AutoEncoder(nn.Module): channels: int = 3, frame_size: tuple[int, int] = (64, 48), feature_channels: int = 64, - embedding_dims: tuple[int, int, int] = (128, 128, 64) + embedding_dims: Tuple[int, int, int] = (128, 128, 64) ): super().__init__() self.encoder = Encoder(channels, frame_size, diff --git a/models/hpm.py b/models/hpm.py index 9879cfb..b49be3a 100644 --- a/models/hpm.py +++ b/models/hpm.py @@ -1,3 +1,5 @@ +from typing import Tuple + import torch import torch.nn as nn @@ -10,7 +12,7 @@ class HorizontalPyramidMatching(nn.Module): in_channels: int, out_channels: int = 128, use_1x1conv: bool = False, - scales: tuple[int, ...] = (1, 2, 4), + scales: Tuple[int, ...] = (1, 2, 4), use_avg_pool: bool = True, use_max_pool: bool = False, **kwargs diff --git a/models/layers.py b/models/layers.py index ef53a95..ae61583 100644 --- a/models/layers.py +++ b/models/layers.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import Union, Tuple import torch import torch.nn as nn @@ -10,7 +10,7 @@ class BasicConv2d(nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], **kwargs ): super().__init__() @@ -29,7 +29,7 @@ class VGGConv2d(BasicConv2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]] = 3, + kernel_size: Union[int, Tuple[int, int]] = 3, padding: int = 1, **kwargs ): @@ -47,7 +47,7 @@ class BasicConvTranspose2d(nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], **kwargs ): super().__init__() @@ -66,7 +66,7 @@ class DCGANConvTranspose2d(BasicConvTranspose2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]] = 4, + kernel_size: Union[int, Tuple[int, int]] = 4, stride: int = 2, padding: int = 1, is_last_layer: bool = False, @@ -104,7 +104,7 @@ class FocalConv2d(BasicConv2d): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int, int]], + kernel_size: Union[int, Tuple[int, int]], halving: int, **kwargs ): @@ -124,8 +124,8 @@ class FocalConv2dBlock(nn.Module): self, in_channels: int, out_channels: int, - kernel_sizes: tuple[int, int], - paddings: tuple[int, int], + kernel_sizes: Tuple[int, int], + paddings: Tuple[int, int], halving: int, use_pool: bool = True, **kwargs @@ -151,7 +151,7 @@ class BasicConv1d(nn.Module): self, in_channels: int, out_channels: int, - kernel_size: Union[int, tuple[int]], + kernel_size: Union[int, Tuple[int]], **kwargs ): super().__init__() diff --git a/models/model.py b/models/model.py index 3f5d283..3d619fe 100644 --- a/models/model.py +++ b/models/model.py @@ -1,6 +1,6 @@ import os from datetime import datetime -from typing import Union, Optional +from typing import Union, Optional, Tuple, List, Dict, Set import numpy as np import torch @@ -59,8 +59,8 @@ class Model: self.pr: Optional[int] = None self.k: Optional[int] = None - self._gallery_dataset_meta: Optional[dict[str, list]] = None - self._probe_datasets_meta: Optional[dict[str, dict[str, list]]] = None + self._gallery_dataset_meta: Optional[Dict[str, List]] = None + self._probe_datasets_meta: Optional[Dict[str, Dict[str, List]]] = None self._model_name: str = self.meta.get('name', 'RGB-GaitPart') self._hp_sig: str = self._make_signature(self.hp) @@ -108,8 +108,8 @@ class Model: def fit_all( self, dataset_config: DatasetConfiguration, - dataset_selectors: dict[ - str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]] + dataset_selectors: Dict[ + str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], dataloader_config: DataloaderConfiguration, ): @@ -141,7 +141,7 @@ class Model: dataloader = self._parse_dataloader_config(dataset, dataloader_config) # Prepare for model, optimizer and scheduler model_hp = self.hp.get('model', {}) - optim_hp: dict = self.hp.get('optimizer', {}).copy() + optim_hp: Dict = self.hp.get('optimizer', {}).copy() start_iter = optim_hp.pop('start_iter', 0) ae_optim_hp = optim_hp.pop('auto_encoder', {}) pn_optim_hp = optim_hp.pop('part_net', {}) @@ -151,12 +151,13 @@ class Model: self.rgb_pn = RGBPartNet(self.in_channels, self.in_size, **model_hp, image_log_on=self.image_log_on) # Try to accelerate computation using CUDA or others + self.rgb_pn = nn.DataParallel(self.rgb_pn) self.rgb_pn = self.rgb_pn.to(self.device) self.optimizer = optim.Adam([ - {'params': self.rgb_pn.ae.parameters(), **ae_optim_hp}, - {'params': self.rgb_pn.pn.parameters(), **pn_optim_hp}, - {'params': self.rgb_pn.hpm.parameters(), **hpm_optim_hp}, - {'params': self.rgb_pn.fc_mat, **fc_optim_hp} + {'params': self.rgb_pn.module.ae.parameters(), **ae_optim_hp}, + {'params': self.rgb_pn.module.pn.parameters(), **pn_optim_hp}, + {'params': self.rgb_pn.module.hpm.parameters(), **hpm_optim_hp}, + {'params': self.rgb_pn.module.fc_mat, **fc_optim_hp} ], **optim_hp) sched_gamma = sched_hp.get('gamma', 0.9) sched_step_size = sched_hp.get('step_size', 500) @@ -195,8 +196,14 @@ class Model: x_c2 = batch_c2['clip'].to(self.device) y = batch_c1['label'].to(self.device) # Duplicate labels for each part - y = y.unsqueeze(1).repeat(1, self.rgb_pn.num_total_parts) + y = y.unsqueeze(1).repeat(1, self.rgb_pn.module.num_total_parts) losses, images = self.rgb_pn(x_c1, x_c2, y) + losses = torch.stack(( + # xrecon cano_cons pose_sim + losses[0].sum(), losses[1].mean(), losses[2].mean(), + # hpm_ba_trip pn_ba_trip + losses[3].mean(), losses[4].mean() + )) loss = losses.sum() loss.backward() self.optimizer.step() @@ -263,13 +270,13 @@ class Model: def predict_all( self, - iters: tuple[int], + iters: Tuple[int], dataset_config: DatasetConfiguration, - dataset_selectors: dict[ - str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]] + dataset_selectors: Dict[ + str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], dataloader_config: DataloaderConfiguration, - ) -> dict[str, torch.Tensor]: + ) -> Dict[str, torch.Tensor]: # Transform data to features gallery_samples, probe_samples = self.transform( iters, dataset_config, dataset_selectors, dataloader_config @@ -281,10 +288,10 @@ class Model: def transform( self, - iters: tuple[int], + iters: Tuple[int], dataset_config: DatasetConfiguration, - dataset_selectors: dict[ - str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]] + dataset_selectors: Dict[ + str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ], dataloader_config: DataloaderConfiguration ): @@ -302,6 +309,7 @@ class Model: model_hp = self.hp.get('model', {}) self.rgb_pn = RGBPartNet(self.in_channels, self.in_size, **model_hp) # Try to accelerate computation using CUDA or others + self.rgb_pn = nn.DataParallel(self.rgb_pn) self.rgb_pn = self.rgb_pn.to(self.device) self.rgb_pn.eval() @@ -326,7 +334,7 @@ class Model: return gallery_samples, probe_samples - def _get_eval_sample(self, sample: dict[str, Union[list, torch.Tensor]]): + def _get_eval_sample(self, sample: Dict[str, Union[List, torch.Tensor]]): label = sample.pop('label').item() clip = sample.pop('clip').to(self.device) feature = self.rgb_pn(clip).detach() @@ -338,10 +346,10 @@ class Model: def evaluate( self, - gallery_samples: dict[str, Union[list[str], torch.Tensor]], - probe_samples: dict[str, dict[str, Union[list[str], torch.Tensor]]], + gallery_samples: Dict[str, Union[List[str], torch.Tensor]], + probe_samples: Dict[str, Dict[str, Union[List[str], torch.Tensor]]], num_ranks: int = 5 - ) -> dict[str, torch.Tensor]: + ) -> Dict[str, torch.Tensor]: probe_conditions = self._probe_datasets_meta.keys() gallery_views_meta = self._gallery_dataset_meta['views'] probe_views_meta = list(self._probe_datasets_meta.values())[0]['views'] @@ -386,12 +394,12 @@ class Model: def _load_pretrained( self, - iters: tuple[int], + iters: Tuple[int], dataset_config: DatasetConfiguration, - dataset_selectors: dict[ - str, dict[str, Union[ClipClasses, ClipConditions, ClipViews]] + dataset_selectors: Dict[ + str, Dict[str, Union[ClipClasses, ClipConditions, ClipViews]] ] - ) -> dict[str, str]: + ) -> Dict[str, str]: checkpoints = {} for (iter_, (condition, selector)) in zip( iters, dataset_selectors.items() @@ -408,7 +416,7 @@ class Model: self, dataset_config: DatasetConfiguration, dataloader_config: DataloaderConfiguration, - ) -> tuple[DataLoader, dict[str, DataLoader]]: + ) -> Tuple[DataLoader, Dict[str, DataLoader]]: dataset_name = dataset_config.get('name', 'CASIA-B') if dataset_name == 'CASIA-B': gallery_dataset = self._parse_dataset_config( @@ -465,7 +473,7 @@ class Model: dataset_config, popped_keys=['root_dir', 'cache_on'] ) - config: dict = dataset_config.copy() + config: Dict = dataset_config.copy() name = config.pop('name', 'CASIA-B') if name == 'CASIA-B': return CASIAB(**config, is_train=self.is_train) @@ -479,7 +487,7 @@ class Model: dataset: Union[CASIAB], dataloader_config: DataloaderConfiguration ) -> DataLoader: - config: dict = dataloader_config.copy() + config: Dict = dataloader_config.copy() (self.pr, self.k) = config.pop('batch_size', (8, 16)) if self.is_train: triplet_sampler = TripletSampler(dataset, (self.pr, self.k)) @@ -492,9 +500,9 @@ class Model: def _batch_splitter( self, - batch: list[dict[str, Union[np.int64, str, torch.Tensor]]] - ) -> tuple[dict[str, Union[list[str], torch.Tensor]], - dict[str, Union[list[str], torch.Tensor]]]: + batch: List[Dict[str, Union[np.int64, str, torch.Tensor]]] + ) -> Tuple[Dict[str, Union[List[str], torch.Tensor]], + Dict[str, Union[List[str], torch.Tensor]]]: """ Disentanglement need two random conditions, this function will split pr * k * 2 samples to 2 dicts each containing pr * k @@ -508,8 +516,8 @@ class Model: return default_collate(_batch[0]), default_collate(_batch[1]) def _make_signature(self, - config: dict, - popped_keys: Optional[list] = None) -> str: + config: Dict, + popped_keys: Optional[List] = None) -> str: _config = config.copy() if popped_keys: for key in popped_keys: @@ -517,16 +525,16 @@ class Model: return self._gen_sig(list(_config.values())) - def _gen_sig(self, values: Union[tuple, list, set, str, int, float]) -> str: + def _gen_sig(self, values: Union[Tuple, List, Set, str, int, float]) -> str: strings = [] for v in values: if isinstance(v, str): strings.append(v) - elif isinstance(v, (tuple, list)): + elif isinstance(v, (Tuple, List)): strings.append(self._gen_sig(v)) - elif isinstance(v, set): + elif isinstance(v, Set): strings.append(self._gen_sig(sorted(list(v)))) - elif isinstance(v, dict): + elif isinstance(v, Dict): strings.append(self._gen_sig(list(v.values()))) else: strings.append(str(v)) diff --git a/models/part_net.py b/models/part_net.py index 62a2bac..f34f993 100644 --- a/models/part_net.py +++ b/models/part_net.py @@ -1,4 +1,5 @@ import copy +from typing import Tuple import torch import torch.nn as nn @@ -12,9 +13,9 @@ class FrameLevelPartFeatureExtractor(nn.Module): self, in_channels: int = 3, feature_channels: int = 32, - kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - halving: tuple[int, ...] = (0, 2, 3) + kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + halving: Tuple[int, ...] = (0, 2, 3) ): super().__init__() num_blocks = len(kernel_sizes) @@ -112,9 +113,9 @@ class PartNet(nn.Module): self, in_channels: int = 3, feature_channels: int = 32, - kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - halving: tuple[int, ...] = (0, 2, 3), + kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + halving: Tuple[int, ...] = (0, 2, 3), squeeze_ratio: int = 4, num_part: int = 16 ): diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py index 67acac3..80b3e17 100644 --- a/models/rgb_part_net.py +++ b/models/rgb_part_net.py @@ -1,3 +1,5 @@ +from typing import Tuple + import torch import torch.nn as nn @@ -13,19 +15,19 @@ class RGBPartNet(nn.Module): ae_in_channels: int = 3, ae_in_size: tuple[int, int] = (64, 48), ae_feature_channels: int = 64, - f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64), + f_a_c_p_dims: Tuple[int, int, int] = (128, 128, 64), hpm_use_1x1conv: bool = False, - hpm_scales: tuple[int, ...] = (1, 2, 4), + hpm_scales: Tuple[int, ...] = (1, 2, 4), hpm_use_avg_pool: bool = True, hpm_use_max_pool: bool = True, fpfe_feature_channels: int = 32, - fpfe_kernel_sizes: tuple[tuple, ...] = ((5, 3), (3, 3), (3, 3)), - fpfe_paddings: tuple[tuple, ...] = ((2, 1), (1, 1), (1, 1)), - fpfe_halving: tuple[int, ...] = (0, 2, 3), + fpfe_kernel_sizes: Tuple[Tuple, ...] = ((5, 3), (3, 3), (3, 3)), + fpfe_paddings: Tuple[Tuple, ...] = ((2, 1), (1, 1), (1, 1)), + fpfe_halving: Tuple[int, ...] = (0, 2, 3), tfa_squeeze_ratio: int = 4, tfa_num_parts: int = 16, embedding_dims: int = 256, - triplet_margins: tuple[float, float] = (0.2, 0.2), + triplet_margins: Tuple[float, float] = (0.2, 0.2), image_log_on: bool = False ): super().__init__() @@ -84,7 +86,7 @@ class RGBPartNet(nn.Module): pn_ba_trip = self.pn_ba_trip( x[self.hpm_num_parts:], y[self.hpm_num_parts:] ) - losses = torch.stack((*losses, hpm_ba_trip, pn_ba_trip)) + losses = (*losses, hpm_ba_trip, pn_ba_trip) return losses, images else: return x.unsqueeze(1).view(-1) diff --git a/preprocess.py b/preprocess.py index 91fa8c2..eef59ba 100644 --- a/preprocess.py +++ b/preprocess.py @@ -1,5 +1,6 @@ import glob import os +from typing import Tuple import torch import torchvision @@ -23,7 +24,7 @@ class CASIABClip(Dataset): video, *_ = torchvision.io.read_video(filename, pts_unit='sec') self.frames = video.permute(0, 3, 1, 2) / 255 - def __getitem__(self, index) -> tuple[int, torch.Tensor]: + def __getitem__(self, index) -> Tuple[int, torch.Tensor]: return index, self.frames[index] def __len__(self) -> int: @@ -35,7 +36,7 @@ model = model.to(DEVICE) model.eval() -def result_handler(frame_: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: +def result_handler(frame_: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: for (box, label, score, mask) in zip(*result.values()): x0, y0, x1, y1 = box height, width = y1 - y0, x1 - x0 @@ -16,6 +16,7 @@ pip3 install scikit-learn tqdm tensorboard cd /root git clone https://git.jordangong.com/jordangong/gait-recognition.git +cd gait-recognition; git checkout python3.8; cd .. mkdir -p gait-recognition/data/CASIA-B-MRCNN wget https://storage.googleapis.com/gait-dataset/CASIA-B-MRCNN-SEG.tar.zst tar -I zstd -xf CASIA-B-MRCNN-SEG.tar.zst -C gait-recognition/data/CASIA-B-MRCNN diff --git a/utils/configuration.py b/utils/configuration.py index 435d815..b9e6d92 100644 --- a/utils/configuration.py +++ b/utils/configuration.py @@ -1,4 +1,4 @@ -from typing import TypedDict, Optional, Union +from typing import TypedDict, Optional, Union, Tuple, Dict from utils.dataset import ClipClasses, ClipConditions, ClipViews @@ -17,38 +17,38 @@ class DatasetConfiguration(TypedDict): num_sampled_frames: int truncate_threshold: int discard_threshold: int - selector: Optional[dict[str, Union[ClipClasses, ClipConditions, ClipViews]]] + selector: Optional[Dict[str, Union[ClipClasses, ClipConditions, ClipViews]]] num_input_channels: int - frame_size: tuple[int, int] + frame_size: Tuple[int, int] cache_on: bool class DataloaderConfiguration(TypedDict): - batch_size: tuple[int, int] + batch_size: Tuple[int, int] num_workers: int pin_memory: bool class ModelHPConfiguration(TypedDict): ae_feature_channels: int - f_a_c_p_dims: tuple[int, int, int] - hpm_scales: tuple[int, ...] + f_a_c_p_dims: Tuple[int, int, int] + hpm_scales: Tuple[int, ...] hpm_use_1x1conv: bool hpm_use_avg_pool: bool hpm_use_max_pool: bool fpfe_feature_channels: int - fpfe_kernel_sizes: tuple[tuple, ...] - fpfe_paddings: tuple[tuple, ...] - fpfe_halving: tuple[int, ...] + fpfe_kernel_sizes: Tuple[Tuple, ...] + fpfe_paddings: Tuple[Tuple, ...] + fpfe_halving: Tuple[int, ...] tfa_squeeze_ratio: int tfa_num_parts: int embedding_dims: int - triplet_margins: tuple[float, float] + triplet_margins: Tuple[float, float] class SubOptimizerHPConfiguration(TypedDict): lr: int - betas: tuple[float, float] + betas: Tuple[float, float] eps: float weight_decay: float amsgrad: bool @@ -57,7 +57,7 @@ class SubOptimizerHPConfiguration(TypedDict): class OptimizerHPConfiguration(TypedDict): start_iter: int lr: int - betas: tuple[float, float] + betas: Tuple[float, float] eps: float weight_decay: float amsgrad: bool @@ -82,8 +82,8 @@ class ModelConfiguration(TypedDict): name: str restore_iter: int total_iter: int - restore_iters: tuple[int, ...] - total_iters: tuple[int, ...] + restore_iters: Tuple[int, ...] + total_iters: Tuple[int, ...] class Configuration(TypedDict): diff --git a/utils/dataset.py b/utils/dataset.py index c487988..72cf050 100644 --- a/utils/dataset.py +++ b/utils/dataset.py @@ -1,7 +1,7 @@ import os import random import re -from typing import Optional, NewType, Union +from typing import Optional, NewType, Union, List, Tuple, Set, Dict import numpy as np import torch @@ -11,9 +11,9 @@ from sklearn.preprocessing import LabelEncoder from torch.utils import data from tqdm import tqdm -ClipClasses = NewType('ClipClasses', set[str]) -ClipConditions = NewType('ClipConditions', set[str]) -ClipViews = NewType('ClipViews', set[str]) +ClipClasses = NewType('ClipClasses', Set[str]) +ClipConditions = NewType('ClipConditions', Set[str]) +ClipViews = NewType('ClipViews', Set[str]) class CASIAB(data.Dataset): @@ -27,11 +27,11 @@ class CASIAB(data.Dataset): num_sampled_frames: int = 30, truncate_threshold: int = 40, discard_threshold: int = 15, - selector: Optional[dict[ + selector: Optional[Dict[ str, Union[ClipClasses, ClipConditions, ClipViews] ]] = None, num_input_channels: int = 3, - frame_size: tuple[int, int] = (64, 32), + frame_size: Tuple[int, int] = (64, 32), cache_on: bool = False ): """ @@ -79,15 +79,15 @@ class CASIAB(data.Dataset): self.views: np.ndarray[np.str_] # Labels, classes, conditions and views in dataset, # set of three attributes above - self.metadata: dict[str, list[np.int64, str]] + self.metadata: Dict[str, List[np.int64, str]] # Dictionaries for indexing frames and frame names by clip name # and chip path when cache is on - self._cached_clips_frame_names: Optional[dict[str, list[str]]] = None - self._cached_clips: Optional[dict[str, torch.Tensor]] = None + self._cached_clips_frame_names: Optional[Dict[str, List[str]]] = None + self._cached_clips: Optional[Dict[str, torch.Tensor]] = None # Video clip directory names - self._clip_names: list[str] = [] + self._clip_names: List[str] = [] clip_names = sorted(os.listdir(self._root_dir)) if self._is_train: @@ -174,7 +174,7 @@ class CASIAB(data.Dataset): def __getitem__( self, index: int - ) -> dict[str, Union[np.int64, str, torch.Tensor]]: + ) -> Dict[str, Union[np.int64, str, torch.Tensor]]: label = self.labels[index] condition = self.conditions[index] view = self.views[index] @@ -222,8 +222,8 @@ class CASIAB(data.Dataset): def _load_cached_video( self, clip: torch.Tensor, - frame_names: list[str], - sampled_frame_names: list[str] + frame_names: List[str], + sampled_frame_names: List[str] ) -> torch.Tensor: # Mask the original clip when it is long enough if len(frame_names) >= self._num_sampled_frames: @@ -253,7 +253,7 @@ class CASIAB(data.Dataset): return clip def _sample_frames(self, clip_path: str, - is_caching: bool = False) -> list[str]: + is_caching: bool = False) -> List[str]: if self._cache_on: if is_caching: # Sort frame in advance for loading convenience diff --git a/utils/sampler.py b/utils/sampler.py index cdf1984..0977f94 100644 --- a/utils/sampler.py +++ b/utils/sampler.py @@ -1,6 +1,5 @@ import random -from collections.abc import Iterator -from typing import Union +from typing import Union, Tuple, Iterator import numpy as np from torch.utils import data @@ -12,7 +11,7 @@ class TripletSampler(data.Sampler): def __init__( self, data_source: Union[CASIAB], - batch_size: tuple[int, int] + batch_size: Tuple[int, int] ): super().__init__(data_source) self.metadata_labels = data_source.metadata['labels'] |