diff options
author | Jordan Gong <jordan.gong@protonmail.com> | 2021-04-03 21:30:35 +0800 |
---|---|---|
committer | Jordan Gong <jordan.gong@protonmail.com> | 2021-04-03 23:06:07 +0800 |
commit | f6f133fa7b926ce0c7d28bbf0ba4de41b3708d4a (patch) | |
tree | 4bf9b80c1c7a96f081a4e3b3b751145054fccc39 /utils | |
parent | d12dd6b04a4e7c2b1ee43ab6f36f25d0c35ca364 (diff) | |
parent | b9f35fbe7d78b3c478086ea26c2a76f72ce35687 (diff) |
Merge branch 'master' into disentangling_only
# Conflicts:
# config.py
# models/hpm.py
# models/layers.py
# models/model.py
# models/part_net.py
# models/rgb_part_net.py
# test/part_net.py
# utils/configuration.py
# utils/triplet_loss.py
Diffstat (limited to 'utils')
-rw-r--r-- | utils/configuration.py | 6 |
-rw-r--r-- | utils/dataset.py | 6 |
-rw-r--r-- | utils/sampler.py | 35 |
3 files changed, 37 insertions, 10 deletions
diff --git a/utils/configuration.py b/utils/configuration.py
index 1b7c8d3..fff3876 100644
--- a/utils/configuration.py
+++ b/utils/configuration.py
@@ -8,6 +8,7 @@ class SystemConfiguration(TypedDict):
     CUDA_VISIBLE_DEVICES: str
     save_dir: str
     image_log_on: bool
+    val_size: int
 
 
 class DatasetConfiguration(TypedDict):
@@ -35,7 +36,6 @@ class ModelHPConfiguration(TypedDict):
 
 
 class OptimizerHPConfiguration(TypedDict):
-    start_iter: int
     lr: int
     betas: tuple[float, float]
     eps: float
@@ -44,8 +44,8 @@ class OptimizerHPConfiguration(TypedDict):
 
 
 class SchedulerHPConfiguration(TypedDict):
-    step_size: int
-    gamma: float
+    start_step: int
+    final_gamma: float
 
 
 class HyperparameterConfiguration(TypedDict):
diff --git a/utils/dataset.py b/utils/dataset.py
index c487988..387c211 100644
--- a/utils/dataset.py
+++ b/utils/dataset.py
@@ -111,9 +111,9 @@ class CASIAB(data.Dataset):
         # in Bag #2 condition from 90 degree angle
         classes, conditions, views = [], [], []
         if selector:
-            selected_classes = selector.pop('classes', None)
-            selected_conditions = selector.pop('conditions', None)
-            selected_views = selector.pop('views', None)
+            selected_classes = selector.get('classes', None)
+            selected_conditions = selector.get('conditions', None)
+            selected_views = selector.get('views', None)
 
             class_regex = r'\d{3}'
             condition_regex = r'(nm|bg|cl)-0[0-6]'
diff --git a/utils/sampler.py b/utils/sampler.py
index cdf1984..0c9872c 100644
--- a/utils/sampler.py
+++ b/utils/sampler.py
@@ -16,7 +16,18 @@ class TripletSampler(data.Sampler):
     ):
         super().__init__(data_source)
         self.metadata_labels = data_source.metadata['labels']
+        metadata_conditions = data_source.metadata['conditions']
+        self.subsets = {}
+        for condition in metadata_conditions:
+            pre, _ = condition.split('-')
+            if self.subsets.get(pre, None) is None:
+                self.subsets[pre] = []
+            self.subsets[pre].append(condition)
+        self.num_subsets = len(self.subsets)
+        self.num_seq = {pre: len(seq) for (pre, seq) in self.subsets.items()}
+        self.min_num_seq = min(self.num_seq.values())
         self.labels = data_source.labels
+        self.conditions = data_source.conditions
         self.length = len(self.labels)
         self.indexes = np.arange(0, self.length)
         (self.pr, self.k) = batch_size
@@ -27,15 +38,31 @@ class TripletSampler(data.Sampler):
             # Sample pr subjects by sampling labels appeared in dataset
             sampled_subjects = random.sample(self.metadata_labels, k=self.pr)
             for label in sampled_subjects:
-                clips_from_subject = self.indexes[self.labels == label].tolist()
+                mask = self.labels == label
+                # Fix unbalanced datasets
+                if self.num_subsets > 1:
+                    condition_mask = np.zeros(self.conditions.shape, dtype=bool)
+                    for num, conditions_ in zip(
+                        self.num_seq.values(), self.subsets.values()
+                    ):
+                        if num > self.min_num_seq:
+                            conditions = random.sample(
+                                conditions_, self.min_num_seq
+                            )
+                        else:
+                            conditions = conditions_
+                        for condition in conditions:
+                            condition_mask |= self.conditions == condition
+                    mask &= condition_mask
+                clips = self.indexes[mask].tolist()
                 # Sample k clips from the subject without replacement if
                 # have enough clips, k more clips will sampled for
                 # disentanglement
                 k = self.k * 2
-                if len(clips_from_subject) >= k:
-                    _sampled_indexes = random.sample(clips_from_subject, k=k)
+                if len(clips) >= k:
+                    _sampled_indexes = random.sample(clips, k=k)
                 else:
-                    _sampled_indexes = random.choices(clips_from_subject, k=k)
+                    _sampled_indexes = random.choices(clips, k=k)
                 sampled_indexes += _sampled_indexes
 
             yield sampled_indexes