import argparse import os from dataclasses import dataclass from pathlib import Path from typing import Iterable, Callable import sys import torch import yaml from torch.utils.data import Dataset from torchvision.datasets import CIFAR10, CIFAR100, ImageNet from torchvision.transforms import transforms path = str(Path(Path(__file__).parent.absolute()).parent.absolute()) sys.path.insert(0, path) from libs.criteria import InfoNCELoss from libs.datautils import color_distortion, Clip, RandomGaussianBlur, TwinTransform from libs.optimizers import LARS from libs.utils import Trainer, BaseConfig from libs.logging import BaseBatchLogRecord, Loggers from simclr.models import cifar_simclr_resnet50, cifar_simclr_vit_tiny, imagenet_simclr_resnet50 def parse_args_and_config(): parser = argparse.ArgumentParser( description='Contrastive baseline SimCLR', formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument('--codename', default='cifar10-simclr-128-lars-warmup', type=str, help="Model descriptor") parser.add_argument('--log-dir', default='logs', type=str, help="Path to log directory") parser.add_argument('--checkpoint-dir', default='checkpoints', type=str, help="Path to checkpoints directory") parser.add_argument('--seed', default=None, type=int, help='Random seed for reproducibility') parser.add_argument('--num-iters', default=23438, type=int, help='Number of iters (default is 50 epochs equiv., ' 'around dataset_size * epochs / batch_size)') parser.add_argument('--config', type=argparse.FileType(mode='r'), help='Path to config file (optional)') # TODO: Add model hyperparams dataclass parser.add_argument('--encoder', default='resnet', type=str, choices=('resnet', 'vit'), help='Backbone of encoder') parser.add_argument('--hid-dim', default=2048, type=int, help='Number of dimension of embedding') parser.add_argument('--out-dim', default=128, type=int, help='Number of dimension after projection') parser.add_argument('--temp', default=0.5, type=float, help='Temperature in InfoNCE loss') dataset_group = parser.add_argument_group('Dataset parameters') dataset_group.add_argument('--dataset-dir', default='dataset', type=str, help="Path to dataset directory") dataset_group.add_argument('--dataset', default='cifar10', type=str, choices=('cifar10, cifar100', 'imagenet'), help="Name of dataset") dataset_group.add_argument('--crop-size', default=32, type=int, help='Random crop size after resize') dataset_group.add_argument('--crop-scale-range', nargs=2, default=(0.8, 1), type=float, help='Random resize scale range', metavar=('start', 'stop')) dataset_group.add_argument('--hflip-prob', default=0.5, type=float, help='Random horizontal flip probability') dataset_group.add_argument('--distort-strength', default=0.5, type=float, help='Distortion strength') dataset_group.add_argument('--gauss-ker-scale', default=10, type=float, help='Gaussian kernel scale factor ' '(s = img_size / ker_size)') dataset_group.add_argument('--gauss-sigma-range', nargs=2, default=(0.1, 2), type=float, help='Random gaussian blur sigma range', metavar=('start', 'stop')) dataset_group.add_argument('--gauss-prob', default=0.5, type=float, help='Random gaussian blur probability') dataloader_group = parser.add_argument_group('Dataloader parameters') dataloader_group.add_argument('--batch-size', default=128, type=int, help='Batch size') dataloader_group.add_argument('--num-workers', default=2, type=int, help='Number of dataloader processes') optim_group = parser.add_argument_group('Optimizer parameters') optim_group.add_argument('--optim', default='lars', type=str, choices=('adam', 'sgd', 'lars'), help="Name of optimizer") optim_group.add_argument('--lr', default=1., type=float, help='Learning rate') optim_group.add_argument('--betas', nargs=2, default=(0.9, 0.999), type=float, help='Adam betas', metavar=('beta1', 'beta2')) optim_group.add_argument('--momentum', default=0.9, type=float, help='SDG momentum') optim_group.add_argument('--weight-decay', default=1e-6, type=float, help='Weight decay (l2 regularization)') sched_group = parser.add_argument_group('Scheduler parameters') sched_group.add_argument('--sched', default='warmup-anneal', type=str, choices=('const', None, 'linear', 'warmup-anneal'), help="Name of scheduler") sched_group.add_argument('--warmup-iters', default=2344, type=int, help='Epochs for warmup (`warmup-anneal` scheduler only)') args = parser.parse_args() if args.config: config = yaml.safe_load(args.config) args.__dict__ |= { k: tuple(v) if isinstance(v, list) else v for k, v in config.items() } args.checkpoint_dir = os.path.join(args.checkpoint_dir, args.codename) args.log_dir = os.path.join(args.log_dir, args.codename) return args @dataclass class SimCLRConfig(BaseConfig): @dataclass class DatasetConfig(BaseConfig.DatasetConfig): dataset_dir: str crop_size: int crop_scale_range: tuple[float, float] hflip_prob: float distort_strength: float gauss_ker_scale: float gauss_sigma_range: tuple[float, float] gauss_prob: float @dataclass class OptimConfig(BaseConfig.OptimConfig): momentum: float betas: tuple[float, float] weight_decay: float class SimCLRTrainer(Trainer): def __init__(self, encoder, hid_dim, out_dim, **kwargs): self.encoder = encoder self.hid_dim = hid_dim self.out_dim = out_dim super(SimCLRTrainer, self).__init__(**kwargs) @dataclass class BatchLogRecord(BaseBatchLogRecord): lr: float | None train_loss: float | None train_accuracy: float | None eval_loss: float | None eval_accuracy: float | None @staticmethod def _prepare_dataset(dataset_config: SimCLRConfig.DatasetConfig) -> tuple[Dataset, Dataset]: basic_augmentation = transforms.Compose([ transforms.RandomResizedCrop( dataset_config.crop_size, scale=dataset_config.crop_scale_range, interpolation=transforms.InterpolationMode.BICUBIC ), transforms.RandomHorizontalFlip(dataset_config.hflip_prob), color_distortion(dataset_config.distort_strength), ]) if dataset_config.dataset in {'cifar10', 'cifar100', 'cifar'}: transform = transforms.Compose([ basic_augmentation, transforms.ToTensor(), Clip(), ]) if dataset_config.dataset in {'cifar10', 'cifar'}: train_set = CIFAR10(dataset_config.dataset_dir, train=True, transform=TwinTransform(transform), download=True) test_set = CIFAR10(dataset_config.dataset_dir, train=False, transform=TwinTransform(transform)) else: # CIFAR-100 train_set = CIFAR100(dataset_config.dataset_dir, train=True, transform=TwinTransform(transform), download=True) test_set = CIFAR100(dataset_config.dataset_dir, train=False, transform=TwinTransform(transform)) elif dataset_config.dataset in {'imagenet1k', 'imagenet'}: random_gaussian_blur = RandomGaussianBlur( kernel_size=dataset_config.crop_size // dataset_config.gauss_ker_scale, sigma_range=dataset_config.gauss_sigma_range, p=dataset_config.gauss_prob ), transform = transforms.Compose([ basic_augmentation, random_gaussian_blur, transforms.ToTensor(), Clip() ]) train_set = ImageNet(dataset_config.dataset_dir, 'train', transform=TwinTransform(transform)) test_set = ImageNet(dataset_config.dataset_dir, 'val', transform=TwinTransform(transform)) else: raise NotImplementedError(f"Unimplemented dataset: '{dataset_config.dataset}") return train_set, test_set def _init_models(self, dataset: str) -> Iterable[tuple[str, torch.nn.Module]]: if dataset in {'cifar10', 'cifar100', 'cifar'}: if self.encoder == 'resnet': model = cifar_simclr_resnet50(self.hid_dim, self.out_dim) elif self.encoder == 'vit': model = cifar_simclr_vit_tiny(self.hid_dim, self.out_dim) else: raise NotImplementedError(f"Unimplemented encoder: '{self.encoder}") elif dataset in {'imagenet1k', 'imagenet'}: if self.encoder == 'resnet': model = imagenet_simclr_resnet50(self.hid_dim, self.out_dim) else: raise NotImplementedError(f"Unimplemented encoder: '{self.encoder}") else: raise NotImplementedError(f"Unimplemented dataset: '{dataset}") yield 'model', model @staticmethod def _configure_optimizers( models: Iterable[tuple[str, torch.nn.Module]], optim_config: SimCLRConfig.OptimConfig, ) -> Iterable[tuple[str, torch.optim.Optimizer]]: def exclude_from_wd_and_adaptation(name): if 'bn' in name: return True if optim_config.optim == 'lars' and 'bias' in name: return True for model_name, model in models: param_groups = [ { 'params': [p for name, p in model.named_parameters() if not exclude_from_wd_and_adaptation(name)], 'weight_decay': optim_config.weight_decay, 'layer_adaptation': True, }, { 'params': [p for name, p in model.named_parameters() if exclude_from_wd_and_adaptation(name)], 'weight_decay': 0., 'layer_adaptation': False, }, ] if optim_config.optim == 'adam': optimizer = torch.optim.Adam( param_groups, lr=optim_config.lr, betas=optim_config.betas, ) elif optim_config.optim in {'sgd', 'lars'}: optimizer = torch.optim.SGD( param_groups, lr=optim_config.lr, momentum=optim_config.momentum, ) else: raise NotImplementedError(f"Unimplemented optimizer: '{optim_config.optim}'") yield f"{model_name}_optim", optimizer def _custom_init_fn(self, config: SimCLRConfig): self.optims = {n: LARS(o) if config.optim_config.optim == 'lars' else o for n, o in self.optims.items()} def train(self, num_iters: int, loss_fn: Callable, logger: Loggers, device: torch.device): model = self.models['model'] optim = self.optims['model_optim'] sched = self.scheds['model_optim_sched'] train_loader = iter(self.train_loader) model.train() for iter_ in range(self.restore_iter, num_iters): input_, _ = next(train_loader) input_ = torch.cat(input_).to(device) model.zero_grad() output = model(input_) train_loss, train_accuracy = loss_fn(output) train_loss.backward() optim.step() self.log(logger, self.BatchLogRecord( iter_, num_iters, iter_, iter_, num_iters, optim.param_groups[0]['lr'], train_loss.item(), train_accuracy.item(), eval_loss=None, eval_accuracy=None, )) if (iter_ + 1) % (num_iters // 100) == 0: metrics = torch.Tensor(list(self.eval(loss_fn, device))).mean(0) eval_loss = metrics[0].item() eval_accuracy = metrics[1].item() eval_log = self.BatchLogRecord( iter_, num_iters, iter_, iter_, num_iters, lr=None, train_loss=None, train_accuracy=None, eval_loss=eval_loss, eval_accuracy=eval_accuracy, ) self.log(logger, eval_log) self.save_checkpoint(eval_log) model.train() if sched is not None: sched.step() def eval(self, loss_fn: Callable, device: torch.device): model = self.models['model'] model.eval() with torch.no_grad(): for input_, _ in self.test_loader: input_ = torch.cat(input_).to(device) output = model(input_) loss, accuracy = loss_fn(output) yield loss.item(), accuracy.item() if __name__ == '__main__': args = parse_args_and_config() config = SimCLRConfig.from_args(args) device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') trainer = SimCLRTrainer( seed=args.seed, checkpoint_dir=args.checkpoint_dir, device=device, inf_mode=True, num_iters=args.num_iters, config=config, encoder=args.encoder, hid_dim=args.hid_dim, out_dim=args.out_dim, ) loggers = trainer.init_logger(args.log_dir) trainer.train(args.num_iters, InfoNCELoss(args.temp), loggers, device)