summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJordan Gong <jordan.gong@protonmail.com>2021-01-07 14:34:37 +0800
committerJordan Gong <jordan.gong@protonmail.com>2021-01-07 14:34:37 +0800
commita27af5dfd58e7b48cf3bd063fa2b4b51ed1e0277 (patch)
tree08e6b52d4e59506e6890bf67d1922abea97ce116
parentf1fe77c083f952e81cf80c0b44611fc6057a7882 (diff)
Change device config and enable multi-GPU computing
1. Add a `disable_acc` switch for disabling the accelerator. When it is off, the system will automatically choose an accelerator. 2. Enable multi-GPU training using torch.nn.DataParallel
-rw-r--r--config.py8
-rw-r--r--models/model.py17
-rw-r--r--utils/configuration.py4
3 files changed, 19 insertions, 10 deletions
diff --git a/config.py b/config.py
index ad737e8..47ded38 100644
--- a/config.py
+++ b/config.py
@@ -1,12 +1,10 @@
-import torch
-
from utils.configuration import Configuration
config: Configuration = {
'system': {
- # Device(s) used in training and testing (CPU or CUDA)
- 'device': torch.device('cuda'),
- # GPU(s) used in training or testing, if CUDA enabled
+ # Disable accelerator
+ 'disable_acc': False,
+ # GPU(s) used in training or testing if available
'CUDA_VISIBLE_DEVICES': '0',
# Directory used in training or testing for temporary storage
'save_dir': 'runs',
diff --git a/models/model.py b/models/model.py
index 5dc7d97..bf8b5fb 100644
--- a/models/model.py
+++ b/models/model.py
@@ -24,7 +24,16 @@ class Model:
model_config: ModelConfiguration,
hyperparameter_config: HyperparameterConfiguration
):
- self.device = system_config['device']
+ self.disable_acc = system_config['disable_acc']
+ if self.disable_acc:
+ self.device = torch.device('cpu')
+ else: # Enable accelerator
+ if torch.cuda.is_available():
+ self.device = torch.device('cuda')
+ else:
+ print('No accelerator available, fallback to CPU.')
+ self.device = torch.device('cpu')
+
self.save_dir = system_config['save_dir']
self.checkpoint_dir = os.path.join(self.save_dir, 'checkpoint')
self.log_dir = os.path.join(self.save_dir, 'logs')
@@ -75,11 +84,15 @@ class Model:
hp = self.hp.copy()
lr, betas = hp.pop('lr', 1e-4), hp.pop('betas', (0.9, 0.999))
self.rgb_pn = RGBPartNet(self.train_size, self.in_channels, **hp)
- self.rgb_pn = self.rgb_pn.to(self.device)
self.optimizer = optim.Adam(self.rgb_pn.parameters(), lr, betas)
self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, 500, 0.9)
self.writer = SummaryWriter(self.log_name)
+ if not self.disable_acc:
+ if torch.cuda.device_count() > 1:
+ self.rgb_pn = nn.DataParallel(self.rgb_pn)
+ self.rgb_pn = self.rgb_pn.to(self.device)
+
self.rgb_pn.train()
# Init weights at first iter
if self.curr_iter == 0:
diff --git a/utils/configuration.py b/utils/configuration.py
index 3e98343..f3ae0b3 100644
--- a/utils/configuration.py
+++ b/utils/configuration.py
@@ -1,12 +1,10 @@
from typing import TypedDict, Optional, Union
-import torch
-
from utils.dataset import ClipClasses, ClipConditions, ClipViews
class SystemConfiguration(TypedDict):
- device: torch.device
+ disable_acc: bool
CUDA_VISIBLE_DEVICES: str
save_dir: str