author     Jordan Gong <jordan.gong@protonmail.com>  2021-01-23 22:19:51 +0800
committer  Jordan Gong <jordan.gong@protonmail.com>  2021-01-23 22:19:51 +0800
commit     507e1d163aaa6ea4be23e7f08ff6ce0ef58c830b (patch)
tree       1e3c722bb63e3873464296121ec290bd3e64ad14
parent     59ccf61fed4d95b7fe91bb9552f0deb2f2c75b76 (diff)
Remove the third term in canonical consistency loss
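
Before this patch, the canonical consistency loss combined two MSE terms on the
canonical features with a third cross-entropy term computed by a small linear
classifier over f_c. The patch deletes that classifier head, the num_class /
train_size plumbing, and the label argument y threaded through the forward
paths, leaving only the two MSE terms. A minimal sketch of what remains; the
standalone helper is illustrative, but the tensor names follow the diff:

    import torch.nn.functional as F

    def cano_cons_loss(f_c_c1_t1, f_c_c1_t2, f_c_c2_t2):
        # Canonical features should agree across time (t1 vs. t2) and
        # across conditions (c1 vs. c2); the classifier-based
        # cross-entropy term is gone.
        return (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
                + F.mse_loss(f_c_c1_t2, f_c_c2_t2))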
-rw-r--r--  models/auto_encoder.py  22
-rw-r--r--  models/model.py          4
-rw-r--r--  models/rgb_part_net.py   9
3 files changed, 13 insertions(+), 22 deletions(-)
diff --git a/models/auto_encoder.py b/models/auto_encoder.py
index 36be868..35cb629 100644
--- a/models/auto_encoder.py
+++ b/models/auto_encoder.py
@@ -113,7 +113,6 @@ class Decoder(nn.Module):
 class AutoEncoder(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             channels: int = 3,
             feature_channels: int = 64,
             embedding_dims: tuple[int, int, int] = (128, 128, 64)
@@ -122,25 +121,23 @@ class AutoEncoder(nn.Module):
         self.encoder = Encoder(channels, feature_channels, embedding_dims)
         self.decoder = Decoder(embedding_dims, feature_channels, channels)
-        f_c_dim = embedding_dims[1]
-        self.classifier = nn.Sequential(
-            nn.LeakyReLU(0.2, inplace=True),
-            BasicLinear(f_c_dim, num_class)
-        )
-
-    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None, y=None):
+    def forward(self, x_c1_t2, x_c1_t1=None, x_c2_t2=None):
         # x_c1_t2 is the frame for later module
         (f_a_c1_t2, f_c_c1_t2, f_p_c1_t2) = self.encoder(x_c1_t2)
         with torch.no_grad():
             # Decode canonical features for HPM
             x_c_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), f_c_c1_t2, torch.zeros_like(f_p_c1_t2),
+                torch.zeros_like(f_a_c1_t2),
+                f_c_c1_t2,
+                torch.zeros_like(f_p_c1_t2),
                 no_trans_conv=True
             )
             # Decode pose features for Part Net
             x_p_c1_t2 = self.decoder(
-                torch.zeros_like(f_a_c1_t2), torch.zeros_like(f_c_c1_t2), f_p_c1_t2
+                torch.zeros_like(f_a_c1_t2),
+                torch.zeros_like(f_c_c1_t2),
+                f_p_c1_t2
             )
         if self.training:
@@ -150,11 +147,8 @@ class AutoEncoder(nn.Module):
             x_c1_t2_ = self.decoder(f_a_c1_t1, f_c_c1_t1, f_p_c1_t2)
             xrecon_loss_t2 = F.mse_loss(x_c1_t2, x_c1_t2_)
-
-            y_ = self.classifier(f_c_c1_t2.contiguous())
             cano_cons_loss_t2 = (F.mse_loss(f_c_c1_t1, f_c_c1_t2)
-                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2)
-                                 + F.cross_entropy(y_, y))
+                                 + F.mse_loss(f_c_c1_t2, f_c_c2_t2))
             return (
                 (x_c_c1_t2, x_p_c1_t2),
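
After this change AutoEncoder.forward takes three frames and no labels. A
hypothetical training-mode call; the batch and frame sizes are assumptions,
and the unpacking of the return value follows the call site in
models/rgb_part_net.py:

    import torch
    from models.auto_encoder import AutoEncoder

    ae = AutoEncoder(channels=3, feature_channels=64,
                     embedding_dims=(128, 128, 64)).train()
    n, c, h, w = 16, 3, 64, 32           # assumed batch and frame size
    x_c1_t2 = torch.randn(n, c, h, w)    # frame at t2, condition c1
    x_c1_t1 = torch.randn(n, c, h, w)    # frame at t1, condition c1
    x_c2_t2 = torch.randn(n, c, h, w)    # frame at t2, condition c2
    (x_c, x_p), f_p, losses = ae(x_c1_t2, x_c1_t1, x_c2_t2)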
diff --git a/models/model.py b/models/model.py
index cccb6c4..ddb715d 100644
--- a/models/model.py
+++ b/models/model.py
@@ -54,7 +54,6 @@ class Model:
         self.total_iters = self.meta.get('total_iters', (80000, 80000, 80000))
         self.is_train: bool = True
-        self.train_size: int = 74
         self.in_channels: int = 3
         self.pr: Optional[int] = None
         self.k: Optional[int] = None
@@ -147,7 +146,7 @@ class Model:
         hpm_optim_hp = optim_hp.pop('hpm', {})
         fc_optim_hp = optim_hp.pop('fc', {})
         sched_hp = self.hp.get('scheduler', {})
-        self.rgb_pn = RGBPartNet(self.train_size, self.in_channels, **model_hp)
+        self.rgb_pn = RGBPartNet(self.in_channels, **model_hp)
         # Try to accelerate computation using CUDA or others
         self.rgb_pn = self.rgb_pn.to(self.device)
         self.optimizer = optim.Adam([
@@ -409,7 +408,6 @@ class Model:
             self,
             dataset_config: DatasetConfiguration
     ) -> Union[CASIAB]:
-        self.train_size = dataset_config.get('train_size', 74)
         self.in_channels = dataset_config.get('num_input_channels', 3)
         self._dataset_sig = self._make_signature(
             dataset_config,
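
With train_size gone, Model builds the network from the channel count alone.
A hypothetical direct construction under the new signature; the import path
matches this repository's layout, and the argument values are the defaults
shown in the diff:

    from models.rgb_part_net import RGBPartNet

    rgb_pn = RGBPartNet(
        ae_in_channels=3,          # num_input_channels from the dataset
        ae_feature_channels=64,
        f_a_c_p_dims=(128, 128, 64),
    )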
diff --git a/models/rgb_part_net.py b/models/rgb_part_net.py
index 2cc0958..755d5dc 100644
--- a/models/rgb_part_net.py
+++ b/models/rgb_part_net.py
@@ -13,7 +13,6 @@ from utils.triplet_loss import BatchAllTripletLoss
 class RGBPartNet(nn.Module):
     def __init__(
             self,
-            num_class: int = 74,
             ae_in_channels: int = 3,
             ae_feature_channels: int = 64,
             f_a_c_p_dims: tuple[int, int, int] = (128, 128, 64),
@@ -31,7 +30,7 @@ class RGBPartNet(nn.Module):
     ):
         super().__init__()
         self.ae = AutoEncoder(
-            num_class, ae_in_channels, ae_feature_channels, f_a_c_p_dims
+            ae_in_channels, ae_feature_channels, f_a_c_p_dims
         )
         self.pn = PartNet(
             ae_in_channels, fpfe_feature_channels, fpfe_kernel_sizes,
@@ -60,7 +59,7 @@ class RGBPartNet(nn.Module):
         # Step 1: Disentanglement
         # t, n, c, h, w
-        ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2, y)
+        ((x_c_c1, x_p_c1), losses) = self._disentangle(x_c1, x_c2)
         # Step 2.a: HPM & Static Gait Feature Aggregation
         # t, n, c, h, w
@@ -85,7 +84,7 @@ class RGBPartNet(nn.Module):
         else:
             return x.unsqueeze(1).view(-1)
-    def _disentangle(self, x_c1, x_c2=None, y=None):
+    def _disentangle(self, x_c1, x_c2=None):
         t, n, c, h, w = x_c1.size()
         if self.training:
             # Decoded canonical features and Pose images
@@ -95,7 +94,7 @@ class RGBPartNet(nn.Module):
             xrecon_loss, cano_cons_loss = [], []
             for t2 in range(t):
                 t1 = random.randrange(t)
-                output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2], y)
+                output = self.ae(x_c1[t2], x_c1[t1], x_c2[t2])
                 (x_c1_t2, f_p_t2, losses) = output
                 # Decoded features or image
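
The per-frame loop in _disentangle now pairs each frame x_c1[t2] with a
randomly drawn reference frame x_c1[t1] and calls the label-free
auto-encoder. A condensed, free-standing restatement of the patched loop;
the ordering of the returned losses tuple is an assumption:

    import random

    def disentangle_frames(ae, x_c1, x_c2):
        # x_c1, x_c2: (t, n, c, h, w) sequences from two conditions
        t = x_c1.size(0)
        xrecon_loss, cano_cons_loss = [], []
        for t2 in range(t):
            t1 = random.randrange(t)       # random reference frame index
            (x_c1_t2, f_p_t2, losses) = ae(x_c1[t2], x_c1[t1], x_c2[t2])
            xrecon_loss.append(losses[0])  # assumed: (xrecon, cano_cons)
            cano_cons_loss.append(losses[1])
        return xrecon_loss, cano_cons_loss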