PyTorch DCGAN example with kernel size 3

I tried out the PyTorch DCGAN example and it worked fine, but when I changed the kernel size from 4x4 to 3x3 (and nothing else), I got the following error. Why does this error occur, and what changes are needed to fix it?
ValueError: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([256])) is deprecated. Please ensure they have the same size.
Here is the generator:
class Generator(nn.Module):
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(nz, ngf * 8, kernel_size, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, kernel_size, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, nc, kernel_size, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, input):
        return self.main(input)
Discriminator code:
class Discriminator(nn.Module):
    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, kernel_size, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, kernel_size, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, kernel_size, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf * 8, kernel_size, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 8, 1, kernel_size, 1, 0, bias=False),
            # 1x1x1
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)
The error points to this line in the training loop:
    # Calculate loss on all-real batch
    errD_real = criterion(output, label)
Here criterion = nn.BCELoss().
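Where the 256 comes from: with kernel_size = 3 the discriminator's last layer, Conv2d(ndf * 8, 1, 3, 1, 0), no longer collapses its 4x4 input to 1x1 but to 2x2, so netD(real_cpu).view(-1) has 64 * 2 * 2 = 256 elements while label has only 64. A quick sketch (not part of the tutorial) that traces the spatial sizes with the standard Conv2d formula:

def conv_out(size, kernel, stride, padding):
    # standard Conv2d output size: floor((size + 2*padding - kernel) / stride) + 1
    return (size + 2 * padding - kernel) // stride + 1

size = 64  # the real images are still 64x64
for k, s, p in [(3, 2, 1)] * 4 + [(3, 1, 0)]:
    size = conv_out(size, k, s, p)
    print(size)  # kernel 3 prints 32, 16, 8, 4, 2; kernel 4 would end at 1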
Whole training loop:
img_list = []
G_losses = []
D_losses = []
iters = 0
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        # real_cpu = (data.unsqueeze(dim=1).type(torch.FloatTensor)).to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)  # ERROR raised here
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()
        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Add the gradients from the all-real and all-fake batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()
        ############################
        # (2) Update G network: maximize log(D(G(z))) using 'log D' trick
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()
        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        # Check how the generator is doing by saving G's output on fixed_noise
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
        iters += 1
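For reference, here is a minimal sketch of one possible fix (assuming the nz, ngf, ndf, nc hyperparameters from the code above): keep kernel 4 in the two layers that must map between 1x1 and 4x4, and give the stride-2 transposed convolutions output_padding=1 so each of them exactly doubles the spatial size:

import torch.nn as nn

# Sketch only: 3x3 kernels everywhere except where the map must go 1x1 <-> 4x4.
netG_main = nn.Sequential(
    nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),  # 1x1 -> 4x4 needs kernel 4
    nn.BatchNorm2d(ngf * 8), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 8, ngf * 4, 3, 2, 1, output_padding=1, bias=False),  # 4 -> 8
    nn.BatchNorm2d(ngf * 4), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 4, ngf * 2, 3, 2, 1, output_padding=1, bias=False),  # 8 -> 16
    nn.BatchNorm2d(ngf * 2), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 2, ngf, 3, 2, 1, output_padding=1, bias=False),      # 16 -> 32
    nn.BatchNorm2d(ngf), nn.ReLU(True),
    nn.ConvTranspose2d(ngf, nc, 3, 2, 1, output_padding=1, bias=False),           # 32 -> 64
    nn.Tanh(),
)
# The discriminator's 3x3 stride-2 layers already map 64 -> 32 -> 16 -> 8 -> 4,
# so only its last layer needs kernel 4 to collapse 4x4 -> 1x1:
#     nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False)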

Related

DCGAN error: Using a target size (torch.Size([128])) that is different to the input size (torch.Size([128, 1, 5, 5])) is deprecated

I keep getting this error when trying to run a DCGAN on the EMNIST dataset. I'm fairly new to this and struggling to debug the issue.
class Generator(nn.Module):
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(nz, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 32 x 32
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 64 x 64
        )

    def forward(self, input):
        return self.main(input)

class Discriminator(nn.Module):
    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 32 x 32
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            # nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            # nn.BatchNorm2d(ndf * 8),
            # nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            nn.Conv2d(ndf * 4, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        return self.main(input)
# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()
        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()
        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()
        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        # Check how the generator is doing by saving G's output on fixed_noise
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
        iters += 1
I want to do this with just the Conv2d layers that are currently in use, and I have tried using the .squeeze function to get it working, but had no luck.
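For what it is worth, the [128, 1, 5, 5] follows from the usual Conv2d size formula, out = (in + 2*padding - kernel) // stride + 1, on 64x64 inputs: the three stride-2 blocks map 64 -> 32 -> 16 -> 8, and with the fourth block commented out the final Conv2d(ndf * 4, 1, 4, 1, 0) turns the 8x8 map into 5x5 instead of 1x1. One sketch of a fix that keeps only the three stride-2 layers (an option, not the only one) is to widen the last kernel so 8x8 collapses to 1x1:

# (8 + 2*0 - 8) // 1 + 1 = 1, so netD(x).view(-1) is again one score per image
nn.Conv2d(ndf * 4, 1, kernel_size=8, stride=1, padding=0, bias=False),
nn.Sigmoid()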

Deep Convolutional GAN (DCGAN) works really well on one set of data but not another

I am working on using PyTorch to create a DCGAN that generates trajectory data for a robot, based on a dataset of 20000 other simple paths.
The DCGAN works great on the MNIST dataset, but does not work well on my custom dataset. I am trying to tweak/tune the DCGAN to give good results after being trained on my custom dataset.
Below is an example of output from the GAN (top), along with example training data for MNIST (bottom) after 20 epochs. The losses for the Generator and Discriminator each plateau at around 0.7.
Below is the output for my custom trajectory dataset after a similar number of epochs. The top figure shows the output, and the bottom figure shows the training set of the batch.
It is clear that the same GAN is much better at making predictions for the MNIST dataset than for my custom dataset. Interestingly, the Discriminator and Generator losses also plateau at similar values of about 0.7 for this dataset. This makes me think that there is some limit on how low the network's loss can go.
Discriminator Code:
class Discriminator(nn.Module):
    def __init__(self, channels_img, features_d):
        super(Discriminator, self).__init__()
        self.disc = nn.Sequential(
            # input: N x channels_img x 64 x 64
            nn.Conv2d(
                channels_img, features_d, kernel_size=4, stride=2, padding=1
            ),
            nn.LeakyReLU(0.2),
            # _block(in_channels, out_channels, kernel_size, stride, padding)
            self._block(features_d, features_d * 2, 4, 2, 1),
            self._block(features_d * 2, features_d * 4, 4, 2, 1),
            self._block(features_d * 4, features_d * 8, 4, 2, 1),
            # After all _block img output is 4x4 (Conv2d below makes into 1x1)
            nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
            nn.Sigmoid(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.Conv2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        return self.disc(x)
Generator Code:
class Generator(nn.Module):
    def __init__(self, channels_noise, channels_img, features_g):
        super(Generator, self).__init__()
        self.net = nn.Sequential(
            # Input: N x channels_noise x 1 x 1
            self._block(channels_noise, features_g * 16, 4, 1, 0),  # img: 4x4
            self._block(features_g * 16, features_g * 8, 4, 2, 1),  # img: 8x8
            self._block(features_g * 8, features_g * 4, 4, 2, 1),   # img: 16x16
            self._block(features_g * 4, features_g * 2, 4, 2, 1),   # img: 32x32
            nn.ConvTranspose2d(
                features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
            ),
            # Output: N x channels_img x 64 x 64
            nn.Tanh(),
        )

    def _block(self, in_channels, out_channels, kernel_size, stride, padding):
        return nn.Sequential(
            nn.ConvTranspose2d(
                in_channels,
                out_channels,
                kernel_size,
                stride,
                padding,
                bias=False,
            ),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        return self.net(x)
Training loop:
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE_GEN, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE_DISC, betas=(0.5, 0.999))
criterion = nn.BCELoss()

for epoch in range(NUM_EPOCHS):
    # Target labels not needed! <3 unsupervised
    # for batch_idx, (real, _) in enumerate(dataloader):
    for batch_idx, real in enumerate(dataloader):
        real = real.to(device)
        noise = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1).to(device)
        fake = gen(noise)

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        disc_real = disc(real.float()).reshape(-1)
        loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
        disc_fake = disc(fake.detach()).reshape(-1)
        loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
        loss_disc = (loss_disc_real + loss_disc_fake) / 2
        disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z)))
        output = disc(fake).reshape(-1)
        loss_gen = criterion(output, torch.ones_like(output))
        gen.zero_grad()
        loss_gen.backward()
        opt_gen.step()

        # Print losses occasionally and print to tensorboard
        if batch_idx % 100 == 0:
            print(
                f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
                  Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
            )
            with torch.no_grad():
                fake = gen(fixed_noise)
                # take out (up to) 32 examples
                img_grid_real = torchvision.utils.make_grid(
                    real[:BATCH_SIZE], normalize=True
                )
                img_grid_fake = torchvision.utils.make_grid(
                    fake[:BATCH_SIZE], normalize=True
                )
                writer_real.add_image("Real", img_grid_real, global_step=step)
                writer_fake.add_image("Fake", img_grid_fake, global_step=step)
                step += 1
(Not enough reputation to comment yet, so I'll reply)
Since your code works fine on the MNIST dataset, the main problem here is that the trajectories in your training data are practically imperceptible, even to a human observer. Note that the trajectory lines are much thinner than the digit lines in the MNIST dataset.
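If that is the cause, one cheap experiment is to thicken the trajectories before training. A sketch, assuming the trajectories are bright lines on a dark background in (N, C, H, W) tensors:

import torch.nn.functional as F

def thicken(batch, iterations=1):
    # Stride-1 max-pooling acts as a grayscale dilation: each pixel takes the
    # maximum of its 3x3 neighbourhood, which widens bright lines by roughly
    # one pixel per iteration without shifting them.
    for _ in range(iterations):
        batch = F.max_pool2d(batch, kernel_size=3, stride=1, padding=1)
    return batch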

How to build a generator and discriminator for a DCGAN with images of size 256x256

I have the following generator and discriminator for a DCGAN with images of size 128x128, and it works very well.
However, I would like to use the same code to generate images with a size of 256x256, but I cannot work out how to build the generator and discriminator for that size.
# path to the training dataset directory
dataroot = "./dataset 128x128"
# Number of workers for dataloader
workers = 6
# Batch size during training
batch_size = 1
# Spatial size of training images. All images will be resized to this
# size using a transformer.
image_size = 128
# Number of channels in the training images. For color images this is 3
nc = 3
# Size of z latent vector (i.e. size of generator input)
nz = 100
# Size of feature maps in generator
ngf = 32
# Size of feature maps in discriminator
ndf = 32
# Number of training epochs
num_epochs = 20
# Learning rate for optimizers
lr = 0.0002
# Beta1 hyperparam for Adam optimizers
beta1 = 0.5
# Number of GPUs available. Use 0 for CPU mode.
ngpu = 2
print("Dataset done")
# Generator Code
class Generator(nn.Module):
    def __init__(self, ngpu):
        super(Generator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.ConvTranspose2d(nz, ngf * 16, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 16),
            nn.ReLU(True),
            # state size. (ngf*16) x 4 x 4
            nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 8 x 8
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 16 x 16
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 32 x 32
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # state size. (ngf) x 64 x 64
            nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),
            nn.Tanh()
            # state size. (nc) x 128 x 128
        )

    def forward(self, input):
        return self.main(input)

class Discriminator(nn.Module):
    def __init__(self, ngpu):
        super(Discriminator, self).__init__()
        self.ngpu = ngpu
        self.main = nn.Sequential(
            # input is (nc) x 128 x 128
            nn.Conv2d(nc, ndf, 4, stride=2, padding=1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 64 x 64
            nn.Conv2d(ndf, ndf * 2, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 32 x 32
            nn.Conv2d(ndf * 2, ndf * 4, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 16 x 16
            nn.Conv2d(ndf * 4, ndf * 8, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 8 x 8
            nn.Conv2d(ndf * 8, ndf * 16, 4, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(ndf * 16),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*16) x 4 x 4
            nn.Conv2d(ndf * 16, 1, 4, stride=1, padding=0, bias=False),
            nn.Sigmoid()
            # state size. 1
        )

    def forward(self, input):
        return self.main(input)
# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0

print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(dataloader, 0):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = data[0].to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()
        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()
        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()
        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        # Check how the generator is doing by saving G's output on fixed_noise
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
        iters += 1
How should I modify those generators and discriminators for images of size 256x256?
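A sketch of one standard way to do it (using the nz, ngf, ndf, nc names from the question): every stride-2 layer doubles (G) or halves (D) the spatial size, so going from 128x128 to 256x256 needs exactly one extra stride-2 stage in each network, plus image_size = 256:

import torch.nn as nn

# Generator sketch: 1 -> 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 256
netG_main = nn.Sequential(
    nn.ConvTranspose2d(nz, ngf * 32, 4, 1, 0, bias=False),        # 4x4
    nn.BatchNorm2d(ngf * 32), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 32, ngf * 16, 4, 2, 1, bias=False),  # 8x8
    nn.BatchNorm2d(ngf * 16), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False),   # 16x16
    nn.BatchNorm2d(ngf * 8), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),    # 32x32
    nn.BatchNorm2d(ngf * 4), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),    # 64x64
    nn.BatchNorm2d(ngf * 2), nn.ReLU(True),
    nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),        # 128x128
    nn.BatchNorm2d(ngf), nn.ReLU(True),
    nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False),             # 256x256
    nn.Tanh(),
)
# The discriminator mirrors this: six stride-2 Conv2d layers (nc -> ndf -> ...
# -> ndf * 32) take 256 -> 4, then nn.Conv2d(ndf * 32, 1, 4, 1, 0, bias=False)
# collapses 4x4 -> 1x1 before the Sigmoid.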

Working with customised Alexnet for face recognition

I am currently trying to fit my customised CNN model (AlexNet) with an input shape of (224, 224, 1), as my images have shape 224 x 224 and I am dealing with black and white images.
This is where I load the data and then get the dataset sizes, such as the number of samples and features, the height and width of the images, and finally the number of classes:
lfw_people = fetch_lfw_people(min_faces_per_person = 70, resize = 2.39)
n_samples, h, w = lfw_people.images.shape
X = lfw_people.data
n_features = X.shape[1]
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
After that, I split the data using train test split and reshaped it with the height and width of the images, then reduced the height to the same length as the width, which makes it 224 x 224. The class counts of y_train and y_test are:
y_train Count: Counter({3: 384, 1: 176, 6: 108, 2: 94, 4: 84, 0: 64, 5: 56})
y_test Count: Counter({3: 146, 1: 60, 6: 36, 2: 27, 4: 25, 5: 15, 0: 13})
Then I convert both y_train and y_test to categorical with 7 classes:
y_train = to_categorical(
    y_train,
    num_classes = len(set(y)),
    dtype = 'uint8'
)
y_test = to_categorical(
    y_test,
    num_classes = len(set(y)),
    dtype = 'uint8'
)
Here is the code for my model, which has 8 layers in total:
model = Sequential()

# 1st Convolutional Layer
model.add(Conv2D(filters = 96, input_shape = (224, 224, 1),
                 kernel_size = (11, 11), strides = (4, 4),
                 padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(MaxPooling2D(pool_size = (2, 2),
                       strides = (2, 2), padding = 'valid'))
# Batch Normalisation
model.add(BatchNormalization())

# 2nd Convolutional Layer
model.add(Conv2D(filters = 256, kernel_size = (11, 11),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())

# 3rd Convolutional Layer
model.add(Conv2D(filters = 384, kernel_size = (3, 3),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())

# 4th Convolutional Layer
model.add(Conv2D(filters = 384, kernel_size = (3, 3),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())

# 5th Convolutional Layer
model.add(Conv2D(filters = 256, kernel_size = (3, 3),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2),
                       padding = 'valid'))
# Batch Normalisation
model.add(BatchNormalization())

# Flattening
model.add(Flatten())

# 1st Dense Layer
model.add(Dense(4096, input_shape = (224*224*1, )))
model.add(Activation('relu'))
# Add Dropout to prevent overfitting
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())

# 2nd Dense Layer
model.add(Dense(4096))
model.add(Activation('relu'))
# Add Dropout
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())

# Output softmax layer
model.add(Dense(7))
model.add(Activation('softmax'))
This is my augmentation, where I generate additional images:
# Data Augmentation
datagen = ImageDataGenerator(
    featurewise_center = True,             # set input mean to 0 over the dataset
    samplewise_center = True,              # set each sample mean to 0
    featurewise_std_normalization = True,  # divide inputs by std of the dataset
    samplewise_std_normalization = True,   # divide each input by its std
    zca_whitening = False,                 # dimension reduction
    rotation_range = 20,                   # randomly rotate images by up to 20 degrees
    zoom_range = 0.1,                      # randomly zoom images by up to 10%
    width_shift_range = 0.2,               # randomly shift images horizontally by up to 20%
    height_shift_range = 0.2,              # randomly shift images vertically by up to 20%
    horizontal_flip = True,                # randomly flip images horizontally
    # vertical_flip = 0.2                  # randomly flip images
    vertical_flip = 0.8
)

history = model.fit(datagen.flow(X_train, y_train, batch_size = batch_size),
                    epochs = 100, validation_data = (X_test, y_test),
                    steps_per_epoch = X_train.shape[0] // batch_size,
                    verbose = 0)
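One detail worth checking here: per the Keras documentation, featurewise_center and featurewise_std_normalization only take effect after the generator has been fitted on the training data, so a call like the following is needed before model.fit:

# Compute the dataset-wide mean/std that featurewise_center and
# featurewise_std_normalization rely on; without this, Keras warns that the
# generator hasn't been fit on any training data.
datagen.fit(X_train)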
After I ran the model, it gave me a validation accuracy of 0.45 and this confusion matrix, which tells me that it kept predicting class 3:
  |   0   1   2    3   4   5   6
--+-----------------------------
0 |   0   0   0   13   0   0   0
1 |   0   0   0   60   0   0   0
2 |   0   0   0   27   0   0   0
3 |   0   0   0  146   0   0   0
4 |   0   0   0   25   0   0   0
5 |   0   0   0   15   0   0   0
6 |   0   0   0   36   0   0   0
So how can I make it predict classes other than class 3?
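One thing the class counts above suggest: class 3 alone accounts for 384 of the 966 training samples, so always predicting it already yields decent accuracy. A common mitigation is to weight the loss by inverse class frequency. A sketch, assuming y_train_int holds the integer labels from before the to_categorical call and that your Keras version supports class_weight together with a generator:

import numpy as np
from sklearn.utils.class_weight import compute_class_weight

weights = compute_class_weight(class_weight = 'balanced',
                               classes = np.unique(y_train_int),
                               y = y_train_int)
class_weight = dict(enumerate(weights))  # rare classes get weight > 1

history = model.fit(datagen.flow(X_train, y_train, batch_size = batch_size),
                    epochs = 100, validation_data = (X_test, y_test),
                    steps_per_epoch = X_train.shape[0] // batch_size,
                    class_weight = class_weight)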

How to solve the dimension error and effectively use ConvTranspose2d?

I have created a discriminator and a generator file to implement a GAN; however, I am facing this error.
The initial error I was facing was in the main.py file, where I call the criterion (BCELoss) with the output and label. I worked around that error using the squeeze function, so the shape issue there was resolved.
Before using squeeze, the error showed that the shapes of output and label did not match (the shapes were (7, 1, 1, 1) and (7) for the output and the label respectively).
import torch
from torch import nn

class generatorG(nn.Module):
    def __init__(self):
        super(generatorG, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(4, 4), stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t6 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
            nn.BatchNorm2d(4000),
            nn.ReLU()
        )
        self.t7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.t8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.t9 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.t10 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        x = self.t6(x)
        x = self.t7(x)
        x = self.t8(x)
        x = self.t9(x)
        x = self.t10(x)
        return x

model = generatorG()
print(model(torch.randn()).shape)
Discriminator File
import torch
from torch import nn

class DiscriminatorD(nn.Module):
    def __init__(self):
        super(DiscriminatorD, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        return x
main.py file
from generator import *
from discriminator import *
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import utils

epochs = 100
Batch_Size = 64
lr = 0.0002
beta1 = 0.5
over = 4

parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', default='dataset/train', help='path to dataset')
opt = parser.parse_args()

try:
    os.makedirs('result/train/cropped')
    os.makedirs('result/train/real')
    os.makedirs('result/train/recon')
    os.makedirs('model/')
except:
    pass

transform = transforms.Compose([transforms.Scale(128),
                                transforms.CenterCrop(128),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.ImageFolder(root=opt.dataroot, transform=transform)
assert dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=Batch_Size, shuffle=True, num_workers=0)

wtl2 = 0.999

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.2)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
resume_epoch = 0
netG = generatorG()
netG.apply(weights_init)
netD = DiscriminatorD()
netD.apply(weights_init)
criterion = nn.BCELoss()
criterionMSE = nn.MSELoss()
input_real = torch.FloatTensor(Batch_Size, 3, 128, 128)
input_cropped = torch.FloatTensor(Batch_Size, 3, 128, 128)
label = torch.FloatTensor(Batch_Size)
real_label = 1
fake_label = 0
real_center = torch.FloatTensor(Batch_Size, 3, 64, 64)
input_real = Variable(input_real)
input_cropped = Variable(input_cropped)
label = Variable(label)
real_center = Variable(real_center)
optimizerD = optim.Adam(netD.parameters(), lr = lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = lr, betas = (beta1, 0.999))
over = 4
for epoch in range(resume_epoch, epochs):
    for i, data in enumerate(dataloader, 0):
        real_cpu, _ = data
        real_center_cpu = real_cpu[:, :, int(128/4):int(128/4)+int(128/2), int(128/4):int(128/4)+int(128/2)]
        batch_size = real_cpu.size(0)
        with torch.no_grad():
            input_real.resize_(real_cpu.size()).copy_(real_cpu)
            input_cropped.resize_(real_cpu.size()).copy_(real_cpu)
            real_center.resize_(real_center_cpu.size()).copy_(real_center_cpu)
        input_cropped[:, 0, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 117.0 / 255.0 - 1.0
        input_cropped[:, 1, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 104.0 / 255.0 - 1.0
        input_cropped[:, 2, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 123.0 / 255.0 - 1.0

        netD.zero_grad()
        with torch.no_grad():
            label.resize_(batch_size).fill_(real_label)
        output = netD(real_center)
        # output = torch.unsqueeze(output[0, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        print(output.shape)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        print(label.shape)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()

        print(input_cropped.shape)
        fake = netG(input_cropped)
        label.data.fill_(fake_label)
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()

        netG.zero_grad()
        label.data.fill_(real_label)  # fake labels are real for generator cost
        output = netD(fake)
        errG_D = criterion(output, label)
        wtl2Matrix = real_center.clone()
        wtl2Matrix.data.fill_(wtl2 * 10)
        wtl2Matrix.data[:, :, int(over):int(128 / 2 - over), int(over):int(128 / 2 - over)] = wtl2
        errG_l2 = (fake - real_center).pow(2)
        errG_l2 = errG_l2 * wtl2Matrix
        errG_l2 = errG_l2.mean()
        errG = (1 - wtl2) * errG_D + wtl2 * errG_l2
        errG.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()

        print('[%d / %d][%d / %d] Loss_D: %.4f Loss_G: %.4f / %.4f l_D(x): %.4f l_D(G(z)): %.4f'
              % (epoch, epochs, i, len(dataloader),
                 errD.data, errG_D.data, errG_l2.data, D_x, D_G_z1,))
        if i % 100 == 0:
            vutils.save_image(real_cpu,
                              'result/train/real/real_samples_epoch_%03d.png' % (epoch))
            vutils.save_image(input_cropped.data,
                              'result/train/cropped/cropped_samples_epoch_%03d.png' % (epoch))
            recon_image = input_cropped.clone()
            recon_image.data[:, :, int(128 / 4):int(128 / 4 + 128 / 2), int(128 / 4):int(128 / 4 + 128 / 2)] = fake.data
            vutils.save_image(recon_image.data,
                              'result/train/recon/recon_center_samples_epoch_%03d.png' % (epoch))
utils file
import torch
from PIL import Image
from torch.autograd import Variable

def load_image(filename, size=None, scale=None):
    img = Image.open(filename)
    if size is not None:
        img = img.resize((size, size), Image.ANTIALIAS)
    elif scale is not None:
        img = img.resize((int(img.size[0]/scale), int(img.size[1]/scale)), Image.ANTIALIAS)
    return img

def save_image(filename, data):
    img = data.clone().add(1).div(2).mul(255).clamp(0, 255).numpy()
    img = img.transpose(1, 2, 0).astype('uint8')
    img = Image.fromarray(img)
    img.save(filename)

def gram_matrix(y):
    (b, ch, h, w) = y.size()
    features = y.view(b, ch, w*h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch * h * w)
    return gram

def normalize_batch(batch):
    mean = batch.data.new(batch.data.size())
    std = batch.data.new(batch.data.size())
    mean[:, 0, :, :] = 0.485
    mean[:, 1, :, :] = 0.456
    mean[:, 2, :, :] = 0.406
    std[:, 0, :, :] = 0.229
    std[:, 1, :, :] = 0.224
    std[:, 2, :, :] = 0.225
    batch = torch.div(batch, 255.0)
    batch -= Variable(mean)
    # batch /= Variable(std)
    batch = torch.div(batch, Variable(std))
    return batch
Error message
(impaint_env) vivek@Viveks-MacBook-Pro image_impainter % python main.py
/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torchvision/transforms/transforms.py:310: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.
  warnings.warn("The use of the transforms.Scale transform is deprecated, " +
torch.Size([7])
torch.Size([7])
torch.Size([7, 3, 128, 128])
Traceback (most recent call last):
  File "main.py", line 114, in <module>
    fake = netG(input_cropped)
  File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/Users/vivek/DSwork/image_impainter/generator.py", line 70, in forward
    x = self.t7(x)
  File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/container.py", line 139, in forward
    input = module(input)
  File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 916, in forward
    return F.conv_transpose2d(
RuntimeError: Given transposed=1, weight of size [512, 256, 4, 4], expected input[7, 4000, 1, 1] to have 512 channels, but got 4000 channels instead
You have a "gap" between layer t6 and t7 of your generatorG:
# ...
self.t6 = nn.Sequential(
    nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
    nn.BatchNorm2d(4000),
    nn.ReLU()
)
self.t7 = nn.Sequential(
    nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)
# ...
Your t6 layer expects its input to have 512 channels and outputs a tensor with 4000 channels. However, the next layer, t7, expects its input to have only 512 channels.
You need to adjust either t6 or t7 so that t6 outputs exactly the number of channels t7 expects; that is, t6's out_channels must equal t7's in_channels.
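For example, picking the second option (a sketch; reducing t6's out_channels to 512 would work just as well):

self.t7 = nn.Sequential(
    # in_channels now matches t6's out_channels (4000)
    nn.ConvTranspose2d(in_channels=4000, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)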
