DCGAN error: Using a target size (torch.Size([128])) that is different to the input size (torch.Size([128, 1, 5, 5])) is deprecated - image-processing

I keep getting this error when trying to run a DCGAN using the EMNIST dataset, I'm fairly new to this and struggling to debug the issue
class Generator(nn.Module):
def __init__(self, ngpu):
super(Generator, self).__init__()
self.ngpu = ngpu
self.main = nn.Sequential(
# input is Z, going into a convolution
nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False),
nn.BatchNorm2d(ngf * 8),
nn.ReLU(True),
# state size. (ngf*8) x 4 x 4
nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 4),
nn.ReLU(True),
# state size. (ngf*4) x 8 x 8
nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 2),
nn.ReLU(True),
# state size. (ngf*2) x 16 x 16
nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf),
nn.ReLU(True),
# state size. (ngf) x 32 x 32
nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),
nn.Tanh()
# state size. (nc) x 64 x 64
)
def forward(self, input):
return self.main(input)
class Discriminator(nn.Module):
def __init__(self, ngpu):
super(Discriminator, self).__init__()
self.ngpu = ngpu
self.main = nn.Sequential(
# input is (nc) x 64 x 64
nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf) x 32 x 32
nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 2),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*2) x 16 x 16
nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 4),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*4) x 8 x 8
#nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
#nn.BatchNorm2d(ndf * 8),
#nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*8) x 4 x 4
nn.Conv2d(ndf * 4, 1, 4, 1, 0, bias=False),
nn.Sigmoid()
)
def forward(self, input):
return self.main(input)
# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
# For each batch in the dataloader
for i, data in enumerate(dataloader, 0):
############################
# (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
###########################
## Train with all-real batch
netD.zero_grad()
# Format batch
real_cpu = data[0].to(device)
b_size = real_cpu.size(0)
label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
# Forward pass real batch through D
output = netD(real_cpu).view(-1)
# Calculate loss on all-real batch
errD_real = criterion(output, label)
# Calculate gradients for D in backward pass
errD_real.backward()
D_x = output.mean().item()
## Train with all-fake batch
# Generate batch of latent vectors
noise = torch.randn(b_size, nz, 1, 1, device=device)
# Generate fake image batch with G
fake = netG(noise)
label.fill_(fake_label)
# Classify all fake batch with D
output = netD(fake.detach()).view(-1)
# Calculate D's loss on the all-fake batch
errD_fake = criterion(output, label)
# Calculate the gradients for this batch, accumulated (summed) with previous gradients
errD_fake.backward()
D_G_z1 = output.mean().item()
# Compute error of D as sum over the fake and the real batches
errD = errD_real + errD_fake
# Update D
optimizerD.step()
############################
# (2) Update G network: maximize log(D(G(z)))
###########################
netG.zero_grad()
label.fill_(real_label) # fake labels are real for generator cost
# Since we just updated D, perform another forward pass of all-fake batch through D
output = netD(fake).view(-1)
# Calculate G's loss based on this output
errG = criterion(output, label)
# Calculate gradients for G
errG.backward()
D_G_z2 = output.mean().item()
# Update G
optimizerG.step()
# Output training stats
if i % 50 == 0:
print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
% (epoch, num_epochs, i, len(dataloader),
errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
# Save Losses for plotting later
G_losses.append(errG.item())
D_losses.append(errD.item())
# Check how the generator is doing by saving G's output on fixed_noise
if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
with torch.no_grad():
fake = netG(fixed_noise).detach().cpu()
img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
iters += 1
I'm wanting to do this with just the conv2d layers that are in use and I have tried using the .squeeze function to get it working but had no luck

Related

Deep Convolutional GAN (DCGAN) works really well on one set of data but not another

I am working on using PyTorch to create a DCGAN to use to generate trajectory data for a robot based on a dataset of 20000 other simple paths.
The DCGAN works great on the MNIST dataset, but does not work well on my custom dataset. I am trying to tweak/tune the DCGAN to give good results after being trained with my custom dataset.
Below is an example of output from the GAN (top), along with example training data for MNIST (bottom) after 20 epochs. The loss for the Generator and Discriminator each plateau at around 0.7.
Below is the output for my custom trajectory dataset after a similar number of epochs. The top figure shows the output, and bottom figure shows the training set of the batch.
It is clear that the same GAN is much better at making predictions for the MNIST dataset then for my custom dataset. It is interesting to note that the Discriminator and Generator losses also plateau at similar values of about 0.7 for this dataset too. This makes me think that there is some limit to the network of how low the loss can go.
Discriminator Code:
class Discriminator(nn.Module):
def __init__(self, channels_img, features_d):
super(Discriminator, self).__init__()
self.disc = nn.Sequential(
# input: N x channels_img x 64 x 64
nn.Conv2d(
channels_img, features_d, kernel_size=4, stride=2, padding=1
),
nn.LeakyReLU(0.2),
# _block(in_channels, out_channels, kernel_size, stride, padding)
self._block(features_d, features_d * 2, 4, 2, 1),
self._block(features_d * 2, features_d * 4, 4, 2, 1),
self._block(features_d * 4, features_d * 8, 4, 2, 1),
# After all _block img output is 4x4 (Conv2d below makes into 1x1)
nn.Conv2d(features_d * 8, 1, kernel_size=4, stride=2, padding=0),
nn.Sigmoid(),
)
def _block(self, in_channels, out_channels, kernel_size, stride, padding):
return nn.Sequential(
nn.Conv2d(
in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=False,
),
nn.BatchNorm2d(out_channels),
nn.LeakyReLU(0.2),
)
def forward(self, x):
return self.disc(x)
Generator Code:
class Generator(nn.Module):
def __init__(self, channels_noise, channels_img, features_g):
super(Generator, self).__init__()
self.net = nn.Sequential(
# Input: N x channels_noise x 1 x 1
self._block(channels_noise, features_g * 16, 4, 1, 0), # img: 4x4
self._block(features_g * 16, features_g * 8, 4, 2, 1), # img: 8x8
self._block(features_g * 8, features_g * 4, 4, 2, 1), # img: 16x16
self._block(features_g * 4, features_g * 2, 4, 2, 1), # img: 32x32
nn.ConvTranspose2d(
features_g * 2, channels_img, kernel_size=4, stride=2, padding=1
),
# Output: N x channels_img x 64 x 64
nn.Tanh(),
)
def _block(self, in_channels, out_channels, kernel_size, stride, padding):
return nn.Sequential(
nn.ConvTranspose2d(
in_channels,
out_channels,
kernel_size,
stride,
padding,
bias=False,
),
nn.BatchNorm2d(out_channels),
nn.ReLU(),
)
def forward(self, x):
return self.net(x)
Training loop:
opt_gen = optim.Adam(gen.parameters(), lr=LEARNING_RATE_GEN, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=LEARNING_RATE_DISC, betas=(0.5, 0.999))
criterion = nn.BCELoss()
for epoch in range(NUM_EPOCHS):
# Target labels not needed! <3 unsupervised
# for batch_idx, (real, _) in enumerate(dataloader):
for batch_idx, real in enumerate(dataloader):
real = real.to(device)
noise = torch.randn(BATCH_SIZE, NOISE_DIM, 1, 1).to(device)
fake = gen(noise)
### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
disc_real = disc(real.float()).reshape(-1)
loss_disc_real = criterion(disc_real, torch.ones_like(disc_real))
disc_fake = disc(fake.detach()).reshape(-1)
loss_disc_fake = criterion(disc_fake, torch.zeros_like(disc_fake))
loss_disc = (loss_disc_real + loss_disc_fake) / 2
disc.zero_grad()
loss_disc.backward()
opt_disc.step()
### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
output = disc(fake).reshape(-1)
loss_gen = criterion(output, torch.ones_like(output))
gen.zero_grad()
loss_gen.backward()
opt_gen.step()
# Print losses occasionally and print to tensorboard
if batch_idx % 100 == 0:
print(
f"Epoch [{epoch}/{NUM_EPOCHS}] Batch {batch_idx}/{len(dataloader)} \
Loss D: {loss_disc:.4f}, loss G: {loss_gen:.4f}"
)
with torch.no_grad():
fake = gen(fixed_noise)
# take out (up to) 32 examples
img_grid_real = torchvision.utils.make_grid(
real[:BATCH_SIZE], normalize=True
)
img_grid_fake = torchvision.utils.make_grid(
fake[:BATCH_SIZE], normalize=True
)
writer_real.add_image("Real", img_grid_real, global_step=step)
writer_fake.add_image("Fake", img_grid_fake, global_step=step)
step += 1
(Not enough reputation to comment yet, so I'll reply)
Since your code works fine on the MNIST dataset, the main problem here is that the trajectories in your training data are practically imperceptible, even to a human observer. Note that the trajectory lines are much thinner than the digit lines in the MNIST dataset.

how to build a generator and discriminator for a DCGAN with images of size 256x256

I have the following generators and discriminators for a DCGAN with images of size 128x128, it works excellent.
However, I would like to use the same code to generate images with a size of 256x256, but I cannot build the generators and discriminators.
# direccion del directorio de entrenamiento
dataroot = "./dataset 128x128"
# Number of workers for dataloader
workers = 6
# Batch size during training
batch_size = 1
# Spatial size of training images. All images will be resized to this
# size using a transformer.
image_size = 128
# Number of channels in the training images. For color images this is 3
nc = 3
# Size of z latent vector (i.e. size of generator input)
nz = 100
# Size of feature maps in generator
ngf = 32
# Size of feature maps in discriminator
ndf = 32
# Number of training epochs
num_epochs = 20
# Learning rate for optimizers
lr = 0.0002
# Beta1 hyperparam for Adam optimizers
beta1 = 0.5
# Number of GPUs available. Use 0 for CPU mode.
ngpu = 2
print("Dataset done")
# Generator Code
class Generator(nn.Module):
def __init__(self, ngpu):
super(Generator, self).__init__()
self.ngpu = ngpu
self.main = nn.Sequential(
# input is Z, going into a convolution
nn.ConvTranspose2d( nz, ngf * 16, 4, 1, 0, bias=False),
nn.BatchNorm2d(ngf * 16),
nn.ReLU(True),
# state size. (ngf*16) x 4 x 4
nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 8),
nn.ReLU(True),
# state size. (ngf*8) x 8 x 8
nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 4),
nn.ReLU(True),
# state size. (ngf*4) x 16 x 16
nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 2),
nn.ReLU(True),
# state size. (ngf*2) x 32 x 32
nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf),
nn.ReLU(True),
# state size. (ngf) x 64 x 64
nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),
nn.Tanh()
# state size. (nc) x 128 x 128
)
def forward(self, input):
return self.main(input)
class Discriminator(nn.Module):
def __init__(self, ngpu):
super(Discriminator, self).__init__()
self.ngpu = ngpu
self.main = nn.Sequential(
# input is (nc) x 128 x 128
nn.Conv2d(nc, ndf, 4, stride=2, padding=1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf) x 64 x 64
nn.Conv2d(ndf, ndf * 2, 4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(ndf * 2),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*2) x 32 x 32
nn.Conv2d(ndf * 2, ndf * 4, 4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(ndf * 4),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*4) x 16 x 16
nn.Conv2d(ndf * 4, ndf * 8, 4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(ndf * 8),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*8) x 8 x 8
nn.Conv2d(ndf * 8, ndf * 16, 4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(ndf * 16),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*16) x 4 x 4
nn.Conv2d(ndf * 16, 1, 4, stride=1, padding=0, bias=False),
nn.Sigmoid()
# state size. 1
)
def forward(self, input):
return self.main(input)
# Lists to keep track of progress
img_list = []
G_losses = []
D_losses = []
iters = 0
print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
# For each batch in the dataloader
for i, data in enumerate(dataloader, 0):
############################
# (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
###########################
## Train with all-real batch
netD.zero_grad()
# Format batch
real_cpu = data[0].to(device)
b_size = real_cpu.size(0)
label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
# Forward pass real batch through D
output = netD(real_cpu).view(-1)
# Calculate loss on all-real batch
errD_real = criterion(output, label)
# Calculate gradients for D in backward pass
errD_real.backward()
D_x = output.mean().item()
## Train with all-fake batch
# Generate batch of latent vectors
noise = torch.randn(b_size, nz, 1, 1, device=device)
# Generate fake image batch with G
fake = netG(noise)
label.fill_(fake_label)
# Classify all fake batch with D
output = netD(fake.detach()).view(-1)
# Calculate D's loss on the all-fake batch
errD_fake = criterion(output, label)
# Calculate the gradients for this batch, accumulated (summed) with previous gradients
errD_fake.backward()
D_G_z1 = output.mean().item()
# Compute error of D as sum over the fake and the real batches
errD = errD_real + errD_fake
# Update D
optimizerD.step()
############################
# (2) Update G network: maximize log(D(G(z)))
###########################
netG.zero_grad()
label.fill_(real_label) # fake labels are real for generator cost
# Since we just updated D, perform another forward pass of all-fake batch through D
output = netD(fake).view(-1)
# Calculate G's loss based on this output
errG = criterion(output, label)
# Calculate gradients for G
errG.backward()
D_G_z2 = output.mean().item()
# Update G
optimizerG.step()
# Output training stats
if i % 50 == 0:
print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
% (epoch, num_epochs, i, len(dataloader),
errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
# Save Losses for plotting later
G_losses.append(errG.item())
D_losses.append(errD.item())
# Check how the generator is doing by saving G's output on fixed_noise
if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
with torch.no_grad():
fake = netG(fixed_noise).detach().cpu()
img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
iters += 1
How to modify those generators and discriminators for an image of size 256x256?

Working with customised Alexnet for face recognition

I am currently trying to fit my customised cnn model (Alexnet) with the input shape of (224, 224, 1) as I have the the image shape of 224 x 224 and I am dealing with black and white image.
So this is where I am trying to load the data and then get I got the dataset sizes such as the number of samples, features, and height and widths of the images, and finally the number of classes
lfw_people = fetch_lfw_people(min_faces_per_person = 70, resize = 2.39)
n_samples, h, w = lfw_people.images.shape
X = lfw_people.data
n_features = X.shape[1]
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
after that, I splitted the data using the train test split and reshape with height and width of the image and then I reduced the height to the same length as width which makes it 224 x 224. The result of the counts of y_train and y test is
y_train Count: Counter({3: 384, 1: 176, 6: 108, 2: 94, 4: 84, 0: 64, 5: 56})
y_test Count: Counter({3: 146, 1: 60, 6: 36, 2: 27, 4: 25, 5: 15, 0: 13})
and then I am trying to convert both y_train and y_test to categorical which 7 classes
y_train = to_categorical(
y_train,
num_classes = len(set(y)),
dtype = 'uint8'
)
y_test = to_categorical(
y_test,
num_classes = len(set(y)),
dtype = 'uint8'
)
here is my code for my model where it has the 8 layers in total:
model = Sequential()
# 1st Convolutional Layer
model.add(Conv2D(filters = 96, input_shape = (224, 224, 1),
kernel_size = (11, 11), strides = (4, 4),
padding = 'valid'))
model.add(Activation('relu'))
#Max-Pooling
model.add(MaxPooling2D(pool_size = (2, 2),
strides = (2, 2), padding = 'valid'))
# Batch Normalisation
model.add(BatchNormalization())
# 2nd Convolutional Layer
model.add(Conv2D(filters = 256, kernel_size = (11, 11),
strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 3rd Convolutional Layer
model.add(Conv2D(filters = 384, kernel_size = (3, 3),
strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Batch Normalization
model.add(BatchNormalization())
# 4th Convolutional Lauer
model.add(Conv2D(filters = 384, kernel_size = (3, 3),
strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 5th Convolutional Layer
model.add(Conv2D(filters = 256, kernel_size = (3, 3),
strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Max-pooling
model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2),
padding = 'valid'))
# Batch Normalisation
model.add(BatchNormalization())
# Flattening
model.add(Flatten())
# 1st Dense Layer
model.add(Dense(4096, input_shape = (224*224*1, )))
model.add(Activation('relu'))
# Add Dropout to prevent overfitting
model.add(Dropout(0.4))
# Batch Normalisatoin
model.add(BatchNormalization())
# 2nd Dense Layer
model.add(Dense(4096))
model.add(Activation('relu'))
#Add Dropout
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())
# output softmax layer
model.add(Dense(7))
model.add(Activation('softmax'))
This is my augmentation where I am trying to generate the image
# Data Augmentation
datagen = ImageDataGenerator(
featurewise_center = True, # set input mean to 0 over the dataset
samplewise_center = True, # set each sample mean to 0
featurewise_std_normalization = True, # divide inputs by std of the dataset
samplewise_std_normalization = True, # divide each inputs by its std
zca_whitening = False, # dimension reduction
rotation_range = 20, # randomly rotate images in the range 5 degrees
zoom_range = 0.1, # Randomly zoom image 10%
width_shift_range = 0.2, # randomly shift images horizontally 10%
height_shift_range = 0.2, # randomly shift images vertically 10%
horizontal_flip = True, # randomly flip images
# vertical_flip = 0.2 # Random flip images
vertical_flip = 0.8
)
history = model.fit(datagen.flow(X_train, y_train, batch_size = batch_size),
epochs = 100, validation_data = (X_test, y_test),
steps_per_epoch = X_train.shape[0] // batch_size,
verbose = 0)
After I ran the model, it gave me the validation accuracy of 0.45 and this my confusion matrix which tells me that it kept predicting 'class 3'
| 0 1 2 3 4 5 6
---------------------------------
0| 0 0 0 13 0 0 0
1| 0 0 0 60 0 0 0
2| 0 0 0 27 0 0 0
3| 0 0 0 146 0 0 0
4| 0 0 0 25 0 0 0
5| 0 0 0 15 0 0 0
6| 0 0 0 36 0 0 0
So how to make it predict classes other than 3?

Pytorch DCGAN example for kernel 3

I tried out the pytorch dcgan example and it worked fine but when I tried to change the kernel from 4x4 to 3x3. I only changed the kernel and It gave the following error. Why this error is occurring? To solve this what changes are needed?
ValueError: Using a target size (torch.Size([64])) that is different to the input size (torch.Size([256])) is deprecated. Please ensure they have the same size.
Here is the generator:
class Generator(nn.Module):
def __init__(self, ngpu):
super(Generator, self).__init__()
self.ngpu = ngpu
self.main = nn.Sequential(
# input is Z, going into a convolution
nn.ConvTranspose2d( nz, ngf * 8, kernel_size, 1, 0, bias=False),
nn.BatchNorm2d(ngf * 8),
nn.ReLU(True),
# state size. (ngf*8) x 4 x 4
nn.ConvTranspose2d(ngf * 8, ngf * 4, kernel_size, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 4),
nn.ReLU(True),
# state size. (ngf*4) x 8 x 8
nn.ConvTranspose2d( ngf * 4, ngf * 2, kernel_size, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 2),
nn.ReLU(True),
# state size. (ngf*2) x 16 x 16
nn.ConvTranspose2d( ngf * 2, ngf, kernel_size, 2, 1, bias=False),
nn.BatchNorm2d(ngf),
nn.ReLU(True),
# state size. (ngf) x 32 x 32
nn.ConvTranspose2d( ngf, nc, kernel_size, 2, 1, bias=False),
nn.Tanh()
# state size. (nc) x 64 x 64
)
def forward(self, input):
return self.main(input)
discriminator code:
class Discriminator(nn.Module):
def __init__(self, ngpu):
super(Discriminator, self).__init__()
self.ngpu = ngpu
self.main = nn.Sequential(
# input is (nc) x 64 x 64
nn.Conv2d(nc, ndf, kernel_size, 2, 1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf) x 32 x 32
nn.Conv2d(ndf, ndf * 2, kernel_size, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 2),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*2) x 16 x 16
nn.Conv2d(ndf * 2, ndf * 4, kernel_size, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 4),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*4) x 8 x 8
nn.Conv2d(ndf * 4, ndf * 8, kernel_size, 2, 1, bias=False),
nn.BatchNorm2d(ndf * 8),
nn.LeakyReLU(0.2, inplace=True),
# state size. (ndf*8) x 4 x 4
nn.Conv2d(ndf * 8, 1, kernel_size, 1, 0, bias=False),
# 1x1x1
nn.Sigmoid()
)
def forward(self, input):
return self.main(input)
The error showing line is:
In the training loop
# Calculate loss on all-real batch
errD_real = criterion(output, label)
Here criterion = nn.BCELoss()
Whole training loop:
img_list = []
G_losses = []
D_losses = []
iters = 0
# For each epoch
for epoch in range(num_epochs):
# For each batch in the dataloader
for i, data in enumerate(dataloader, 0):
############################
# (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
###########################
## Train with all-real batch
netD.zero_grad()
# Format batch
real_cpu = data[0].to(device)
#real_cpu = (data.unsqueeze(dim=1).type(torch.FloatTensor)).to(device)
b_size = real_cpu.size(0)
label = torch.full((b_size,), real_label,dtype=torch.float, device=device, )
# Forward pass real batch through D
output = netD(real_cpu).view(-1)
# Calculate loss on all-real batch
errD_real = criterion(output, label)#ERROR
# Calculate gradients for D in backward pass
errD_real.backward()
D_x = output.mean().item()
## Train with all-fake batch
# Generate batch of latent vectors
noise = torch.randn(b_size, nz, 1, 1, device=device)
# Generate fake image batch with G
fake = netG(noise)
label.fill_(fake_label)
# Classify all fake batch with D
output = netD(fake.detach()).view(-1)
# Calculate D's loss on the all-fake batch
errD_fake = criterion(output, label)
# Calculate the gradients for this batch
errD_fake.backward()
D_G_z1 = output.mean().item()
# Add the gradients from the all-real and all-fake batches
errD = errD_real + errD_fake
# Update D
optimizerD.step()
############################
# (2) Update G network: maximize log(D(G(z))) using 'log D' trick
###########################
netG.zero_grad()
label.fill_(real_label) # fake labels are real for generator cost
# Since we just updated D, perform another forward pass of all-fake batch through D
output = netD(fake).view(-1)
# Calculate G's loss based on this output
errG = criterion(output, label)
# Calculate gradients for G
errG.backward()
D_G_z2 = output.mean().item()
# Update G
optimizerG.step()
# Output training stats
if i % 50 == 0:
print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
% (epoch, num_epochs, i, len(dataloader),
errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
# Save Losses for plotting later
G_losses.append(errG.item())
D_losses.append(errD.item())
# Check how the generator is doing by saving G's output on fixed_noise
if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
with torch.no_grad():
fake = netG(fixed_noise).detach().cpu()
img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
iters += 1

TensorFlow (Neural Network) FC output size

Not sure whether my question is TF specific or just NNs in general but i have created a CNN using tensorflow. and im having trouble understanding why the size of the output on my fully connected layer is what it is.
X = tf.placeholder(tf.float32, [None, 32, 32, 3])
y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)
# define model
def complex_model(X,y,is_training):
# conv layer
wconv_1 = tf.get_variable('wconv_1', [7 ,7 ,3, 32])
bconv_1 = tf.get_variable('bconv_1', [32])
# affine layer 1
w1 = tf.get_variable('w1', [26*26*32//4, 1024]) #LINE 13
b1 = tf.get_variable('b1', [1024])
# batchnorm params
bn_gamma = tf.get_variable('bn_gamma', shape=[32]) #scale
bn_beta = tf.get_variable('bn_beta', shape=[32] ) #shift
# affine layer 2
w2 = tf.get_variable('w2', [1024, 10])
b2 = tf.get_variable('b2', [10])
c1_out = tf.nn.conv2d(X, wconv_1, strides=[1, 1, 1, 1], padding="VALID") + bconv_1
activ_1 = tf.nn.relu(c1_out)
mean, var = tf.nn.moments(activ_1, axes=[0,1,2], keep_dims=False)
bn = tf.nn.batch_normalization(act_1, mean, var, bn_gamma, bn_beta, 1e-6)
mp = tf.nn.max_pool(bn, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
affine_in_flat = tf.reshape(mp, [-1, 26*26*32//4])
affine_1 = tf.matmul(affine_in_flat, w1) + b1
activ_2 = tf.nn.relu(affine_1)
affine_2 = tf.matmul(activ_2, w2) + b2
return affine_2
#print(affine_2.shape)
In line 13 where i set the value of w1 i would have expected to just put:
w1 = tf.get_variable('w1', [26*26*32, 1024])
however if i run the code with the line shown above and with
affine_in_flat = tf.reshape(mp, [-1, 26*26*32])
my output size is 16,10 instead of 64,10 which is what i would expect given the initialisations below:
x = np.random.randn(64, 32, 32,3)
with tf.Session() as sess:
with tf.device("/cpu:0"): #"/cpu:0" or "/gpu:0"
tf.global_variables_initializer().run()
#print("train", x.size, is_training, y_out)
ans = sess.run(y_out,feed_dict={X:x,is_training:True})
%timeit sess.run(y_out,feed_dict={X:x,is_training:True})
print(ans.shape)
print(np.array_equal(ans.shape, np.array([64, 10])))
can anybody tell me why i need to divide the size of w1[0] by 4?
Adding print statements for bn and mp I get:
bn: <tf.Tensor 'batchnorm/add_1:0' shape=(?, 26, 26, 32) dtype=float32>
mp: <tf.Tensor 'MaxPool:0' shape=(?, 13, 13, 32) dtype=float32>
Which would seem to be due to the strides=[1, 2, 2, 1] on the max pooling (but to maintain 26, 26 you'd also need padding='SAME').

Resources