Missing Keys in state_dict - image-processing

I am having problems loading my model on google colab. here is the code:
I have attached the code below
I have tried changing the name of the statedict and it does not help
Basically, I am trying to save my model for later use, but this has become extremely difficult because I am not able to save and load it properly. Please help me with this problem. The error message is attached after the code below.
here is the code
from zipfile import ZipFile

# Unpack the dataset and the previously generated results into the working
# directory. The archive handle is deliberately not called `zip`, so the
# builtin of that name stays usable in the rest of the notebook.
for file_name in ('data.zip', 'results.zip'):
    with ZipFile(file_name, 'r') as archive:
        archive.extractall()
!pip install tensorflow-gpu
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
# Mini-batch size used by the DataLoader below.
batchSize = 64
# Target size handed to transforms.Resize; the generator G defined below ends
# with 64x64 outputs, so real and generated images share this size.
imageSize = 64
# Resize, convert to a tensor, then shift/scale every channel with mean 0.5 and
# std 0.5.
transform = transforms.Compose([transforms.Resize(imageSize), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),])
# CIFAR-10 is downloaded into ./data if it is not already there.
dataset = dset.CIFAR10(root = './data', download = True, transform = transform)
# Shuffled batches, loaded by two worker processes.
dataloader = torch.utils.data.DataLoader(dataset, batch_size = batchSize, shuffle = True, num_workers = 2)
def weights_init(m):
    """Re-initialise one layer in place, chosen by its class name.

    Meant to be handed to ``Module.apply``. A layer whose class name contains
    'Conv' gets weights drawn from N(0.0, 0.02). A layer whose class name
    contains 'BatchNorm' gets weights drawn from N(1.0, 0.02) and a zeroed
    bias. Any other layer is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
class G(nn.Module):
    """DCGAN-style generator.

    Four ConvTranspose2d -> BatchNorm2d -> ReLU stages followed by a final
    ConvTranspose2d -> Tanh. With a (N, 100, 1, 1) input the spatial size goes
    1 -> 4 -> 8 -> 16 -> 32 -> 64, giving a (N, 3, 64, 64) output. All layers
    live in ``self.main``, so parameter names are ``main.<index>.<name>``.
    """

    def __init__(self):
        super(G, self).__init__()
        # (in_channels, out_channels, stride, padding) of each normalised stage
        stages = [(100, 512, 1, 0), (512, 256, 2, 1), (256, 128, 2, 1), (128, 64, 2, 1)]
        layers = []
        for c_in, c_out, stride, padding in stages:
            layers += [
                nn.ConvTranspose2d(c_in, c_out, 4, stride, padding, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]
        # Output stage: 3 channels squashed by Tanh.
        layers += [nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False), nn.Tanh()]
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the result."""
        return self.main(input)
# Build the generator and try to restore its weights from disk.
# NOTE(review): the training loop at the end of this script writes
# 'generator.pth' as a dict with the keys 'epoch', 'model_state_dict',
# 'optimizer_state_dict' and 'loss', so torch.load() returns that whole dict
# here rather than the bare module state dict.
netG = G()
netG.load_state_dict(torch.load('generator.pth'))
# NOTE(review): eval() switches the module to evaluation mode although the
# training loop below keeps updating it - confirm this is intended.
netG.eval()
#netG.apply(weights_init)
class D(nn.Module):
    """DCGAN-style discriminator.

    A stride-2 Conv2d -> LeakyReLU stem, three stride-2
    Conv2d -> BatchNorm2d -> LeakyReLU stages (64 -> 128 -> 256 -> 512
    channels), and a final 4x4 Conv2d -> Sigmoid that collapses a 64x64 input
    to one score per image. All layers live in ``self.main``, so parameter
    names are ``main.<index>.<name>``.
    """

    def __init__(self):
        super(D, self).__init__()
        layers = [nn.Conv2d(3, 64, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True)]
        for c_in, c_out in ((64, 128), (128, 256), (256, 512)):
            layers.extend([
                nn.Conv2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        layers.extend([nn.Conv2d(512, 1, 4, 1, 0, bias=False), nn.Sigmoid()])
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Return the scores for ``input`` flattened to a 1-D tensor."""
        scores = self.main(input)
        return scores.view(-1)
# Build the discriminator and try to restore its weights from disk.
# NOTE(review): 'discriminator.pth' is written by the training loop below as a
# dict with the keys 'epoch', 'model_state_dict', 'optimizer_state_dict' and
# 'loss', so torch.load() returns that whole dict here rather than the bare
# module state dict.
netD = D()
netD.load_state_dict(torch.load('discriminator.pth'))
# NOTE(review): eval() switches the module to evaluation mode although the
# training loop below keeps updating it - confirm this is intended.
netD.eval()
#netD.apply(weights_init)
# Binary cross-entropy between the discriminator's scores and the 0/1 targets.
criterion = nn.BCELoss()

# Restore the discriminator's optimizer state and the bookkeeping values that
# were stored next to it in the checkpoint.
checkpoint = torch.load('discriminator.pth')
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerD.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
errD = checkpoint['loss']

# Same for the generator. The file name has to match the one the training loop
# writes ('generator.pth'); it was previously misspelled as 'genrator.pth'.
checkpoint1 = torch.load('generator.pth')
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG.load_state_dict(checkpoint1['optimizer_state_dict'])
errG = checkpoint1['loss']
# First epoch to run: continue from the value stored in the checkpoint.
k = epoch
# Iterate with `epoch` itself so that the progress line, the sample file names
# and the saved checkpoints all advance with the loop. The loop variable used
# to be a separate `j`, which left `epoch` stuck at the loaded value.
for epoch in range(k, 10):
    for i, data in enumerate(dataloader, 0):
        real, _ = data
        batch = real.size(0)
        ones = torch.ones(batch)    # target for "real"
        zeros = torch.zeros(batch)  # target for "fake"

        # 1) Discriminator step: real images should score 1, generated ones 0.
        netD.zero_grad()
        errD_real = criterion(netD(real), ones)
        noise = torch.randn(batch, 100, 1, 1)
        fake = netG(noise)
        # detach() keeps this backward pass out of the generator.
        errD_fake = criterion(netD(fake.detach()), zeros)
        errD = errD_real + errD_fake
        errD.backward()
        optimizerD.step()

        # 2) Generator step: it is rewarded when D scores its images as real.
        netG.zero_grad()
        errG = criterion(netD(fake), ones)
        errG.backward()
        optimizerG.step()

        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f' % (epoch+1, 10, i+1, len(dataloader), errD.item(), errG.item()))
        if i % 100 == 0:
            vutils.save_image(real, '%s/real_samples.png' % "./results", normalize = True)
            fake = netG(noise)
            vutils.save_image(fake.data, '%s/fake_samples_epoch_%03d.png' % ("./results", epoch+1), normalize = True)

    # Checkpoint after every finished epoch. 'epoch' stores the index of the
    # NEXT epoch to run, which is what the resume code feeds into range(k, 10).
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': netD.state_dict(),
        'optimizer_state_dict': optimizerD.state_dict(),
        'loss': errD
    }, 'discriminator.pth')
    torch.save({
        'epoch': epoch + 1,
        'model_state_dict': netG.state_dict(),
        'optimizer_state_dict': optimizerG.state_dict(),
        'loss': errG
    }, 'generator.pth')
here is the error
RuntimeError Traceback (most recent call last)
<ipython-input-23-3e55546152c7> in <module>()
26 # Creating the generator
27 netG = G()
---> 28 netG.load_state_dict(torch.load('generator.pth'))
29 netG.eval()
30 #netG.apply(weights_init)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in load_state_dict(self, state_dict, strict)
767 if len(error_msgs) > 0:
768 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
--> 769 self.__class__.__name__, "\n\t".join(error_msgs)))
770
771 def _named_members(self, get_members_fn, prefix='', recurse=True):
RuntimeError: Error(s) in loading state_dict for G:
Missing key(s) in state_dict: "main.0.weight", "main.1.weight", "main.1.bias", "main.1.running_mean", "main.1.running_var", "main.3.weight", "main.4.weight", "main.4.bias", "main.4.running_mean", "main.4.running_var", "main.6.weight", "main.7.weight", "main.7.bias", "main.7.running_mean", "main.7.running_var", "main.9.weight", "main.10.weight", "main.10.bias", "main.10.running_mean", "main.10.running_var", "main.12.weight".
Unexpected key(s) in state_dict: "epoch", "model_state_dict", "optimizer_state_dict", "loss".

You need to access the 'model_state_dict' key inside the loaded checkpoint.
Try:
netG.load_state_dict(torch.load('generator.pth')['model_state_dict'])
You'll probably need to apply the same fix to the discriminator as well.

Related

How to solve the dimension error and effectively use Conv2dTranspose?

I have created a discriminator file and a generator file to implement a GAN; however, I am facing the error shown below.
The first error I ran into was in main.py, where I call the criterion and pass it the output and the label. I solved that one with the squeeze function, which resolved the shape mismatch.
Before I used squeeze, the error said that the shapes of the output and the label did not match (they were (7,1,1,1) and (7), respectively).
import torch
from torch import nn
class generatorG(nn.Module):
    """Encoder-decoder generator built from ten sequential stages, t1 .. t10.

    t1-t5 are stride-2 4x4 convolutions (3 -> 64 -> 64 -> 128 -> 256 -> 512
    channels) with LeakyReLU(0.2); every one except t1 also has BatchNorm.
    t6 is an unpadded 4x4 convolution to 4000 channels with BatchNorm and ReLU.
    t7-t10 are stride-2 4x4 transposed convolutions (512 -> 256 -> 128 -> 64
    -> 3 channels); t7-t9 use BatchNorm and ReLU, t10 ends in Tanh.

    NOTE(review): t6 emits 4000 channels while t7 is declared with 512 input
    channels, so ``forward`` cannot get past t7 as the layers are declared.
    """

    def __init__(self):
        super(generatorG, self).__init__()

        def down(c_in, c_out, norm=True):
            # stride-2 4x4 convolution [+ BatchNorm] + LeakyReLU
            parts = [nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=4, stride=2, padding=1)]
            if norm:
                parts.append(nn.BatchNorm2d(c_out))
            parts.append(nn.LeakyReLU(0.2, inplace=True))
            return nn.Sequential(*parts)

        def up(c_in, c_out):
            # stride-2 4x4 transposed convolution + BatchNorm + ReLU
            return nn.Sequential(
                nn.ConvTranspose2d(in_channels=c_in, out_channels=c_out, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            )

        self.t1 = down(3, 64, norm=False)
        self.t2 = down(64, 64)
        self.t3 = down(64, 128)
        self.t4 = down(128, 256)
        self.t5 = down(256, 512)
        self.t6 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=4),
            nn.BatchNorm2d(4000),
            nn.ReLU(),
        )
        self.t7 = up(512, 256)
        self.t8 = up(256, 128)
        self.t9 = up(128, 64)
        self.t10 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        )

    def forward(self, x):
        """Feed ``x`` through t1 .. t10 in order and return the result."""
        stages = (self.t1, self.t2, self.t3, self.t4, self.t5,
                  self.t6, self.t7, self.t8, self.t9, self.t10)
        for stage in stages:
            x = stage(x)
        return x
model = generatorG()
print(model(torch.randn()).shape)
Discriminator File
import torch
from torch import nn
class DiscriminatorD(nn.Module):
    """Convolutional discriminator built from five sequential stages, t1 .. t5.

    t1-t4 are stride-2 4x4 convolutions (3 -> 64 -> 128 -> 256 -> 512
    channels) with LeakyReLU(0.2); t2-t4 also apply BatchNorm. t5 is an
    unpadded 4x4 convolution to one channel followed by Sigmoid. The output is
    returned un-flattened, e.g. (N, 1, 1, 1) for a (N, 3, 64, 64) input.
    """

    def __init__(self):
        super(DiscriminatorD, self).__init__()

        def stage(c_in, c_out, norm=True):
            # stride-2 4x4 convolution [+ BatchNorm] + LeakyReLU
            parts = [nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=4, stride=2, padding=1)]
            if norm:
                parts.append(nn.BatchNorm2d(c_out))
            parts.append(nn.LeakyReLU(0.2, inplace=True))
            return nn.Sequential(*parts)

        self.t1 = stage(3, 64, norm=False)
        self.t2 = stage(64, 128)
        self.t3 = stage(128, 256)
        self.t4 = stage(256, 512)
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Feed ``x`` through t1 .. t5 in order and return the result."""
        for block in (self.t1, self.t2, self.t3, self.t4, self.t5):
            x = block(x)
        return x
main.py file
from generator import *
from discriminator import *
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import utils
epochs = 100
Batch_Size = 64
lr = 0.0002
beta1 = 0.5
over = 4
parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', default = 'dataset/train', help = 'path to dataset')
opt = parser.parse_args()
# Create every output folder up front. exist_ok=True makes a re-run a no-op
# for folders that already exist, instead of aborting on the first existing
# folder and silently skipping the ones after it. Real failures (for example
# a permission error) are no longer swallowed.
for out_dir in ('result/train/cropped', 'result/train/real', 'result/train/recon', 'model/'):
    os.makedirs(out_dir, exist_ok=True)
# Resize, centre-crop to 128x128, convert to a tensor, then shift/scale every
# channel with mean 0.5 and std 0.5. transforms.Resize replaces the deprecated
# transforms.Scale, as the torchvision deprecation warning recommends.
transform = transforms.Compose([transforms.Resize(128),
                                transforms.CenterCrop(128),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.ImageFolder(root=opt.dataroot, transform= transform)
assert dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=Batch_Size, shuffle=True, num_workers=0)
wtl2 = 0.999
def weights_init(m):
    """Re-initialise one layer in place, chosen by its class name.

    Meant to be handed to ``Module.apply``. 'Conv' layers get weights drawn
    from N(0.0, 0.2). 'BatchNorm' layers get weights drawn from N(1.0, 0.02)
    and a zeroed bias. Any other layer is left untouched.

    NOTE(review): the Conv standard deviation here is 0.2, ten times the
    BatchNorm one - confirm that 0.2 (and not 0.02) is intended.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.2)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
resume_epoch = 0
netG = generatorG()
netG.apply(weights_init)
netD = DiscriminatorD()
netD.apply(weights_init)
criterion = nn.BCELoss()
criterionMSE = nn.MSELoss()
input_real = torch.FloatTensor(Batch_Size, 3, 128, 128)
input_cropped = torch.FloatTensor(Batch_Size, 3, 128, 128)
label = torch.FloatTensor(Batch_Size)
real_label = 1
fake_label = 0
real_center = torch.FloatTensor(Batch_Size, 3, 64, 64)
input_real = Variable(input_real)
input_cropped = Variable(input_cropped)
label = Variable(label)
real_center = Variable(real_center)
optimizerD = optim.Adam(netD.parameters(), lr = lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = lr, betas = (beta1, 0.999))
over = 4
# Geometry of the 128x128 input and of the centred 64x64 patch that the
# generator has to reconstruct.
image_size = 128
hole_start = image_size // 4                # 32
hole_end = hole_start + image_size // 2     # 96
# The blanked region is `over` pixels smaller on every side than the patch.
mask_lo = hole_start + over
mask_hi = hole_end - over
# Per-channel fill values written into the blanked region.
fill_values = (2 * 117.0 / 255.0 - 1.0, 2 * 104.0 / 255.0 - 1.0, 2 * 123.0 / 255.0 - 1.0)

for epoch in range(resume_epoch, epochs):
    for i, data in enumerate(dataloader, 0):
        real_cpu, _ = data
        real_center_cpu = real_cpu[:, :, hole_start:hole_end, hole_start:hole_end]
        batch_size = real_cpu.size(0)

        # Copy the batch into the pre-allocated buffers and blank the centre.
        with torch.no_grad():
            input_real.resize_(real_cpu.size()).copy_(real_cpu)
            input_cropped.resize_(real_cpu.size()).copy_(real_cpu)
            real_center.resize_(real_center_cpu.size()).copy_(real_center_cpu)
            for channel, value in enumerate(fill_values):
                input_cropped[:, channel, mask_lo:mask_hi, mask_lo:mask_hi] = value

        # ---- Discriminator step ----
        netD.zero_grad()
        with torch.no_grad():
            label.resize_(batch_size).fill_(real_label)
        # netD returns (N, 1, 1, 1). It is flattened to (N,) on EVERY branch so
        # it matches `label`; previously only this real branch was squeezed.
        output = netD(real_center).view(-1)
        print(output.shape)
        print(label.shape)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()

        print(input_cropped.shape)
        fake = netG(input_cropped)
        label.data.fill_(fake_label)
        # detach() keeps this backward pass out of the generator.
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()

        # ---- Generator step ----
        netG.zero_grad()
        label.data.fill_(real_label)  # fake labels are real for generator cost
        output = netD(fake).view(-1)
        errG_D = criterion(output, label)

        # Weighted L2 reconstruction loss: the `over`-pixel border of the patch
        # is weighted ten times as much as its interior.
        wtl2Matrix = torch.full_like(real_center, wtl2 * 10)
        inner_lo = int(over)
        inner_hi = image_size // 2 - int(over)
        wtl2Matrix[:, :, inner_lo:inner_hi, inner_lo:inner_hi] = wtl2
        errG_l2 = ((fake - real_center).pow(2) * wtl2Matrix).mean()

        errG = (1 - wtl2) * errG_D + wtl2 * errG_l2
        errG.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()

        print('[%d / %d][%d / %d] Loss_D: %.4f Loss_G: %.4f / %.4f l_D(x): %.4f l_D(G(z)): %.4f'
              % (epoch, epochs, i, len(dataloader),
                 errD.item(), errG_D.item(), errG_l2.item(), D_x, D_G_z1,))

        if i % 100 == 0:
            vutils.save_image(real_cpu,
                              'result/train/real/real_samples_epoch_%03d.png' % (epoch))
            vutils.save_image(input_cropped.data,
                              'result/train/cropped/cropped_samples_epoch_%03d.png' % (epoch))
            # Paste the generated patch back into the blanked image.
            recon_image = input_cropped.clone()
            recon_image.data[:, :, hole_start:hole_end, hole_start:hole_end] = fake.data
            vutils.save_image(recon_image.data,
                              'result/train/recon/recon_center_samples_epoch_%03d.png' % (epoch))
utils file
import torch
from PIL import Image
from torch.autograd import Variable
def load_image(filename, size = None, scale = None):
    """Open an image file and optionally resize it.

    Args:
        filename: path of the image to open.
        size: if given, the image is resized to a ``size`` x ``size`` square.
        scale: used only when ``size`` is None; both dimensions are divided by
            ``scale`` and truncated to integers.

    Returns:
        The PIL image, resized when ``size`` or ``scale`` was given.
    """
    img = Image.open(filename)
    # Image.LANCZOS is the same filter that used to be exposed under the name
    # Image.ANTIALIAS, which was removed in Pillow 10.
    if size is not None:
        img = img.resize((size, size), Image.LANCZOS)
    elif scale is not None:
        new_size = (int(img.size[0] / scale), int(img.size[1] / scale))
        img = img.resize(new_size, Image.LANCZOS)
    return img
def save_image(filename, data):
    """Write a channel-first image tensor to ``filename``.

    Values are mapped from [-1, 1] to [0, 255] via (x + 1) / 2 * 255 and
    clamped before the conversion to 8-bit.

    Args:
        filename: destination path; PIL picks the format from its extension.
        data: 3-D tensor laid out as (channels, height, width).
    """
    # detach() and cpu() make this work for tensors that track gradients or
    # live on a GPU; a bare .numpy() raises for both. add() is out-of-place,
    # so the caller's tensor is never modified.
    pixels = data.detach().cpu().add(1).div(2).mul(255).clamp(0, 255).numpy()
    # (C, H, W) -> (H, W, C), the layout Image.fromarray expects.
    pixels = pixels.transpose(1, 2, 0).astype('uint8')
    Image.fromarray(pixels).save(filename)
def gram_matrix(y):
    """Return the per-sample Gram matrix of a 4-D feature map.

    ``y`` is unpacked as (b, ch, h, w). Each sample is flattened to (ch, h*w)
    and multiplied with its own transpose; the (b, ch, ch) result is divided
    by ch * h * w.
    """
    batch, channels, height, width = y.size()
    flat = y.view(batch, channels, width * height)
    return torch.bmm(flat, flat.transpose(1, 2)) / (channels * height * width)
def normalize_batch(batch):
    """Scale a 3-channel batch from [0, 255] and standardise each channel.

    The batch is divided by 255, then channel ``c`` becomes
    ``(x - mean[c]) / std[c]`` with mean (0.485, 0.456, 0.406) and
    std (0.229, 0.224, 0.225).

    Args:
        batch: tensor laid out as (N, 3, H, W). It is not modified.

    Returns:
        A new tensor with the same shape as ``batch``.
    """
    scaled = torch.div(batch, 255.0)
    # The statistics are built from `scaled`, so they pick up its device and
    # its (floating-point) dtype even when `batch` itself holds integers, and
    # they broadcast over N, H and W instead of being materialised at full
    # batch size.
    mean = scaled.new_tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
    std = scaled.new_tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
    return (scaled - mean) / std
Error message
(impaint_env) vivek#Viveks-MacBook-Pro image_impainter % python main.py
/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torchvision/transforms/transforms.py:310: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.
warnings.warn("The use of the transforms.Scale transform is deprecated, " +
torch.Size([7])
torch.Size([7])
torch.Size([7, 3, 128, 128])
Traceback (most recent call last):
File "main.py", line 114, in <module>
fake = netG(input_cropped)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/generator.py", line 70, in forward
x = self.t7(x)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 916, in forward
return F.conv_transpose2d(
RuntimeError: Given transposed=1, weight of size [512, 256, 4, 4], expected input[7, 4000, 1, 1] to have 512 channels, but got 4000 channels instead
You have a "gap" between layer t6 and t7 of your generatorG:
# ...
self.t6 = nn.Sequential(
nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
nn.BatchNorm2d(4000),
nn.ReLU()
)
self.t7 = nn.Sequential(
nn.ConvTranspose2d(in_channels = 512, out_channels = 256, kernel_size =4, stride = 2, padding = 1),
nn.BatchNorm2d(256),
nn.ReLU()
)
# ...
Your t6 layer expects the input to have 512 channels and outputs a tensor with 4000 channels. However, the next layer, t7, expects the input to have only 512 channels.
You need to adjust either t6 or t7 so that t6 outputs exactly the number of channels t7 expects. That is, t6's out_channels must equal t7's in_channels.

why function .view(batch_size,-1) gives the same outputs?

I am new to neural networks and have built a VGG16 network. However, within every batch all the inputs lead to the same outputs. So I checked the output of every layer and finally found that x = x.view(batch_size, -1) gives the same outputs! I have no idea why this would happen. Here is part of my code:
class VGG16(torch.nn.Module):
    """VGG16-style classifier with two output classes.

    Thirteen 3x3 convolutions (padding 1, each followed by ReLU) with a 2x2
    max-pool after conv2, conv4, conv7, conv10 and conv13, then three fully
    connected layers. fc1 expects 25088 = 7 * 7 * 512 features, which is what
    a 224x224 input leaves after the five poolings.
    """

    def __init__(self):
        super(VGG16, self).__init__()
        # (in_channels, out_channels) for conv1 .. conv13, in order
        channel_plan = [
            (3, 64), (64, 64),
            (64, 128), (128, 128),
            (128, 256), (256, 256), (256, 256),
            (256, 512), (512, 512), (512, 512),
            (512, 512), (512, 512), (512, 512),
        ]
        for index, (c_in, c_out) in enumerate(channel_plan, start=1):
            setattr(self, 'conv%d' % index, torch.nn.Conv2d(c_in, c_out, padding=1, kernel_size=3))
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)

    def forward(self, x):
        """Return the (batch, 2) class scores for an image batch ``x``."""
        batch_size = x.size(0)
        # (convolution, whether the 2x2 max-pool follows its ReLU)
        plan = (
            (self.conv1, False), (self.conv2, True),
            (self.conv3, False), (self.conv4, True),
            (self.conv5, False), (self.conv6, False), (self.conv7, True),
            (self.conv8, False), (self.conv9, False), (self.conv10, True),
            (self.conv11, False), (self.conv12, False), (self.conv13, True),
        )
        for conv, pooled in plan:
            x = F.relu(conv(x))
            if pooled:
                x = self.pooling(x)
        # Flatten each sample to one row; the values are only re-arranged.
        x = x.view(batch_size, -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
this is the training part:
def train(epoch):
    """Run one pass over ``train_loader`` and log the loss every 10 batches.

    Uses the module-level ``model``, ``criterion``, ``optimizer`` and
    ``train_loader``. ``epoch`` is zero-based and only used in the log line.
    """
    window_loss = 0.0
    for step, (inputs, true_labels) in enumerate(train_loader, start=1):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, true_labels)
        loss.backward()
        optimizer.step()

        window_loss += loss.item()
        # Report the mean loss of the last 10 batches, then start a new window.
        if step % 10 == 0:
            print('[%d %5d] loss: %.3f' %(epoch + 1, step, window_loss/10 ))
            window_loss = 0.0
this is the outputs of layer13(before view):enter image description here
this is the outputs of x.view :enter image description here
I am searching for a long time on net.But no use.Any ideas?
Thanks in advance.
Use of view() method
import torch
torch.tensor([[1,2,3],[4,5,6]]).view(3,2)
#tensor([[1, 2],
[3, 4],
[5, 6]])
Hence view() does not change the tensor's values; it only changes its shape.

Pytorch Autoencoder - How to improve loss?

I've a UNET style autoencoder below, with a filter I wrote in Pytorch at the end. The network seems to be converging faster than it should and I don't know why. I have a dataset of 4000 images and I'm taking a 128x128 crop every time. I'm employing a training rate schedule and weight decay. I've tried fiddling with my parameters with a tiny dataset to see improvements but nothing seems to work. Once the learning rate goes down, the loss just bounces around and doesn't hit a floor, and in some cases goes back up. My network is as follows:
import torch
import torch.nn as nn
from wiener_3d import wiener_3d
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import random
def np_to_pil(np_imgs):
    """Convert a batch of image arrays into a list of PIL images.

    ``np_imgs`` is indexed as (image, channel, ...). Values are multiplied by
    255, clipped to [0, 255] and cast to uint8 (presumably the input is scaled
    to [0, 1] - confirm with the caller). Single-channel images are passed to
    PIL as 2-D arrays; otherwise the channel axis is moved last.
    """
    single_channel = np_imgs.shape[1] == 1
    scaled = np.clip(np_imgs * 255, 0, 255).astype(np.uint8)
    return [
        Image.fromarray(frame[0] if single_channel else frame.transpose(1, 2, 0))
        for frame in scaled
    ]
class WienerFilter(nn.Module):
    """Apply ``wiener_3d`` to every sample of a batch.

    ``param_b`` is stored as a plain attribute (not an ``nn.Parameter``) and
    passed to ``wiener_3d`` as its third argument.
    """

    def __init__(self, param_b=16):
        super(WienerFilter, self).__init__()
        self.param_b = param_b

    def forward(self, input, std):
        """Filter each sample of ``input`` and return ``input`` itself.

        Each sample is squeezed, passed through
        ``wiener_3d(sample, 2 * std, self.param_b)``, given a leading axis
        again and written back into its slot.

        NOTE(review): the results are written into ``input`` in place, so the
        caller's tensor is overwritten - confirm this is intended.
        """
        for idx in range(input.shape[0]):
            sample = torch.squeeze(input[idx])
            filtered = wiener_3d(sample, 2*std, self.param_b)
            input[idx] = torch.unsqueeze(filtered, 0)
        return input
class AutoEncoder(nn.Module):
    """UNet-style autoencoder for 1-channel images, followed by a Wiener filter.

    The encoder halves the resolution five times: ``block1`` once, then
    ``block2`` four times with the same weights. The decoder doubles it five
    times: ``block3``, ``block4``, then ``block5`` three times with the same
    weights. Before each decoder stage after the first, the features are
    concatenated with the encoder output of matching resolution (and finally
    with the input itself). ``block6`` maps the result back to one channel.
    """

    def __init__(self):
        super(AutoEncoder, self).__init__()

        def conv_bn_act(c_in, c_out):
            # 3x3 convolution (size-preserving) + BatchNorm + LeakyReLU
            return [nn.Conv2d(c_in, c_out, 3, padding=1), nn.BatchNorm2d(c_out), nn.LeakyReLU(0.1)]

        def conv_pool_bn_act(c_in, c_out):
            # 3x3 convolution + 2x2 max-pool (halves H and W) + BatchNorm + LeakyReLU
            return [nn.Conv2d(c_in, c_out, 3, padding=1), nn.MaxPool2d(2),
                    nn.BatchNorm2d(c_out), nn.LeakyReLU(0.1)]

        def up_bn_act(channels):
            # 2x2 stride-2 transposed convolution (doubles H and W) + BatchNorm + LeakyReLU
            return [nn.ConvTranspose2d(channels, channels, 2, 2),
                    nn.BatchNorm2d(channels), nn.LeakyReLU(0.1)]

        # Encoder
        self.block1 = nn.Sequential(*(conv_bn_act(1, 96) + conv_pool_bn_act(96, 96)))
        self.block2 = nn.Sequential(*conv_pool_bn_act(96, 96))
        # Decoder
        self.block3 = nn.Sequential(*(conv_bn_act(96, 96) + up_bn_act(96)))
        self.block4 = nn.Sequential(
            *(conv_bn_act(192, 192) + conv_bn_act(192, 192) + up_bn_act(192)))
        self.block5 = nn.Sequential(
            *(conv_bn_act(288, 192) + conv_bn_act(192, 192) + up_bn_act(192)))
        # Output head: 193 = 192 decoder channels + the 1-channel input
        self.block6 = nn.Sequential(
            *(conv_bn_act(193, 96) + conv_bn_act(96, 64) + [
                nn.Conv2d(64, 32, 3, padding=1),
                nn.LeakyReLU(0.1),
                nn.Conv2d(32, 1, 3, padding=1),
                nn.LeakyReLU(0.1),
            ]))
        self.wiener_filter = WienerFilter()

    def forward(self, x, std):
        """Return the network output plus the Wiener-filtered residual.

        Args:
            x: input batch with one channel.
            std: forwarded unchanged to the Wiener filter.
        """
        # Encoder: keep every intermediate result for the skip connections.
        skips = [self.block1(x)]
        for _ in range(4):
            skips.append(self.block2(skips[-1]))
        pool1, pool2, pool3, pool4, bottleneck = skips

        # Decoder: upsample, then concatenate the matching skip on the channel axis.
        merged = torch.cat((self.block3(bottleneck), pool4), 1)
        merged = torch.cat((self.block4(merged), pool3), 1)
        for skip in (pool2, pool1, x):
            merged = torch.cat((self.block5(merged), skip), 1)
        output = self.block6(merged)

        # Filter the residual between input and prediction, then add it back.
        residual = x - output
        return output + self.wiener_filter(residual, std)
My current parameters are:
Adam optimizer,
learning rate decay by 0.1 if no improvement for 7 epochs,
initial learning rate 0.001,
0.0001 weight decay,
No batches.
I feel like I've tried everything at this stage. Could someone give me some advice on how to improve my network? Thank you.

Tensorflow multi-GPU MNIST classifier: low accuracy

I am stuck with multiple GPU MNIST classifier in Tensorflow. Code runs without errors, but accuracy is very poor (30%). I am new to Tensorflow so I do not know where is the problem ? GPU: 2x GTX 1080 Ti.
I have found several tutorials for multiple GPU, but code is hard to follow. For this reason I am trying to develop MNIST CNN classifier from scratch.
from __future__ import print_function
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime
def average_gradients(tower_grads):
    """Average every variable's gradient across all towers.

    Args:
        tower_grads: one list per tower of (gradient, variable) pairs, as
            produced by ``Optimizer.compute_gradients``. Every tower must list
            the same variables in the same order.

    Returns:
        A list with one (averaged_gradient, variable) pair per variable. The
        variable is taken from the first tower, because the variables are
        shared across towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Each grad_and_vars looks like:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        # Stack the per-tower gradients along a new leading 'tower' axis and
        # average over it.
        stacked = tf.stack([g for g, _ in grad_and_vars], axis=0)
        grad = tf.reduce_mean(stacked, 0)
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
    # The return has to come after the loop: returning from inside it would
    # hand back the average for the first variable only, and no other variable
    # would ever be updated.
    return average_grads
with tf.device('/cpu:0'):
x = tf.placeholder(tf.float32, [None, 784], name='x')
x_img=tf.reshape(x, [-1, 28, 28, 1])
x_dict={}
x_dict['x0'],x_dict['x1'] = tf.split(x_img,2)
y_dict={}
y = tf.placeholder(tf.float32, [None, 10], name='y')
y_dict['y0'],y_dict['y1'] = tf.split(y,2)
opt=tf.train.GradientDescentOptimizer(0.01)
keep_prob = tf.placeholder(tf.float32)
w0=tf.get_variable('w0',initializer=tf.truncated_normal([5, 5,1,32], stddev=0.1))
b0=tf.get_variable('b0',initializer=tf.zeros([32]))
w1=tf.get_variable('w1',initializer=tf.truncated_normal([5,5,32,64], stddev=0.1))
b1=tf.get_variable('b1',initializer=tf.zeros([64]))
w2=tf.get_variable('w2',initializer=tf.truncated_normal([7*7*64,1024], stddev=0.1))
b2=tf.get_variable('b2',initializer=tf.zeros([1024]))
w3=tf.get_variable('w3',initializer=tf.truncated_normal([1024,10], stddev=0.1))
b3=tf.get_variable('b3',initializer=tf.zeros([10]))
grads=[]
def conv2d(xx, W):
    """Convolve ``xx`` with kernel ``W`` using stride 1 and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(xx, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(xx):
    """Max-pool ``xx`` with a 2x2 window, stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(xx, ksize=window, strides=window, padding='SAME')
def model_forward(xx):
    """Build the CNN forward pass on ``xx`` and return sigmoid activations.

    Two conv + ReLU + 2x2 max-pool stages (weights w0/b0 and w1/b1), a ReLU
    fully connected layer (w2/b2) on the features flattened to 7*7*64, dropout
    with the module-level ``keep_prob``, and a final linear layer (w3/b3)
    passed through a sigmoid.
    """
    pooled1 = max_pool_2x2(tf.nn.relu(conv2d(xx, w0) + b0))
    pooled2 = max_pool_2x2(tf.nn.relu(conv2d(pooled1, w1) + b1))
    flat = tf.reshape(pooled2, [-1, 7*7*64])
    hidden = tf.nn.relu(tf.matmul(flat, w2) + b2)
    dropped = tf.nn.dropout(hidden, keep_prob)
    return tf.nn.sigmoid(tf.matmul(dropped, w3) + b3)
for i in range(0,2):
with tf.device(('/gpu:{0}').format(i)):
with tf.variable_scope(('scope_gpu_{0}').format(i)):
yy=model_forward(x_dict[('x{0}').format(i)])
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_dict[('y{0}').format(i)] * tf.log(yy), reduction_indices=[1]))
grads.append(opt.compute_gradients(cross_entropy,tf.trainable_variables()))
with tf.device('/cpu:0'):
grad = average_gradients(grads)
train_step = opt.apply_gradients(grad)
yy=model_forward(x_dict['x0'])
correct_prediction = tf.equal(tf.argmax(yy, 1), tf.argmax(y_dict['y0'], 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
def main():
    """Train on MNIST and report test accuracy and wall-clock time.

    Runs 10000 steps on batches of 100 examples with dropout keep
    probability 0.5, printing the test accuracy every 200 steps, and finally
    prints the elapsed time. The graph is also written to a summary directory.
    """
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    session_config = tf.ConfigProto(log_device_placement=True)
    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('C:\\tmp\\test\\', graph=tf.get_default_graph())
        started = datetime.datetime.now()
        for step in range(10000):
            images, labels = mnist.train.next_batch(100)
            sess.run(train_step, feed_dict={x: images, y: labels, keep_prob: 0.5})
            if step % 200 != 0:
                continue
            # Dropout is disabled (keep_prob 1) for evaluation.
            test_feed = {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}
            print(step, sess.run(accuracy, feed_dict=test_feed))
        finished = datetime.datetime.now()
        print("Computation time: " + str(finished - started))
if __name__ == "__main__":
main()
The problems that I noticed:
Your cross-entropy loss is wrong (see this question for details, in short you're computing binary cross-entropy).
I dropped manual gradient computation in favor of tf.train.AdamOptimizer.
I dropped the split of the input of x (it's not the right way to do distributed computation in tensorflow).
The result model easily gets to 99% accuracy even on one GPU.
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import datetime
x = tf.placeholder(tf.float32, [None, 784], name='x')
x_img = tf.reshape(x, [-1, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, 10], name='y')
keep_prob = tf.placeholder(tf.float32)
stddev = 0.1
w0 = tf.get_variable('w0', initializer=tf.truncated_normal([5, 5, 1, 32], stddev=stddev))
b0 = tf.get_variable('b0', initializer=tf.zeros([32]))
w1 = tf.get_variable('w1', initializer=tf.truncated_normal([5, 5, 32, 64], stddev=stddev))
b1 = tf.get_variable('b1', initializer=tf.zeros([64]))
w2 = tf.get_variable('w2', initializer=tf.truncated_normal([7 * 7 * 64, 1024], stddev=stddev))
b2 = tf.get_variable('b2', initializer=tf.zeros([1024]))
w3 = tf.get_variable('w3', initializer=tf.truncated_normal([1024, 10], stddev=stddev))
b3 = tf.get_variable('b3', initializer=tf.zeros([10]))
def conv2d(xx, W):
    """Convolve ``xx`` with kernel ``W`` using stride 1 and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(xx, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(xx):
    """Max-pool ``xx`` with a 2x2 window, stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(xx, ksize=window, strides=window, padding='SAME')
def model_forward(xx):
    """Build the CNN forward pass on ``xx`` and return the raw logits.

    Two conv + ReLU + 2x2 max-pool stages (weights w0/b0 and w1/b1), a ReLU
    fully connected layer (w2/b2) on the features flattened to 7*7*64, dropout
    with the module-level ``keep_prob``, and a final linear layer (w3/b3). No
    activation is applied to the output; the loss consumes logits.
    """
    pooled1 = max_pool_2x2(tf.nn.relu(conv2d(xx, w0) + b0))
    pooled2 = max_pool_2x2(tf.nn.relu(conv2d(pooled1, w1) + b1))
    flat = tf.reshape(pooled2, [-1, 7 * 7 * 64])
    hidden = tf.nn.relu(tf.matmul(flat, w2) + b2)
    dropped = tf.nn.dropout(hidden, keep_prob)
    return tf.matmul(dropped, w3) + b3
yy = model_forward(x_img)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=yy, labels=y))
train_step = tf.train.AdamOptimizer().minimize(loss)
correct_prediction = tf.equal(tf.argmax(yy, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
def main():
    """Train the model on MNIST and report progress.

    Runs 10000 training steps with batches of 100 examples, prints the test
    accuracy every 200 steps and finally prints the elapsed wall-clock time.
    """
    mnist = input_data.read_data_sets("/home/maxim/p/data/mnist-tf", one_hot=True)
    session_config = tf.ConfigProto(log_device_placement=True)
    with tf.Session(config=session_config) as sess:
        sess.run(tf.global_variables_initializer())
        started_at = datetime.datetime.now()
        for step in range(10000):
            images, labels = mnist.train.next_batch(100)
            # Dropout is active (keep_prob 0.5) only while training.
            sess.run(train_step, feed_dict={x: images, y: labels, keep_prob: 0.5})
            if step % 200 != 0:
                continue
            # Evaluate on the full test set with dropout disabled.
            test_feed = {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1}
            print(step, sess.run(accuracy, feed_dict=test_feed))
        finished_at = datetime.datetime.now()
        print("Computation time: " + str(finished_at - started_at))


if __name__ == "__main__":
    main()
Now, if you really want it, you can do data or model parallelism to utilize your GPU power (there is a great post about it, but sometimes it doesn't render correctly due to hosting problems).
In addition to the points made in the first two answers, look at the return average_grads statement in the average_gradients function: it returns during the first iteration of the outer for loop, so the gradients are only ever applied to the first variable (probably w0). As a result only w0 is updated, and the accuracy stays very low because all the other variables keep their initial values (random or zero).
This is because the model is not using the same weights & biases for inference on CPU as well as on the other GPU devices.
For example:
# Example: build the forward pass, loss and gradients once per GPU (indices 0 and 1).
# NOTE(review): the indentation of this snippet was lost; presumably each statement
# below nests inside the `with` blocks that precede it -- confirm before running.
for i in range(0,2):
# Place the ops created for this iteration on GPU i.
with tf.device(('/gpu:{0}').format(i)):
# Each GPU gets its own variable scope named scope_gpu_<i>.
with tf.variable_scope(('scope_gpu_{0}').format(i)) as infer_scope:
# Forward pass on this GPU's input, looked up in x_dict under the key 'x<i>'.
yy=model_forward(x_dict[('x{0}').format(i)])
# Mark the scope as reusing variables; note this runs after the forward pass.
infer_scope.reuse_variables()
# Cross-entropy against this GPU's labels (y_dict['y<i>']), averaged over the batch.
# NOTE(review): tf.log(yy) assumes yy holds probabilities rather than raw scores -- confirm.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_dict[('y{0}').format(i)] * tf.log(yy), reduction_indices=[1]))
# Collect this GPU's gradients w.r.t. all trainable variables.
grads.append(opt.compute_gradients(cross_entropy,tf.trainable_variables()))
You are getting low accuracy because, without calling reuse_variables(), every call to the model inference function (for example inside each epoch) makes the graph create a new model with randomly initialized weights and biases instead of reusing the existing ones, which is not what you want.

CNN for cifar10 dataset in Tensorflow

I am trying to replicate the results obtained by a convolutional neural network on CIFAR10 using Tensorflow; however, after about 60 epochs my accuracy is still around 10%, so I do not know whether the CNN is being trained correctly.
This code is based on Deep MNIST for Experts (https://www.tensorflow.org/get_started/mnist/pros); however, it does not work on CIFAR10.
import numpy as np
import tensorflow as tf
def unpickle(file):
    """Load and return the object pickled in the file at path `file`.

    Uses cPickle on Python 2 and falls back to pickle on Python 3, where the
    "latin1" encoding lets pickles written by Python 2 be read back with
    ordinary string keys.
    """
    try:
        import cPickle as pickle_module  # Python 2
        load_kwargs = {}
    except ImportError:
        import pickle as pickle_module  # Python 3
        load_kwargs = {"encoding": "latin1"}
    # The context manager closes the file even if unpickling raises.
    with open(file, 'rb') as fo:
        return pickle_module.load(fo, **load_kwargs)
# Load the five training batches and the test batch.
# NOTE(review): unpickle() passes the path straight to open(), which does not
# expand "~" -- confirm these paths resolve on the target machine.
b1 = unpickle("~/cifar-10-batches-py/data_batch_1")
b2 = unpickle("~/cifar-10-batches-py/data_batch_2")
b3 = unpickle("~/cifar-10-batches-py/data_batch_3")
b4 = unpickle("~/cifar-10-batches-py/data_batch_4")
b5 = unpickle("~/cifar-10-batches-py/data_batch_5")
test = unpickle("~/cifar-10-batches-py/test_batch")
# Test images and labels
test_data = test['data']
test_label = test['labels']
# Training images and labels: the five batches stacked along the example axis
train_data = np.concatenate([b1['data'],b2['data'],b3['data'],b4['data'],b5['data']],axis=0)
train_label = np.concatenate([b1['labels'],b2['labels'],b3['labels'],b4['labels'],b5['labels']],axis=0)
# Reshape the flat rows into [examples, 32, 32, 3] image tensors.
# NOTE(review): the answer below this question reports this direct reshape as the
# bug; it reshapes to (N, 3, 32, 32) first and then transposes to (N, 32, 32, 3).
train_data = np.reshape(train_data,[50000,32,32,3])
test_data = np.reshape(test_data,[10000,32,32,3])
# Mini-batch size and image geometry
batch_size = 100
image_width = 32
image_height = 32
channels = 3
# Graph inputs
x = tf.placeholder(tf.float32, [None, image_width, image_height, channels])  # image batch
y = tf.placeholder(tf.int32, [None])  # integer class labels, one per example
one_hot = tf.one_hot(y,depth=10)  # labels converted to one-hot vectors of length 10
#Constructing CNN Layers
def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal with stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
    # The pooling window and the stride are the same 2x2 spatial step.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# conv2d takes an input tensor of shape [batch, in_height, in_width, in_channels]
# and a filter / kernel tensor of shape
# [filter_height, filter_width, in_channels, out_channels]
# (taken from: http://textminingonline.com/dive-into-tensorflow-part-v-deep-mnist)
# Conv layer 1: 7x7 filters, 3 -> 32 channels, followed by 2x2 pooling (32x32 -> 16x16).
W_conv1 = weight_variable([7, 7, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
# Conv layer 2: 5x5 filters, 32 -> 32 channels, followed by 2x2 pooling (16x16 -> 8x8).
W_conv2 = weight_variable([5, 5, 32, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
# Conv layer 3: 5x5 filters, 32 -> 64 channels; no pooling afterwards, so the map stays 8x8.
W_conv3 = weight_variable([5, 5, 32, 64])
b_conv3 = bias_variable([64])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
# Fully connected layers
W_fc1 = weight_variable([8 * 8 * 64, 64])
b_fc1 = bias_variable([64])
# Despite its name, h_pool3_flat is the flattened output of h_conv3 (there is no third pooling).
h_pool3_flat = tf.reshape(h_conv3, [-1, 8*8*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
# Output layer: 64 units -> 10 classes, turned into probabilities with softmax.
W_fc2 = weight_variable([64, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)
# Cost: cross-entropy summed (not averaged) over every example and class in the batch.
# The probabilities are clipped to [1e-10, 1e20] so that log() never receives 0.
cross_entropy = -tf.reduce_sum(one_hot*tf.log(tf.clip_by_value(y_conv,1e-10,1e20)))
# SGD with momentum 0.9 and learning rate 0.0001
train_step = tf.train.MomentumOptimizer(learning_rate = 0.0001, momentum = 0.9).minimize(cross_entropy)
# A prediction is correct when the most probable class matches the label's class.
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(one_hot,1))
# Fraction of correct predictions in the fed batch
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# NOTE(review): newer TensorFlow 1.x code uses tf.global_variables_initializer()
# for this step -- confirm against the installed version.
init = tf.initialize_all_variables()
# Session configured with 16 intra-op threads; variables are initialised right away.
sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=16))
sess.run(init)
# Training loop: `epochs` passes over the training set in mini-batches of batch_size.
# NOTE(review): the indentation of this listing was lost, so it is not visible
# whether the b_per counter and the periodic evaluation run once per batch or
# once per epoch -- confirm against the original code.
epochs = 100
# Counter that triggers an evaluation on every 10th increment
b_per = 0
# Test accuracies collected by the periodic evaluations
row = []
for e in range(epochs):
print( "epoch", e)
# Running total of the per-example cost; only used by the disabled print below.
avg_cost = 0
# For each mini-batch
for j in range(int(train_data.shape[0]/batch_size)):
# Indices of the examples belonging to batch j
subset=range((j*batch_size),((j+1)*batch_size))
data = train_data[subset,:,:,:]
label = train_label[subset]
# One optimisation step; c is the summed cross-entropy of this batch.
_,c = sess.run([train_step,cross_entropy], feed_dict={x: data, y: label})
avg_cost += c / data.shape[0]
#print(avg_cost)
b_per = b_per + 1
if b_per%10==0 :
# Evaluate accuracy on the whole test set, store it and print the latest value.
row.append(sess.run(accuracy, feed_dict={x: test_data, y: test_label }))
print(row[-1])
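The data reshape step is wrong. Each CIFAR-10 row stores the image channel-first (3 x 32 x 32), so it has to be reshaped to that layout and then transposed to height x width x channels. It should be: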
# Reshaping data: view each flat row as (channels, height, width), then move
# the channel axis last so the arrays become (examples, 32, 32, 3).
train_data = np.transpose(
    np.reshape(train_data, (50000, 3, 32, 32)), (0, 2, 3, 1)
).astype("uint8")
test_data = np.transpose(
    np.reshape(test_data, (10000, 3, 32, 32)), (0, 2, 3, 1)
).astype("uint8")

Resources