How to solve the dimension error and effectively use Conv2dTranspose? - machine-learning

I have created a discriminator file and a generator file to implement a GAN, but I am facing the error below.
The initial error I faced was in the main.py file, where I call the criterion (the BCE loss) and pass it the output and the label. I solved that error using the squeeze function, which resolved the shape issue.
Before using squeeze, the error showed that the shapes of the output and the label did not match: the shapes were (7, 1, 1, 1) for the output and (7) for the label.
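For reference, a minimal sketch of that squeeze fix (the shapes come from the error above; the tensors here are just placeholders):
import torch

output = torch.rand(7, 1, 1, 1)  # stand-in for the discriminator output
label = torch.ones(7)            # stand-in for the target labels

output = torch.squeeze(output)   # drops the three singleton dims -> shape (7,)
assert output.shape == label.shape
Generator file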
import torch
from torch import nn

class generatorG(nn.Module):
    def __init__(self):
        super(generatorG, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(4, 4), stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t6 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
            nn.BatchNorm2d(4000),
            nn.ReLU()
        )
        self.t7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.t8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.t9 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.t10 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        x = self.t6(x)
        x = self.t7(x)
        x = self.t8(x)
        x = self.t9(x)
        x = self.t10(x)
        return x

model = generatorG()
print(model(torch.randn(1, 3, 128, 128)).shape)  # torch.randn() needs a size argument
Discriminator File
import torch
from torch import nn

class DiscriminatorD(nn.Module):
    def __init__(self):
        super(DiscriminatorD, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        return x
main.py file
from generator import *
from discriminator import *
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import utils

epochs = 100
Batch_Size = 64
lr = 0.0002
beta1 = 0.5
over = 4

parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', default='dataset/train', help='path to dataset')
opt = parser.parse_args()

try:
    os.makedirs('result/train/cropped')
    os.makedirs('result/train/real')
    os.makedirs('result/train/recon')
    os.makedirs('model/')
except:
    pass

transform = transforms.Compose([transforms.Scale(128),
                                transforms.CenterCrop(128),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.ImageFolder(root=opt.dataroot, transform=transform)
assert dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=Batch_Size, shuffle=True, num_workers=0)
wtl2 = 0.999

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.2)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

resume_epoch = 0
netG = generatorG()
netG.apply(weights_init)
netD = DiscriminatorD()
netD.apply(weights_init)
criterion = nn.BCELoss()
criterionMSE = nn.MSELoss()

input_real = torch.FloatTensor(Batch_Size, 3, 128, 128)
input_cropped = torch.FloatTensor(Batch_Size, 3, 128, 128)
label = torch.FloatTensor(Batch_Size)
real_label = 1
fake_label = 0
real_center = torch.FloatTensor(Batch_Size, 3, 64, 64)

input_real = Variable(input_real)
input_cropped = Variable(input_cropped)
label = Variable(label)
real_center = Variable(real_center)

optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))
over = 4

for epoch in range(resume_epoch, epochs):
    for i, data in enumerate(dataloader, 0):
        real_cpu, _ = data
        real_center_cpu = real_cpu[:, :, int(128/4):int(128/4)+int(128/2), int(128/4):int(128/4)+int(128/2)]
        batch_size = real_cpu.size(0)
        with torch.no_grad():
            input_real.resize_(real_cpu.size()).copy_(real_cpu)
            input_cropped.resize_(real_cpu.size()).copy_(real_cpu)
            real_center.resize_(real_center_cpu.size()).copy_(real_center_cpu)
            input_cropped[:, 0, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 117.0 / 255.0 - 1.0
            input_cropped[:, 1, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 104.0 / 255.0 - 1.0
            input_cropped[:, 2, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 123.0 / 255.0 - 1.0
        netD.zero_grad()
        with torch.no_grad():
            label.resize_(batch_size).fill_(real_label)
        output = netD(real_center)
        # output = torch.unsqueeze(output[0, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        print(output.shape)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        print(label.shape)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()
        print(input_cropped.shape)
        fake = netG(input_cropped)
        label.data.fill_(fake_label)
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()

        netG.zero_grad()
        label.data.fill_(real_label)  # fake labels are real for generator cost
        output = netD(fake)
        errG_D = criterion(output, label)
        wtl2Matrix = real_center.clone()
        wtl2Matrix.data.fill_(wtl2 * 10)
        wtl2Matrix.data[:, :, int(over):int(128 / 2 - over), int(over):int(128 / 2 - over)] = wtl2
        errG_l2 = (fake - real_center).pow(2)
        errG_l2 = errG_l2 * wtl2Matrix
        errG_l2 = errG_l2.mean()
        errG = (1 - wtl2) * errG_D + wtl2 * errG_l2
        errG.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()
        print('[%d / %d][%d / %d] Loss_D: %.4f Loss_G: %.4f / %.4f l_D(x): %.4f l_D(G(z)): %.4f'
              % (epoch, epochs, i, len(dataloader),
                 errD.data, errG_D.data, errG_l2.data, D_x, D_G_z1,))
        if i % 100 == 0:
            vutils.save_image(real_cpu,
                              'result/train/real/real_samples_epoch_%03d.png' % (epoch))
            vutils.save_image(input_cropped.data,
                              'result/train/cropped/cropped_samples_epoch_%03d.png' % (epoch))
            recon_image = input_cropped.clone()
            recon_image.data[:, :, int(128 / 4):int(128 / 4 + 128 / 2), int(128 / 4):int(128 / 4 + 128 / 2)] = fake.data
            vutils.save_image(recon_image.data,
                              'result/train/recon/recon_center_samples_epoch_%03d.png' % (epoch))
utils file
import torch
from PIL import Image
from torch.autograd import Variable

def load_image(filename, size=None, scale=None):
    img = Image.open(filename)
    if size is not None:
        img = img.resize((size, size), Image.ANTIALIAS)
    elif scale is not None:
        img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
    return img

def save_image(filename, data):
    img = data.clone().add(1).div(2).mul(255).clamp(0, 255).numpy()
    img = img.transpose(1, 2, 0).astype('uint8')
    img = Image.fromarray(img)
    img.save(filename)

def gram_matrix(y):
    (b, ch, h, w) = y.size()
    features = y.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch * h * w)
    return gram

def normalize_batch(batch):
    mean = batch.data.new(batch.data.size())
    std = batch.data.new(batch.data.size())
    mean[:, 0, :, :] = 0.485
    mean[:, 1, :, :] = 0.456
    mean[:, 2, :, :] = 0.406
    std[:, 0, :, :] = 0.229
    std[:, 1, :, :] = 0.224
    std[:, 2, :, :] = 0.225
    batch = torch.div(batch, 255.0)
    batch -= Variable(mean)
    # batch /= Variable(std)
    batch = torch.div(batch, Variable(std))
    return batch
Error message
(impaint_env) vivek@Viveks-MacBook-Pro image_impainter % python main.py
/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torchvision/transforms/transforms.py:310: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.
warnings.warn("The use of the transforms.Scale transform is deprecated, " +
torch.Size([7])
torch.Size([7])
torch.Size([7, 3, 128, 128])
Traceback (most recent call last):
File "main.py", line 114, in <module>
fake = netG(input_cropped)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/generator.py", line 70, in forward
x = self.t7(x)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 916, in forward
return F.conv_transpose2d(
RuntimeError: Given transposed=1, weight of size [512, 256, 4, 4], expected input[7, 4000, 1, 1] to have 512 channels, but got 4000 channels instead

You have a "gap" between layers t6 and t7 of your generatorG:
# ...
self.t6 = nn.Sequential(
    nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
    nn.BatchNorm2d(4000),
    nn.ReLU()
)
self.t7 = nn.Sequential(
    nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)
# ...
Your t6 layer expects its input to have 512 channels and outputs a tensor with 4000 channels. However, the next layer, t7, expects its input to have only 512 channels.
You need to adjust either t6 or t7 so that t6 outputs exactly the number of channels t7 expects; that is, t6's out_channels must equal t7's in_channels.
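For example, a minimal sketch of one way to close the gap (whether you shrink t6 or widen t7's in_channels is a design choice; 512 is assumed here to mirror t5, and this fixes only the channel mismatch):

self.t6 = nn.Sequential(
    nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(4, 4)),  # was out_channels=4000
    nn.BatchNorm2d(512),  # must match the new out_channels
    nn.ReLU()
)
self.t7 = nn.Sequential(
    # in_channels now equals t6's out_channels
    nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)

With this change, the (7, 4000, 1, 1) tensor from the error message becomes (7, 512, 1, 1), which t7 accepts.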

Related

I simply wrote a vgg16 but it doesn't work

I have written a VGG16 for binary classification by myself with PyTorch, and I found that its outputs are the same for all inputs in the same batch; after backpropagation the outputs change, but they are still identical for data in the same batch. I have no idea why this happens. I tried the vgg16 in torchvision and it works perfectly on the same dataset, so I'm sure there are some problems in my model.
Here is the code of my own model:
import torch
from torch import nn

class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=(1, 1), kernel_size=(3, 3))  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=(1, 1), kernel_size=(3, 3))
        self.conv3 = torch.nn.Conv2d(64, 128, padding=(1, 1), kernel_size=(3, 3))
        self.conv4 = torch.nn.Conv2d(128, 128, padding=(1, 1), kernel_size=(3, 3))
        self.conv5 = torch.nn.Conv2d(128, 256, padding=(1, 1), kernel_size=(3, 3))
        self.conv6 = torch.nn.Conv2d(256, 256, padding=(1, 1), kernel_size=(3, 3))
        self.conv7 = torch.nn.Conv2d(256, 256, padding=(1, 1), kernel_size=(3, 3))
        self.conv8 = torch.nn.Conv2d(256, 512, padding=(1, 1), kernel_size=(3, 3))
        self.conv9 = torch.nn.Conv2d(512, 512, padding=(1, 1), kernel_size=(3, 3))
        self.conv10 = torch.nn.Conv2d(512, 512, padding=(1, 1), kernel_size=(3, 3))
        self.conv11 = torch.nn.Conv2d(512, 512, padding=(1, 1), kernel_size=(3, 3))
        self.conv12 = torch.nn.Conv2d(512, 512, padding=(1, 1), kernel_size=(3, 3))
        self.conv13 = torch.nn.Conv2d(512, 512, padding=(1, 1), kernel_size=(3, 3))
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)
        self.Avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))
        self.ReLU = nn.ReLU(True)
        # self.Drop = nn.Dropout()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.ReLU(x)  # layer1
        x = self.pooling(self.ReLU(self.conv2(x)))  # layer2
        x = self.ReLU(self.conv3(x))  # layer3
        x = self.pooling(self.ReLU(self.conv4(x)))  # layer4
        x = self.ReLU(self.conv5(x))  # layer5
        x = self.ReLU(self.conv6(x))  # layer6
        x = self.pooling(self.ReLU(self.conv7(x)))  # layer7
        x = self.ReLU(self.conv8(x))  # layer8
        x = self.ReLU(self.conv9(x))  # layer9
        x = self.pooling(self.ReLU(self.conv10(x)))  # layer10
        x = self.ReLU(self.conv11(x))  # layer11
        x = self.ReLU(self.conv12(x))  # layer12
        x = self.pooling(self.ReLU(self.conv13(x)))  # layer13
        x = self.Avgpool(x)
        # x = x.view(batch_size, -1)
        x = torch.flatten(x, 1)  # flatten
        # print(x.shape)  # for debug
        # print(x)
        x = self.ReLU(self.fc1(x))
        # x = self.Drop(x)
        x = self.ReLU(self.fc2(x))
        # x = self.Drop(x)
        x = self.fc3(x)
        # x = F.sigmoid(x)
        return x
Screenshots of the outputs for two different batches are in the original post (omitted here); the rows are identical within each batch.
This problem has been bothering me for days; I would be most grateful if you could point out my mistakes.
Screenshots of some inputs with label 0 and with label 1 are also in the original post (omitted here).
def _initialize_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(
                m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.constant_(m.bias, 0)
I faced the same error; try this out, it worked for me. Add this method to your model and call it in __init__.
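A sketch of where the call would go (assuming the _initialize_weights method above is pasted into the VGG16 class from the question):

class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=(1, 1), kernel_size=(3, 3))
        # ... remaining layer definitions as above ...
        self._initialize_weights()  # apply the init scheme to every conv and linear layer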

Why does .view(batch_size, -1) give the same outputs?

I am a freshman in neural networks and I have built a VGG16 network, but in every batch all the inputs lead to the same outputs. So I checked the output of every layer and finally found that x = x.view(batch_size, -1) gives the same outputs! I have no idea why this happens. Here is part of my code:
import torch
import torch.nn.functional as F

class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=1, kernel_size=3)  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=1, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(64, 128, padding=1, kernel_size=3)
        self.conv4 = torch.nn.Conv2d(128, 128, padding=1, kernel_size=3)
        self.conv5 = torch.nn.Conv2d(128, 256, padding=1, kernel_size=3)
        self.conv6 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv7 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv8 = torch.nn.Conv2d(256, 512, padding=1, kernel_size=3)
        self.conv9 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv10 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv11 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv12 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv13 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.conv1(x))  # layer1
        x = self.pooling(F.relu(self.conv2(x)))  # layer2
        x = F.relu(self.conv3(x))  # layer3
        x = self.pooling(F.relu(self.conv4(x)))  # layer4
        x = F.relu(self.conv5(x))  # layer5
        x = F.relu(self.conv6(x))  # layer6
        x = self.pooling(F.relu(self.conv7(x)))  # layer7
        x = F.relu(self.conv8(x))  # layer8
        x = F.relu(self.conv9(x))  # layer9
        x = self.pooling(F.relu(self.conv10(x)))  # layer10
        x = F.relu(self.conv11(x))  # layer11
        x = F.relu(self.conv12(x))  # layer12
        x = self.pooling(F.relu(self.conv13(x)))  # layer13
        x = x.view(batch_size, -1)  # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
This is the training part:
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, true_labels = data
        optimizer.zero_grad()  # clear the optimizer to avoid accumulation of grad
        # forward
        outputs = model(inputs)
        loss = criterion(outputs, true_labels)
        # backward
        loss.backward()
        # update
        optimizer.step()
        running_loss += loss.item()
        # output the train result every 10 loops
        if (batch_idx + 1) % 10 == 0:
            print('[%d %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 10))
            running_loss = 0.0
Screenshots of the outputs of layer13 (before view) and of x.view are in the original post (omitted here).
I have been searching the net for a long time, but to no avail. Any ideas?
Thanks in advance.
Use of view() method
import torch
torch.tensor([[1, 2, 3], [4, 5, 6]]).view(3, 2)
# tensor([[1, 2],
#         [3, 4],
#         [5, 6]])
Hence there is no change in the tensor's values; view() only changes its shape.
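To tie this back to the question, here is a quick sketch (shapes chosen arbitrarily) showing that view(batch_size, -1) preserves per-sample values, so it cannot by itself make all outputs in a batch identical:

import torch

x = torch.randn(4, 512, 7, 7)          # stand-in for a layer13 output: batch of 4
flat = x.view(x.size(0), -1)           # shape (4, 25088); same values, new shape
print(torch.equal(flat[0], flat[1]))   # False: rows remain distinct after view()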

Missing Keys in state_dict

I am having problems loading my model on Google Colab. I have attached the code and the error below.
I have tried changing the name of the state dict, and it does not help.
Basically, I am trying to save my model for later use, but this is becoming extremely difficult since I am not able to properly save and load it. Please help me with the problem. After the code, you will find the error that I have attached below.
Here is the code:
from zipfile import ZipFile
file_name = 'data.zip'
with ZipFile(file_name, 'r') as zip:
    zip.extractall()

from zipfile import ZipFile
file_name = 'results.zip'
with ZipFile(file_name, 'r') as zip:
    zip.extractall()

!pip install tensorflow-gpu

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable

batchSize = 64
imageSize = 64

transform = transforms.Compose([transforms.Resize(imageSize), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),])
dataset = dset.CIFAR10(root='./data', download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batchSize, shuffle=True, num_workers=2)

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)

class G(nn.Module):
    def __init__(self):
        super(G, self).__init__()
        self.main = nn.Sequential(
            nn.ConvTranspose2d(100, 512, 4, 1, 0, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.ConvTranspose2d(512, 256, 4, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(True),
            nn.ConvTranspose2d(256, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False),
            nn.Tanh()
        )

    def forward(self, input):
        output = self.main(input)
        return output

netG = G()
netG.load_state_dict(torch.load('generator.pth'))
netG.eval()
# netG.apply(weights_init)

class D(nn.Module):
    def __init__(self):
        super(D, self).__init__()
        self.main = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 128, 4, 2, 1, bias=False),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 256, 4, 2, 1, bias=False),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(256, 512, 4, 2, 1, bias=False),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(512, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )

    def forward(self, input):
        output = self.main(input)
        return output.view(-1)

netD = D()
netD.load_state_dict(torch.load('discriminator.pth'))
netD.eval()
# netD.apply(weights_init)

criterion = nn.BCELoss()
checkpoint = torch.load('discriminator.pth')
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerD.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
errD = checkpoint['loss']
checkpoint1 = torch.load('genrator.pth')
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG.load_state_dict(checkpoint1['optimizer_state_dict'])
errG = checkpoint1['loss']
k = epoch

for j in range(k, 10):
    for i, data in enumerate(dataloader, 0):
        netD.zero_grad()
        real, _ = data
        input = Variable(real)
        target = Variable(torch.ones(input.size()[0]))
        output = netD(input)
        errD_real = criterion(output, target)
        noise = Variable(torch.randn(input.size()[0], 100, 1, 1))
        fake = netG(noise)
        target = Variable(torch.zeros(input.size()[0]))
        output = netD(fake.detach())
        errD_fake = criterion(output, target)
        errD = errD_real + errD_fake
        errD.backward()
        optimizerD.step()
        netG.zero_grad()
        target = Variable(torch.ones(input.size()[0]))
        output = netD(fake)
        errG = criterion(output, target)
        errG.backward()
        optimizerG.step()
        print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f' % (epoch+1, 10, i+1, len(dataloader), errD.data, errG.data))
        if i % 100 == 0:
            vutils.save_image(real, '%s/real_samples.png' % "./results", normalize=True)
            fake = netG(noise)
            vutils.save_image(fake.data, '%s/fake_samples_epoch_%03d.png' % ("./results", epoch+1), normalize=True)
    torch.save({
        'epoch': epoch,
        'model_state_dict': netD.state_dict(),
        'optimizer_state_dict': optimizerD.state_dict(),
        'loss': errD
    }, 'discriminator.pth')
    torch.save({
        'epoch': epoch,
        'model_state_dict': netG.state_dict(),
        'optimizer_state_dict': optimizerG.state_dict(),
        'loss': errG
    }, 'generator.pth')
Here is the error:
RuntimeError Traceback (most recent call last)
<ipython-input-23-3e55546152c7> in <module>()
26 # Creating the generator
27 netG = G()
---> 28 netG.load_state_dict(torch.load('generator.pth'))
29 netG.eval()
30 #netG.apply(weights_init)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in load_state_dict(self, state_dict, strict)
767 if len(error_msgs) > 0:
768 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
--> 769 self.__class__.__name__, "\n\t".join(error_msgs)))
770
771 def _named_members(self, get_members_fn, prefix='', recurse=True):
RuntimeError: Error(s) in loading state_dict for G:
Missing key(s) in state_dict: "main.0.weight", "main.1.weight", "main.1.bias", "main.1.running_mean", "main.1.running_var", "main.3.weight", "main.4.weight", "main.4.bias", "main.4.running_mean", "main.4.running_var", "main.6.weight", "main.7.weight", "main.7.bias", "main.7.running_mean", "main.7.running_var", "main.9.weight", "main.10.weight", "main.10.bias", "main.10.running_mean", "main.10.running_var", "main.12.weight".
Unexpected key(s) in state_dict: "epoch", "model_state_dict", "optimizer_state_dict", "loss".
You need to access the 'model_state_dict' key inside the loaded checkpoint.
Try:
netG.load_state_dict(torch.load('generator.pth')['model_state_dict'])
You'll probably need to apply the same fix to the discriminator as well.
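For completeness, a sketch of restoring everything the checkpoint dictionary holds (the keys come from the torch.save calls in the question):

checkpoint = torch.load('generator.pth')
netG = G()
netG.load_state_dict(checkpoint['model_state_dict'])  # the model weights live under this key

optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
errG = checkpoint['loss']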

Why are there strips in the predicted image in Keras?

In my test, I do two-class segmentation in Keras to mask cloud from satellite images.
There are two samples for training, validation, and testing, intentionally chosen to overfit.
The predicted image is very strange, with strips like this (right is the predicted label; left are the image and label; screenshot omitted).
My code is below. What's wrong with my code, or is this a problem with Keras?
#coding=utf-8
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, BatchNormalization, Reshape, Permute, Activation
from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import img_to_array
from keras.optimizers import SGD
from keras.optimizers import RMSprop, Adadelta, Adagrad, Adam
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from sklearn.preprocessing import LabelEncoder
from PIL import Image
import matplotlib.pyplot as plt
from libtiff import TIFF
from skimage import exposure
from keras import backend as k
from keras.callbacks import TensorBoard
import pandas as pd
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
set_session(tf.Session(config=config))

seed = 7
np.random.seed(seed)

#data_shape = 360*480
img_w = 256
img_h = 256
n_label = 2
classes = [0., 1.]
labelencoder = LabelEncoder()
labelencoder.fit(classes)

def load_img(path, grayscale=False, target_size=None):
    img = Image.open(path)
    if grayscale:
        if img.mode != 'L':
            img = img.convert('L')
    if target_size:
        wh_tuple = (target_size[1], target_size[0])
        if img.size != wh_tuple:
            img = img.resize(wh_tuple)
    return img

train_url = open(r'/media/private/yanyuan/yan/image4band16bit/train104/test4.txt', 'r').readlines()
#trainval_url = open(r'/media/wmy/document/BigData/VOCdevkit/VOC2012/ImageSets/Segmentation/trainval.txt','r').readlines()
val_url = open(r'/media/private/yanyuan/yan/image4band16bit/train104/test4.txt', 'r').readlines()
train_numb = len(train_url)
valid_numb = len(val_url)
print "the number of train data is", train_numb
print "the number of val data is", valid_numb

directory = '/media/private/yanyuan/yan/image_block/image256/'

def generateData(batch_size):
    with open(r'/media/private/yanyuan/yan/image4band16bit/train104/test4.txt', 'r') as f:
        train_url = [line.strip('\n') for line in f.readlines()]
    while True:
        train_data = []
        train_label = []
        batch = 0
        for url in train_url:
            batch += 1
            # img = load_img(filepath + 'JPEGImages/' + url.strip('\n') + '.jpg', target_size=(img_w, img_h))
            # img = img_to_array(img)
            tif = TIFF.open(directory + 'images/' + url + '.tiff')
            img = tif.read_image()
            mean_vec = np.array([456, 495, 440, 446], dtype=np.float32)
            mean_vec = mean_vec.reshape(1, 1, 4)
            TIFF.close(tif)
            img = np.array(img, dtype=np.float32)
            img = img - mean_vec
            img *= 1.525902189e-5
            # print img.shape
            train_data.append(img)
            # label = load_img(filepath + 'SegmentationClass/' + url.strip('\n') + '.png', target_size=(img_w, img_h))
            label = load_img(directory + 'labels/' + url + '.png', target_size=(img_w, img_h))
            label = img_to_array(label).reshape((img_w * img_h,))
            # print label.shape
            train_label.append(label)
            if batch % batch_size == 0:
                train_data = np.array(train_data)
                train_label = np.array(train_label).flatten()
                train_label = labelencoder.transform(train_label)
                train_label = to_categorical(train_label, num_classes=n_label)
                train_label = train_label.reshape((batch_size, img_w * img_h, n_label))
                yield (train_data, train_label)
                train_data = []
                train_label = []
                batch = 0

def generateValidData(batch_size):
    with open(r'/media/private/yanyuan/yan/image4band16bit/train104/test4.txt', 'r') as f:
        val_url = [line.strip('\n') for line in f.readlines()]
    while True:
        valid_data = []
        valid_label = []
        batch = 0
        for url in val_url:
            batch += 1
            # img = load_img(filepath + 'JPEGImages/' + url.strip('\n') + '.jpg', target_size=(img_w, img_h))
            # img = img_to_array(img)
            tif = TIFF.open(directory + 'images/' + url + '.tiff')
            img = tif.read_image()
            mean_vec = np.array([456, 495, 440, 446], dtype=np.float32)
            mean_vec = mean_vec.reshape(1, 1, 4)
            TIFF.close(tif)
            img = np.array(img, dtype=np.float32)
            img = img - mean_vec
            img *= 1.525902189e-5
            # print(img.shape)
            valid_data.append(img)
            label = load_img(directory + 'labels/' + url + '.png', target_size=(img_w, img_h))
            label = img_to_array(label).reshape((img_w * img_h,))
            # print label.shape
            valid_label.append(label)
            if batch % batch_size == 0:
                valid_data = np.array(valid_data)
                valid_label = np.array(valid_label).flatten()
                valid_label = labelencoder.transform(valid_label)
                valid_label = to_categorical(valid_label, num_classes=n_label)
                valid_label = valid_label.reshape((batch_size, img_w * img_h, n_label))
                yield (valid_data, valid_label)
                valid_data = []
                valid_label = []
                batch = 0

def SegNet():
    model = Sequential()
    # encoder
    model.add(Conv2D(64, (7, 7), strides=(1, 1), input_shape=(img_w, img_h, 4), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # (128,128)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # (64,64)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # (32,32)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # (16,16)
    # decoder
    model.add(UpSampling2D(size=(2, 2)))
    # (16,16)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    # (32,32)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    # (64,64)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    model.add(UpSampling2D(size=(2, 2)))
    # (128,128)
    model.add(Conv2D(64, (7, 7), strides=(1, 1), padding='same'))
    model.add(BatchNormalization())
    # (256,256)
    model.add(Conv2D(n_label, (1, 1), strides=(1, 1), padding='same'))
    model.add(Reshape((n_label, img_w * img_h)))
    model.add(Permute((2, 1)))
    model.add(Activation('softmax'))
    sgd = SGD(lr=0.1, momentum=0.95, decay=0.0005, nesterov=False)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0005)
    #model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy'])
    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=sgd, metrics=['accuracy'])
    model.summary()
    return model

def train():
    k.set_learning_phase(1)
    model = SegNet()
    modelcheck = ModelCheckpoint('Segnet_params.h5', monitor='val_acc', save_best_only=True, mode='auto')
    callable = [modelcheck]
    history = model.fit_generator(verbose=2,
                                  generator=generateData(2),
                                  validation_data=generateValidData(2),
                                  steps_per_epoch=1,
                                  epochs=600,
                                  callbacks=callable,
                                  max_queue_size=1,
                                  class_weight=None,
                                  validation_steps=1)
    drawLoss(history)

def drawLoss(history):
    plt.figure()
    plt.plot(history.history['acc'], 'g')
    plt.plot(history.history['val_acc'], 'r')
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.figure()
    plt.plot(history.history['loss'], 'g')
    plt.plot(history.history['val_loss'], 'r')
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

def predict():
    k.set_learning_phase(0)
    model = SegNet()
    model.load_weights('Segnet_params.h5')
    file = open(r'/media/private/yanyuan/yan/image4band16bit/train104/test4.txt', 'r')
    train_url = [line.strip('\n') for line in file.readlines()]
    for url in train_url:
        tif = TIFF.open(directory + 'images/' + url + '.tiff')
        img = tif.read_image()
        TIFF.close(tif)
        mean_vec = np.array([456, 495, 440, 446], dtype=np.float32)
        mean_vec = mean_vec.reshape(1, 1, 4)
        img = np.array(img, dtype=np.float32)
        img = img - mean_vec
        img *= 1.525902189e-5
        im = np.empty_like(img)
        for j in range(4):
            l_val, r_val = np.percentile(img[:, :, j], (2, 98), interpolation='linear')
            im[:, :, j] = exposure.rescale_intensity(img[:, :, j], in_range=(l_val, r_val), out_range='uint8')
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        img = img.reshape(1, img_h, img_w, -1)
        pred = model.predict_classes(img, verbose=2)
        pred = labelencoder.inverse_transform(pred[0])
        print np.unique(pred)
        pred = pred.reshape((img_h, img_w)).astype(np.uint8)
        pred_img = Image.fromarray(pred)
        # pred_img.save('1.png', format='png')
        label = load_img(directory + 'labels/' + url + '.png', target_size=(img_w, img_h))
        #print pred
        plt.figure()
        plt.subplot(2, 2, 1)
        plt.imshow(im)
        plt.subplot(2, 2, 2)
        plt.imshow(pred)
        plt.subplot(2, 2, 3)
        plt.imshow(label)
        plt.show()

if __name__ == '__main__':
    train()
    predict()
Thanks in advance.

CNN for cifar10 dataset in Tensorflow

I am trying to replicate results obtained by a convolutional neural network for CIFAR-10 using TensorFlow, but after some epochs (~60) my accuracy stays around 10%, so I don't know whether the CNN is actually training.
This code is based on Deep MNIST for Experts (https://www.tensorflow.org/get_started/mnist/pros); however, on CIFAR-10 it does not work:
import numpy as np
import tensorflow as tf

def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    dict = cPickle.load(fo)
    fo.close()
    return dict

# unpacking training and test data
b1 = unpickle("~/cifar-10-batches-py/data_batch_1")
b2 = unpickle("~/cifar-10-batches-py/data_batch_2")
b3 = unpickle("~/cifar-10-batches-py/data_batch_3")
b4 = unpickle("~/cifar-10-batches-py/data_batch_4")
b5 = unpickle("~/cifar-10-batches-py/data_batch_5")
test = unpickle("~/cifar-10-batches-py/test_batch")

# Preparing test data
test_data = test['data']
test_label = test['labels']

# Preparing training data
train_data = np.concatenate([b1['data'], b2['data'], b3['data'], b4['data'], b5['data']], axis=0)
train_label = np.concatenate([b1['labels'], b2['labels'], b3['labels'], b4['labels'], b5['labels']], axis=0)

# Reshaping data
train_data = np.reshape(train_data, [50000, 32, 32, 3])
test_data = np.reshape(test_data, [10000, 32, 32, 3])

batch_size = 100
image_width = 32
image_height = 32
channels = 3

# Constructing graph
x = tf.placeholder(tf.float32, [None, image_width, image_height, channels])  # training data
y = tf.placeholder(tf.int32, [None])
one_hot = tf.one_hot(y, depth=10)  # converting to one-hot vectors

# Constructing CNN layers
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Given an input tensor of shape [batch, in_height, in_width, in_channels] and a filter/kernel tensor of shape
# [filter_height, filter_width, in_channels, out_channels], taken from: http://textminingonline.com/dive-into-tensorflow-part-v-deep-mnist
W_conv1 = weight_variable([7, 7, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

W_conv2 = weight_variable([5, 5, 32, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

W_conv3 = weight_variable([5, 5, 32, 64])
b_conv3 = bias_variable([64])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)

# Constructing MLP layers
W_fc1 = weight_variable([8 * 8 * 64, 64])
b_fc1 = bias_variable([64])
h_pool3_flat = tf.reshape(h_conv3, [-1, 8 * 8 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)

W_fc2 = weight_variable([64, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)

# Computing cost function
cross_entropy = -tf.reduce_sum(one_hot * tf.log(tf.clip_by_value(y_conv, 1e-10, 1e20)))
train_step = tf.train.MomentumOptimizer(learning_rate=0.0001, momentum=0.9).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

init = tf.initialize_all_variables()
sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=16))
sess.run(init)

epochs = 100
b_per = 0
row = []
for e in range(epochs):
    print("epoch", e)
    avg_cost = 0
    # for each batch
    for j in range(int(train_data.shape[0] / batch_size)):
        subset = range((j * batch_size), ((j + 1) * batch_size))
        data = train_data[subset, :, :, :]
        label = train_label[subset]
        _, c = sess.run([train_step, cross_entropy], feed_dict={x: data, y: label})
        avg_cost += c / data.shape[0]
        # print(avg_cost)
        b_per = b_per + 1
        if b_per % 10 == 0:
            row.append(sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
            print(row[-1])
The error is in the data reshaping part! It should be:
# Reshaping data: CIFAR-10 rows are channel-major, so reshape to (N, 3, 32, 32)
# and then transpose to (N, 32, 32, 3) for the NHWC placeholder.
train_data = train_data.reshape(50000, 3, 32, 32).transpose(
    0, 2, 3, 1).astype("uint8")
test_data = test_data.reshape(10000, 3, 32, 32).transpose(
    0, 2, 3, 1).astype("uint8")
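To see why: each 3072-value CIFAR-10 row stores the 1024 red values first, then green, then blue, so the channel axis comes first when reshaping, and reshaping directly to [N, 32, 32, 3] scrambles pixels across channels. A tiny sketch of the layout:

import numpy as np

row = np.arange(3072)                            # one CIFAR-10 row: 1024 R, then 1024 G, then 1024 B
img = row.reshape(3, 32, 32).transpose(1, 2, 0)  # channels-first reshape, then move channels last
print(img[0, 0])                                 # [0 1024 2048]: the three channel values of the first pixel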
