Why does x.view(batch_size, -1) give the same outputs?

I am a freshman in neural networks and I have built a VGG16 network, but in every batch all the inputs lead to the same outputs. So I checked the output of every layer and finally found that x = x.view(batch_size, -1) gives the same outputs! I have no idea why this happens. Here is part of my code:
class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=1, kernel_size=3)  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=1, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(64, 128, padding=1, kernel_size=3)
        self.conv4 = torch.nn.Conv2d(128, 128, padding=1, kernel_size=3)
        self.conv5 = torch.nn.Conv2d(128, 256, padding=1, kernel_size=3)
        self.conv6 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv7 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv8 = torch.nn.Conv2d(256, 512, padding=1, kernel_size=3)
        self.conv9 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv10 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv11 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv12 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv13 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.conv1(x))                 # layer1
        x = self.pooling(F.relu(self.conv2(x)))   # layer2
        x = F.relu(self.conv3(x))                 # layer3
        x = self.pooling(F.relu(self.conv4(x)))   # layer4
        x = F.relu(self.conv5(x))                 # layer5
        x = F.relu(self.conv6(x))                 # layer6
        x = self.pooling(F.relu(self.conv7(x)))   # layer7
        x = F.relu(self.conv8(x))                 # layer8
        x = F.relu(self.conv9(x))                 # layer9
        x = self.pooling(F.relu(self.conv10(x)))  # layer10
        x = F.relu(self.conv11(x))                # layer11
        x = F.relu(self.conv12(x))                # layer12
        x = self.pooling(F.relu(self.conv13(x)))  # layer13
        x = x.view(batch_size, -1)                # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
This is the training part:
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, true_labels = data
        optimizer.zero_grad()  # clear the optimizer to avoid accumulating grads
        # forward
        outputs = model(inputs)
        loss = criterion(outputs, true_labels)
        # backward
        loss.backward()
        # update
        optimizer.step()
        running_loss += loss.item()
        # print the training result every 10 batches
        if (batch_idx + 1) % 10 == 0:
            print('[%d %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 10))
            running_loss = 0.0
These are the outputs of layer 13 (before view): [screenshot]
These are the outputs of x.view: [screenshot]
I have been searching the net for a long time, but to no avail. Any ideas?
Thanks in advance.

Use of view() method
import torch
torch.tensor([[1, 2, 3], [4, 5, 6]]).view(3, 2)
# tensor([[1, 2],
#         [3, 4],
#         [5, 6]])
Hence there is no change in the tensor's values; view() just changes its shape.
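Since view() only reshapes, identical rows after flattening mean the feature maps were already identical before it; the problem is upstream of view(). A minimal sketch to convince yourself (shapes assumed from the VGG16 above):

import torch

x = torch.randn(4, 512, 7, 7)            # stand-in for the output of layer 13
flat = x.view(x.size(0), -1)             # (4, 25088): one row per sample
# view() changes only the shape metadata, never the values
assert torch.equal(flat[0], x[0].reshape(-1))
# so if flat's rows are identical, the rows of x were identical already
print(torch.allclose(flat[0], flat[1]))  # False for random input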

Related

How do I create a branched AlexNet in PyTorch?

I am attempting to create a near-identical model architecture to AlexNet, except that each channel (red, green, and blue) is disconnected into its own branch and the branches are all concatenated at the end for the classifier.
Similar architecture to this: [diagram]
The base network:
class AlexNet(nn.Module):
    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        _log_api_usage_once(self)
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
Training
def train_epoch(self, epoch, total):
    self.model.train()
    for batch_idx, (features, targets) in enumerate(self.train_loader):
        features = features.to(self.device)
        targets = targets.to(self.device)
        logits = self.model(features)
        loss = self.loss_func(logits, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
I would like each channel to go through its own feature extractor, but have them combined for classification:
red = features[:, 0:1, :, :]
green = features[:, 1:2, :, :]
blue = features[:, 2:3, :, :]
logits = self.model([red, green, blue])
I have seen people use groups but I am not sure how to implement it fully.
Any help is greatly appreciated.
Since each branch/head would take an image with one channel, you could start by just replacing the 3 in the first CNN layer with 1:
nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
Now you can send the three single-channel images through the self.features layers and concatenate them before passing them to the self.classifier layers:
import torch
import torch.nn as nn

class AlexNet(nn.Module):
    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(6912, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x_r: torch.Tensor, x_g: torch.Tensor, x_b: torch.Tensor) -> torch.Tensor:
        x_r = self.features(x_r)
        x_r = torch.flatten(self.avgpool(x_r), 1)
        x_g = self.features(x_g)
        x_g = torch.flatten(self.avgpool(x_g), 1)
        x_b = self.features(x_b)
        x_b = torch.flatten(self.avgpool(x_b), 1)
        x = torch.concat((x_r, x_g, x_b), -1)
        x = self.classifier(x)
        return x

model = AlexNet()
img = torch.rand(1, 3, 256, 256)  # full 3-channel image (not used below)
img_r = torch.rand(1, 1, 256, 256)
img_g = torch.rand(1, 1, 256, 256)
img_b = torch.rand(1, 1, 256, 256)
output = model(img_r, img_g, img_b)
Note that I changed self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) to self.avgpool = nn.AdaptiveAvgPool2d((3, 3)) because the output size of each flattened branch was really big (9216). Now it is 2304, and by concatenating the three branches you get a tensor of size 6912. Hope this helps :)
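As for the groups mentioned in the question: a grouped convolution can express the same "disconnected channels" idea in a single forward pass instead of three. A rough sketch (the channel counts here are illustrative, not AlexNet's; they only need to be divisible by groups=3):

import torch
import torch.nn as nn

# With groups=3 the input channels are split into three groups, each
# convolved by its own filters, so R, G and B never mix inside features.
features = nn.Sequential(
    nn.Conv2d(3, 192, kernel_size=11, stride=4, padding=2, groups=3),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(192, 384, kernel_size=5, padding=2, groups=3),
    nn.ReLU(inplace=True),
)

x = torch.rand(1, 3, 256, 256)
print(features(x).shape)  # torch.Size([1, 384, 31, 31])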

I simply wrote a vgg16 but it doesn't work

I have written a VGG16 for binary classification by myself with PyTorch, and I found that its outputs are the same for all inputs in the same batch. After backpropagation the outputs change, but they are still the same for all data in the same batch. I have no idea why this happens. I tried the vgg16 from torchvision and it works perfectly on the same dataset, so I am sure there is a problem in my model.
Here is the code of my own model:
class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=(1,1), kernel_size=(3,3))  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=(1,1), kernel_size=(3,3))
        self.conv3 = torch.nn.Conv2d(64, 128, padding=(1,1), kernel_size=(3,3))
        self.conv4 = torch.nn.Conv2d(128, 128, padding=(1,1), kernel_size=(3,3))
        self.conv5 = torch.nn.Conv2d(128, 256, padding=(1,1), kernel_size=(3,3))
        self.conv6 = torch.nn.Conv2d(256, 256, padding=(1,1), kernel_size=(3,3))
        self.conv7 = torch.nn.Conv2d(256, 256, padding=(1,1), kernel_size=(3,3))
        self.conv8 = torch.nn.Conv2d(256, 512, padding=(1,1), kernel_size=(3,3))
        self.conv9 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv10 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv11 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv12 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv13 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)
        self.Avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))
        self.ReLU = nn.ReLU(True)
        # self.Drop = nn.Dropout()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.ReLU(x)                             # layer1
        x = self.pooling(self.ReLU(self.conv2(x)))   # layer2
        x = self.ReLU(self.conv3(x))                 # layer3
        x = self.pooling(self.ReLU(self.conv4(x)))   # layer4
        x = self.ReLU(self.conv5(x))                 # layer5
        x = self.ReLU(self.conv6(x))                 # layer6
        x = self.pooling(self.ReLU(self.conv7(x)))   # layer7
        x = self.ReLU(self.conv8(x))                 # layer8
        x = self.ReLU(self.conv9(x))                 # layer9
        x = self.pooling(self.ReLU(self.conv10(x)))  # layer10
        x = self.ReLU(self.conv11(x))                # layer11
        x = self.ReLU(self.conv12(x))                # layer12
        x = self.pooling(self.ReLU(self.conv13(x)))  # layer13
        x = self.Avgpool(x)
        # x = x.view(batch_size, -1)
        x = torch.flatten(x, 1)  # flatten
        # print(x.shape)  # for debugging
        # print(x)
        x = self.ReLU(self.fc1(x))
        # x = self.Drop(x)
        x = self.ReLU(self.fc2(x))
        # x = self.Drop(x)
        x = self.fc3(x)
        # x = F.sigmoid(x)
        return x
Here are the outputs for the same batch:
[screenshot: outputs for one batch]
[screenshot: outputs for another batch]
This problem has been bothering me for days; I would be most grateful if you could point out my mistakes.
Here are some inputs with label 0: [input images 1-3]
Inputs with label 1: [input images 4-6]
I faced the same error. Try this out, it worked for me: call this function in __init__.

def _initialize_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(
                m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.constant_(m.bias, 0)
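A minimal sketch of how that wiring could look in the VGG16 above (only conv1 is shown; the remaining layers are as already defined):

import torch
import torch.nn as nn

class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=(1, 1), kernel_size=(3, 3))
        # ... conv2 through fc3 as defined above ...
        self._initialize_weights()  # run once, after the layers exist

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)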

How to solve the dimension error and effectively use Conv2dTranspose?

I have created a discriminator and a generator file to implement a GAN; however, I am facing this error.
The initial error I faced was in the main.py file, where I call the criterion and pass it the output and label. I solved that error using the squeeze function, so the shape issue was resolved.
Before using squeeze, the error showed that the shapes of the output and the labels did not match (the shapes were (7,1,1,1) and (7) for the output and the label respectively).
import torch
from torch import nn

class generatorG(nn.Module):
    def __init__(self):
        super(generatorG, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(4, 4), stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t6 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
            nn.BatchNorm2d(4000),
            nn.ReLU()
        )
        self.t7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.t8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.t9 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.t10 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        x = self.t6(x)
        x = self.t7(x)
        x = self.t8(x)
        x = self.t9(x)
        x = self.t10(x)
        return x

model = generatorG()
# torch.randn() requires a size; with (1, 3, 128, 128) this reproduces the t7 channel mismatch below
print(model(torch.randn(1, 3, 128, 128)).shape)
Discriminator File
import torch
from torch import nn

class DiscriminatorD(nn.Module):
    def __init__(self):
        super(DiscriminatorD, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        return x
main.py file
from generator import *
from discriminator import *
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import utils
epochs = 100
Batch_Size = 64
lr = 0.0002
beta1 = 0.5
over = 4
parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', default = 'dataset/train', help = 'path to dataset')
opt = parser.parse_args()
try:
    os.makedirs('result/train/cropped')
    os.makedirs('result/train/real')
    os.makedirs('result/train/recon')
    os.makedirs('model/')
except:
    pass
transform = transforms.Compose([transforms.Scale(128),
                                transforms.CenterCrop(128),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.ImageFolder(root=opt.dataroot, transform= transform)
assert dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=Batch_Size, shuffle=True, num_workers=0)
wtl2 = 0.999
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.2)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
resume_epoch = 0
netG = generatorG()
netG.apply(weights_init)
netD = DiscriminatorD()
netD.apply(weights_init)
criterion = nn.BCELoss()
criterionMSE = nn.MSELoss()
input_real = torch.FloatTensor(Batch_Size, 3, 128, 128)
input_cropped = torch.FloatTensor(Batch_Size, 3, 128, 128)
label = torch.FloatTensor(Batch_Size)
real_label = 1
fake_label = 0
real_center = torch.FloatTensor(Batch_Size, 3, 64, 64)
input_real = Variable(input_real)
input_cropped = Variable(input_cropped)
label = Variable(label)
real_center = Variable(real_center)
optimizerD = optim.Adam(netD.parameters(), lr = lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = lr, betas = (beta1, 0.999))
over = 4
for epoch in range(resume_epoch, epochs):
    for i, data in enumerate(dataloader, 0):
        real_cpu, _ = data
        real_center_cpu = real_cpu[:, :, int(128/4):int(128/4)+int(128/2), int(128/4):int(128/4)+int(128/2)]
        batch_size = real_cpu.size(0)
        with torch.no_grad():
            input_real.resize_(real_cpu.size()).copy_(real_cpu)
            input_cropped.resize_(real_cpu.size()).copy_(real_cpu)
            real_center.resize_(real_center_cpu.size()).copy_(real_center_cpu)
        input_cropped[:, 0, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 117.0 / 255.0 - 1.0
        input_cropped[:, 1, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 104.0 / 255.0 - 1.0
        input_cropped[:, 2, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 123.0 / 255.0 - 1.0
        netD.zero_grad()
        with torch.no_grad():
            label.resize_(batch_size).fill_(real_label)
        output = netD(real_center)
        # output = torch.unsqueeze(output, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        print(output.shape)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        print(label.shape)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()
        print(input_cropped.shape)
        fake = netG(input_cropped)
        label.data.fill_(fake_label)
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()
        netG.zero_grad()
        label.data.fill_(real_label)  # fake labels are real for generator cost
        output = netD(fake)
        errG_D = criterion(output, label)
        wtl2Matrix = real_center.clone()
        wtl2Matrix.data.fill_(wtl2 * 10)
        wtl2Matrix.data[:, :, int(over):int(128 / 2 - over), int(over):int(128 / 2 - over)] = wtl2
        errG_l2 = (fake - real_center).pow(2)
        errG_l2 = errG_l2 * wtl2Matrix
        errG_l2 = errG_l2.mean()
        errG = (1 - wtl2) * errG_D + wtl2 * errG_l2
        errG.backward()
        D_G_z2 = output.data.mean()
        optimizerG.step()
        print('[%d / %d][%d / %d] Loss_D: %.4f Loss_G: %.4f / %.4f l_D(x): %.4f l_D(G(z)): %.4f'
              % (epoch, epochs, i, len(dataloader),
                 errD.data, errG_D.data, errG_l2.data, D_x, D_G_z1,))
        if i % 100 == 0:
            vutils.save_image(real_cpu,
                              'result/train/real/real_samples_epoch_%03d.png' % (epoch))
            vutils.save_image(input_cropped.data,
                              'result/train/cropped/cropped_samples_epoch_%03d.png' % (epoch))
            recon_image = input_cropped.clone()
            recon_image.data[:, :, int(128 / 4):int(128 / 4 + 128 / 2), int(128 / 4):int(128 / 4 + 128 / 2)] = fake.data
            vutils.save_image(recon_image.data,
                              'result/train/recon/recon_center_samples_epoch_%03d.png' % (epoch))
utils file
import torch
from PIL import Image
from torch.autograd import Variable
def load_image(filename, size=None, scale=None):
    img = Image.open(filename)
    if size is not None:
        img = img.resize((size, size), Image.ANTIALIAS)
    elif scale is not None:
        img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
    return img

def save_image(filename, data):
    img = data.clone().add(1).div(2).mul(255).clamp(0, 255).numpy()
    img = img.transpose(1, 2, 0).astype('uint8')
    img = Image.fromarray(img)
    img.save(filename)

def gram_matrix(y):
    (b, ch, h, w) = y.size()
    features = y.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch * h * w)
    return gram

def normalize_batch(batch):
    mean = batch.data.new(batch.data.size())
    std = batch.data.new(batch.data.size())
    mean[:, 0, :, :] = 0.485
    mean[:, 1, :, :] = 0.456
    mean[:, 2, :, :] = 0.406
    std[:, 0, :, :] = 0.229
    std[:, 1, :, :] = 0.224
    std[:, 2, :, :] = 0.225
    batch = torch.div(batch, 255.0)
    batch -= Variable(mean)
    # batch /= Variable(std)
    batch = torch.div(batch, Variable(std))
    return batch
Error message
(impaint_env) vivek@Viveks-MacBook-Pro image_impainter % python main.py
/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torchvision/transforms/transforms.py:310: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.
warnings.warn("The use of the transforms.Scale transform is deprecated, " +
torch.Size([7])
torch.Size([7])
torch.Size([7, 3, 128, 128])
Traceback (most recent call last):
File "main.py", line 114, in <module>
fake = netG(input_cropped)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/generator.py", line 70, in forward
x = self.t7(x)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 916, in forward
return F.conv_transpose2d(
RuntimeError: Given transposed=1, weight of size [512, 256, 4, 4], expected input[7, 4000, 1, 1] to have 512 channels, but got 4000 channels instead
You have a "gap" between layer t6 and t7 of your generatorG:
# ...
self.t6 = nn.Sequential(
    nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
    nn.BatchNorm2d(4000),
    nn.ReLU()
)
self.t7 = nn.Sequential(
    nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)
# ...
Your t6 layer expects its input to have 512 channels and outputs a tensor with 4000 channels. However, the next layer, t7, expects its input to have only 512 channels.
You need to adjust either t6 or t7 so that t6 outputs exactly the number of channels t7 expects, i.e. t6's out_channels must equal t7's in_channels.
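A sketch of one possible fix (either layer can be changed; here t6 is narrowed to 512 channels so the decoder half stays untouched):

self.t6 = nn.Sequential(
    nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(4, 4)),
    nn.BatchNorm2d(512),
    nn.ReLU()
)
# ...or keep the 4000-channel bottleneck and widen t7 instead:
# nn.ConvTranspose2d(in_channels=4000, out_channels=256, kernel_size=4, stride=2, padding=1)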

Getting RuntimeError: Graph disconnected: cannot obtain value for tensor

I want to create a custom model from ResNet101 by retrieving one of its layers, called 'avg_pool', and replacing it with my own custom layers. I have done a similar thing with another pre-trained ImageNet model, ResNet50, but I am getting an error with ResNet101. I am a newbie in transfer learning; please point out my mistake.
def resnet101_model(weights_path=None):
    eps = 1.1e-5
    # Handle dimension ordering for different backends
    global bn_axis
    if K.image_dim_ordering() == 'tf':
        bn_axis = 3
        img_input = Input(shape=(224, 224, 3), name='data')
    else:
        bn_axis = 1
        img_input = Input(shape=(3, 224, 224), name='data')
    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name='bn_conv1')(x)
    x = Scale(axis=bn_axis, name='scale_conv1')(x)
    x = Activation('relu', name='conv1_relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for i in range(1, 3):
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b' + str(i))
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    for i in range(1, 23):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b' + str(i))
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    x_fc = AveragePooling2D((7, 7), name='avg_pool')(x)
    x_fc = Flatten()(x_fc)
    x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc)
    model = Model(img_input, x_fc)
    # load weights
    if weights_path:
        model.load_weights(weights_path, by_name=True)
    return model
im = cv2.resize(cv2.imread('human.jpg'), (224, 224)).astype(np.float32)
# Remove train image mean
im[:,:,0] -= 103.939
im[:,:,1] -= 116.779
im[:,:,2] -= 123.68
# Transpose image dimensions (Theano uses the channels as the 1st dimension)
if K.image_dim_ordering() == 'th':
    im = im.transpose((2, 0, 1))
    weights_path = 'resnet101_weights_th.h5'
else:
    weights_path = 'resnet101_weights_tf.h5'
im = np.expand_dims(im, axis=0)
image_input = Input(shape=(224, 224, 3))
model = resnet101_model(weights_path)
model.summary()
last_layer = model.get_layer('avg_pool').output
x = Flatten(name='flatten')(last_layer)
out = Dense(num_classes, activation='softmax', name='fc1000')(x)
custom_resnet_model = Model(inputs=image_input,outputs= out)
custom_resnet_model.summary()
Graph disconnected happens when your inputs and outputs are not connected. In your case, image_input is not connected to out: the loaded model was built on the img_input tensor created inside resnet101_model, and image_input never enters that graph. Build the custom model on the loaded model's own input and it should work.
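A sketch of the fix, reusing the resnet101_model built above (note inputs=model.input rather than the fresh image_input, and new layer names to avoid clashing with the existing flatten/fc1000 layers):

last_layer = model.get_layer('avg_pool').output
x = Flatten(name='flatten_custom')(last_layer)
out = Dense(num_classes, activation='softmax', name='fc_custom')(x)
# model.input is the Input tensor the ResNet graph was actually built on
custom_resnet_model = Model(inputs=model.input, outputs=out)
custom_resnet_model.summary()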

CNN for cifar10 dataset in Tensorflow

I am trying to replicate the results obtained by a convolutional neural network for CIFAR10 using TensorFlow; however, after some epochs (~60) my accuracy is stuck around 10%, so I do not know whether the CNN is training at all.
This code is based on Deep MNIST for Experts (https://www.tensorflow.org/get_started/mnist/pros), but on CIFAR10 it does not work.
import numpy as np
import tensorflow as tf
def unpickle(file):
    import cPickle
    fo = open(file, 'rb')
    dict = cPickle.load(fo)
    fo.close()
    return dict
#unpacking training and test data
b1 = unpickle("~/cifar-10-batches-py/data_batch_1")
b2 = unpickle("~/cifar-10-batches-py/data_batch_2")
b3 = unpickle("~/cifar-10-batches-py/data_batch_3")
b4 = unpickle("~/cifar-10-batches-py/data_batch_4")
b5 = unpickle("~/cifar-10-batches-py/data_batch_5")
test = unpickle("~/cifar-10-batches-py/test_batch")
#Preparing test data
test_data = test['data']
test_label = test['labels']
#Preparing training data
train_data = np.concatenate([b1['data'],b2['data'],b3['data'],b4['data'],b5['data']],axis=0)
train_label = np.concatenate([b1['labels'],b2['labels'],b3['labels'],b4['labels'],b5['labels']],axis=0)
#Reshaping data
train_data = np.reshape(train_data,[50000,32,32,3])
test_data = np.reshape(test_data,[10000,32,32,3])
batch_size = 100
image_width = 32
image_height = 32
channels = 3
#Constructing Graph
x = tf.placeholder(tf.float32, [None, image_width, image_height, channels])#Training Data
y = tf.placeholder(tf.int32, [None])
one_hot = tf.one_hot(y,depth=10)#Converting in one hot vectors
#Constructing CNN Layers
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#Given an input tensor of shape [batch, in_height, in_width, in_channels] and a filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels], taken from: http://textminingonline.com/dive-into-tensorflow-part-v-deep-mnist
W_conv1 = weight_variable([7, 7, 3, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 32])
b_conv2 = bias_variable([32])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_conv3 = weight_variable([5, 5, 32, 64])
b_conv3 = bias_variable([64])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
#Constructing MLP layers
W_fc1 = weight_variable([8 * 8 * 64, 64])
b_fc1 = bias_variable([64])
h_pool3_flat = tf.reshape(h_conv3, [-1, 8*8*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
W_fc2 = weight_variable([64, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)
#Computing Cost function
cross_entropy = -tf.reduce_sum(one_hot*tf.log(tf.clip_by_value(y_conv,1e-10,1e20)))
train_step = tf.train.MomentumOptimizer(learning_rate = 0.0001, momentum = 0.9).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(one_hot,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init = tf.initialize_all_variables()
sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=16))
sess.run(init)
epochs = 100
b_per = 0
row = []
for e in range(epochs):
    print("epoch", e)
    avg_cost = 0
    # for each batch
    for j in range(int(train_data.shape[0] / batch_size)):
        subset = range((j * batch_size), ((j + 1) * batch_size))
        data = train_data[subset, :, :, :]
        label = train_label[subset]
        _, c = sess.run([train_step, cross_entropy], feed_dict={x: data, y: label})
        avg_cost += c / data.shape[0]
        # print(avg_cost)
        b_per = b_per + 1
        if b_per % 10 == 0:
            row.append(sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
            print(row[-1])
The data reshaping part is wrong! It should be:
# Reshaping data
train_data = train_data.reshape(50000, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")
test_data = test_data.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")
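The reason is the CIFAR-10 file layout: each 3072-byte row stores the entire red plane, then the green plane, then the blue plane, so a row must be reshaped channel-first to (3, 32, 32) and then transposed to (32, 32, 3). A quick sanity check, using the b1 batch loaded above:

raw = b1['data']                                     # (10000, 3072), channel-major rows
img = raw[0].reshape(3, 32, 32).transpose(1, 2, 0)   # one (32, 32, 3) image
# pixel (0, 0): red is byte 0, green is byte 1024, blue is byte 2048
assert img[0, 0, 0] == raw[0, 0]
assert img[0, 0, 1] == raw[0, 1024]
assert img[0, 0, 2] == raw[0, 2048]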
