Currently, I'm doing research to create an object detection model using YOLOv2 from scratch. My target data has shape (13, 13, 32), where the 32 channels come from 4 anchor boxes, each with 5 prediction values (confidence_score, delta_x, delta_y, delta_w, delta_h) and 3 class probabilities (my dataset has 3 classes).
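For clarity, this is how I read those 32 channels per cell (just a minimal sketch of the layout described above, using dummy data):

import tensorflow as tf

# Sketch of the target layout: 32 = 4 anchors x (1 confidence + 4 box deltas + 3 class probs)
y = tf.zeros((13, 13, 32))          # one dummy target grid
y = tf.reshape(y, (13, 13, 4, 8))   # split the channels per anchor box
conf = y[..., 0]                    # confidence_score
deltas = y[..., 1:5]                # delta_x, delta_y, delta_w, delta_h
probs = y[..., 5:]                  # 3 class probabilities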
I'm stuck on the loss function: I have built and tried several versions, and I still get a loss with NaN values.
This is the code:
class YOLOLoss(tf.keras.losses.Loss):
    def __init__(self, anchor_box, lambda_coord, lambda_noobj, name='yolo_loss'):
        super(YOLOLoss, self).__init__(name=name)
        self.anchor_box = anchor_box
        self.lambda_coord = lambda_coord
        self.lambda_noobj = lambda_noobj
        cy, cx = tf.meshgrid(tf.range(13), tf.range(13))
        self.cell = tf.stack([cx, cy], axis = -1)
        self.cell = tf.expand_dims(self.cell, axis = -2)
        self.cell = tf.tile(self.cell, [1, 1, 4, 1])
        self.cell = tf.cast(self.cell, tf.float32)

    def call(self, y_true, y_pred):
        y_true = tf.reshape(y_true, (-1, 13, 13, 4, 8))
        y_pred = tf.reshape(y_pred, (-1, 13, 13, 4, 8))
        true_conf = y_true[..., 0]
        pred_conf = y_pred[..., 0]
        true_coord = y_true[..., 1:5]
        pred_coord = y_pred[..., 1:5]
        true_prob = y_true[..., 5:]
        pred_prob = y_pred[..., 5:]
        objectness = tf.where(true_conf == 1, 1., 0.)
        ious = self.iou(true_coord, pred_coord)
        coord_loss = tf.reduce_sum(tf.square(pred_coord - true_coord), axis = -1)
        coord_loss = self.lambda_coord * tf.reduce_sum(objectness * coord_loss)
        object_loss = tf.reduce_sum(objectness * tf.square(tf.math.sigmoid(pred_conf) - ious))
        no_object_loss = self.lambda_noobj * tf.reduce_sum((1 - objectness) * tf.square((tf.math.sigmoid(pred_conf) - 0)))
        class_loss = tf.reduce_sum(tf.square(tf.nn.softmax(pred_prob) - true_prob), axis = -1)
        class_loss = tf.reduce_sum(objectness * class_loss)

        print('')
        print(tf.math.sigmoid(pred_conf))
        print(f'COORDINATE LOSS\t: {coord_loss.numpy()}')
        print(f'OBJECT LOSS\t: {object_loss.numpy()}')
        print(f'NO OBJECT LOSS\t: {no_object_loss.numpy()}')
        print(f'CLASS LOSS\t: {class_loss.numpy()}')
        print('')

        total_loss = coord_loss + object_loss + no_object_loss + class_loss
        return total_loss

    def convXY(self, delta_xy):
        xy_grid = delta_xy * self.anchor_box + 0.5
        xy = 32 * xy_grid + 32 * self.cell
        return tf.round(xy)

    def convWH(self, delta_wh):
        wh_grid = self.anchor_box * tf.math.exp(delta_wh)
        wh = wh_grid * 32
        return tf.round(wh)

    def iou(self, true_coord, pred_coord):
        true_delta_xy = true_coord[..., :2]
        pred_delta_xy = pred_coord[..., :2]
        true_delta_wh = true_coord[..., 2:]
        pred_delta_wh = pred_coord[..., 2:]
        true_xy = self.convXY(true_delta_xy)
        true_wh = self.convWH(true_delta_wh)
        pred_xy = self.convXY(pred_delta_xy)
        pred_wh = self.convWH(pred_delta_wh)
        x1, y1 = true_xy[..., 0], true_xy[..., 1]
        w1, h1 = true_wh[..., 0], true_wh[..., 1]
        x2, y2 = pred_xy[..., 0], pred_xy[..., 1]
        w2, h2 = pred_wh[..., 0], pred_wh[..., 1]
        intersection = tf.math.minimum(x1 + w1, x2 + w2) - tf.math.minimum(x1, x2)
        intersection *= tf.math.minimum(y1 + h1, y2 + h2) - tf.math.minimum(y1, y2)
        area1 = w1 * h1
        area2 = w2 * h2
        union = area1 + area2 - intersection
        iou = intersection / union
        return iou
And this is the Darknet-19 architecture that I created; I don't know whether it is correct or not:
input_layer = tf.keras.layers.Input((416, 416, 3))
# Convolution 1
x = tf.keras.layers.Conv2D(32, (3, 3), padding = 'same', name = 'conv_1')(input_layer)
x = tf.keras.layers.BatchNormalization(name = 'norm_1')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
#Maxpool 1
x = tf.keras.layers.MaxPooling2D((2, 2), strides = 2)(x)
# Convolution 2
x = tf.keras.layers.Conv2D(64, (3, 3), padding = 'same', name = 'conv_2')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_2')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
#Maxpool 2
x = tf.keras.layers.MaxPooling2D((2, 2), strides = 2)(x)
# Convolution 3
x = tf.keras.layers.Conv2D(128, (3, 3), padding = 'same', name = 'conv_3')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_3')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 4
x = tf.keras.layers.Conv2D(64, (1, 1), padding = 'same', name = 'conv_4')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_4')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 5
x = tf.keras.layers.Conv2D(128, (3, 3), padding = 'same', name = 'conv_5')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_5')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
#Maxpool 3
x = tf.keras.layers.MaxPooling2D((2, 2), strides = 2)(x)
# Convolution 6
x = tf.keras.layers.Conv2D(256, (3, 3), padding = 'same', name = 'conv_6')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_6')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 7
x = tf.keras.layers.Conv2D(128, (1, 1), padding = 'same', name = 'conv_7')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_7')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 8
x = tf.keras.layers.Conv2D(256, (3, 3), padding = 'same', name = 'conv_8')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_8')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
#Maxpool 4
x = tf.keras.layers.MaxPooling2D((2, 2), strides = 2)(x)
# Convolution 9
x = tf.keras.layers.Conv2D(512, (3, 3), padding = 'same', name = 'conv_9')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_9')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 10
x = tf.keras.layers.Conv2D(256, (1, 1), padding = 'same', name = 'conv_10')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_10')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 11
x = tf.keras.layers.Conv2D(512, (3, 3), padding = 'same', name = 'conv_11')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_11')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 12
x = tf.keras.layers.Conv2D(256, (1, 1), padding = 'same', name = 'conv_12')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_12')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 13
x = tf.keras.layers.Conv2D(512, (3, 3), padding = 'same', name = 'conv_13')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_13')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
#Maxpool 5
x = tf.keras.layers.MaxPooling2D((2, 2), strides = 2)(x)
# Convolution 14
x = tf.keras.layers.Conv2D(1024, (3, 3), padding = 'same', name = 'conv_14')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_14')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 15
x = tf.keras.layers.Conv2D(512, (3, 3), padding = 'same', name = 'conv_15')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_15')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 16
x = tf.keras.layers.Conv2D(1024, (3, 3), padding = 'same', name = 'conv_16')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_16')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 17
x = tf.keras.layers.Conv2D(512, (3, 3), padding = 'same', name = 'conv_17')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_17')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 18
x = tf.keras.layers.Conv2D(1024, (3, 3), padding = 'same', name = 'conv_18')(x)
x = tf.keras.layers.BatchNormalization(name = 'norm_18')(x)
x = tf.keras.layers.LeakyReLU(alpha = 0.1)(x)
# Convolution 19
output = tf.keras.layers.Conv2D(32, (1, 1), padding = 'same', activation = 'linear', name = 'conv_19')(x)
For the optimizer I use SGD, like this:
optimizer = tf.keras.optimizers.SGD(
    learning_rate = 0.001,
    momentum = 0.9,
    decay = 0.0005
)

model.compile(
    optimizer = optimizer,
    loss = YOLOLoss(anchor_box = anchor_grid, lambda_coord = 5.0, lambda_noobj = 0.5),
    metrics = ['accuracy'],
    run_eagerly = True
)
When I try to train the model, I get NaN for the loss:
[screenshot of the training output showing NaN loss values]
Could someone please tell me what's wrong with my code? I appreciate it! :)
You can see the full jupyter notebook right here : https://colab.research.google.com/drive/19T3geZakP2Wc6oaEqCJX_3tHwBMt60qG?usp=sharing
I am trying to train a YOLOv2 object detection model and hoping for good accuracy, but when I train the model I get a NaN loss value.
Related
I have written a VGG16 for binary classification by myself with PyTorch, and I found that its outputs are the same for all inputs in the same batch. After backpropagation the outputs change, but they are still identical for all data in the same batch. I have no idea why this happens. I tried the vgg16 from torchvision and it works perfectly on the same dataset, so I'm sure there is some problem in my model.
Here is the code of my own model:
class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=(1,1), kernel_size=(3,3))  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=(1,1), kernel_size=(3,3))
        self.conv3 = torch.nn.Conv2d(64, 128, padding=(1,1), kernel_size=(3,3))
        self.conv4 = torch.nn.Conv2d(128, 128, padding=(1,1), kernel_size=(3,3))
        self.conv5 = torch.nn.Conv2d(128, 256, padding=(1,1), kernel_size=(3,3))
        self.conv6 = torch.nn.Conv2d(256, 256, padding=(1,1), kernel_size=(3,3))
        self.conv7 = torch.nn.Conv2d(256, 256, padding=(1,1), kernel_size=(3,3))
        self.conv8 = torch.nn.Conv2d(256, 512, padding=(1,1), kernel_size=(3,3))
        self.conv9 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv10 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv11 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv12 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.conv13 = torch.nn.Conv2d(512, 512, padding=(1,1), kernel_size=(3,3))
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)
        self.Avgpool = torch.nn.AdaptiveAvgPool2d((7, 7))
        self.ReLU = nn.ReLU(True)
        # self.Drop = nn.Dropout()

    def forward(self, x):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.ReLU(x)                              # layer1
        x = self.pooling(self.ReLU(self.conv2(x)))    # layer2
        x = self.ReLU(self.conv3(x))                  # layer3
        x = self.pooling(self.ReLU(self.conv4(x)))    # layer4
        x = self.ReLU(self.conv5(x))                  # layer5
        x = self.ReLU(self.conv6(x))                  # layer6
        x = self.pooling(self.ReLU(self.conv7(x)))    # layer7
        x = self.ReLU(self.conv8(x))                  # layer8
        x = self.ReLU(self.conv9(x))                  # layer9
        x = self.pooling(self.ReLU(self.conv10(x)))   # layer10
        x = self.ReLU(self.conv11(x))                 # layer11
        x = self.ReLU(self.conv12(x))                 # layer12
        x = self.pooling(self.ReLU(self.conv13(x)))   # layer13
        x = self.Avgpool(x)
        # x = x.view(batch_size, -1)
        x = torch.flatten(x, 1)  # flatten
        # print(x.shape)  # for Debug
        # print(x)
        x = self.ReLU(self.fc1(x))
        # x = self.Drop(x)
        x = self.ReLU(self.fc2(x))
        # x = self.Drop(x)
        x = self.fc3(x)
        # x = F.sigmoid(x)
        return x
Here are the outputs for the same batch:
[screenshots: outputs for one batch, outputs for another batch]
This problem has been bothering me for days; I would be most grateful if you could point out my mistakes.
Here are some inputs with label 0:
[images: input1, input2, input3]
Inputs with label 1:
[images: input4, input5, input6]
def _initialize_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(
                m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.constant_(m.bias, 0)
I faced the same error; try this out, it worked for me. Call this function in __init__.
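A minimal usage sketch (assuming the method above is added to the VGG16 class from the question):

class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        # ... define the conv/fc layers exactly as in the question ...
        self._initialize_weights()  # apply the weight initialization once, at construction time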
I have created a discriminator and a generator file to implement a GAN; however, I am facing this error.
The initial error I was facing was in the main.py file, where I call the criterion and pass it the output and label. I solved that error using the squeeze function, so the shape issue was resolved.
Before using squeeze, an error showed that the shapes of the output and the labels did not match (the shapes were (7,1,1,1) and (7) for the output and the label, respectively).
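For reference, the shape fix mentioned above looks roughly like this (hypothetical tensors, just to illustrate the squeeze):

import torch

output = torch.zeros(7, 1, 1, 1)   # discriminator output, shape (7, 1, 1, 1)
label = torch.zeros(7)             # labels, shape (7,)
output = output.squeeze(3).squeeze(2).squeeze(1)   # -> shape (7,)
print(output.shape, label.shape)   # torch.Size([7]) torch.Size([7])

The generator file follows: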
import torch
from torch import nn
class generatorG(nn.Module):
    def __init__(self):
        super(generatorG, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(4,4), stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(4,4), stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(4, 4), stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t6 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
            nn.BatchNorm2d(4000),
            nn.ReLU()
        )
        self.t7 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU()
        )
        self.t8 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.t9 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.t10 = nn.Sequential(
            nn.ConvTranspose2d(in_channels=64, out_channels=3, kernel_size=4, stride=2, padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        x = self.t6(x)
        x = self.t7(x)
        x = self.t8(x)
        x = self.t9(x)
        x = self.t10(x)
        return x

model = generatorG()
print(model(torch.randn()).shape)
Discriminator File
import torch
from torch import nn
class DiscriminatorD(nn.Module):
    def __init__(self):
        super(DiscriminatorD, self).__init__()
        self.t1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2, inplace=True)
        )
        self.t5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=1, kernel_size=4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.t1(x)
        x = self.t2(x)
        x = self.t3(x)
        x = self.t4(x)
        x = self.t5(x)
        return x
main.py file
from generator import *
from discriminator import *
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import utils
epochs = 100
Batch_Size = 64
lr = 0.0002
beta1 = 0.5
over = 4
parser = argparse.ArgumentParser()
parser.add_argument('--dataroot', default = 'dataset/train', help = 'path to dataset')
opt = parser.parse_args()
try:
    os.makedirs('result/train/cropped')
    os.makedirs('result/train/real')
    os.makedirs('result/train/recon')
    os.makedirs('model/')
except:
    pass

transform = transforms.Compose([transforms.Scale(128),
                                transforms.CenterCrop(128),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = dset.ImageFolder(root=opt.dataroot, transform= transform)
assert dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=Batch_Size, shuffle=True, num_workers=0)
wtl2 = 0.999
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.2)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
resume_epoch = 0
netG = generatorG()
netG.apply(weights_init)
netD = DiscriminatorD()
netD.apply(weights_init)
criterion = nn.BCELoss()
criterionMSE = nn.MSELoss()
input_real = torch.FloatTensor(Batch_Size, 3, 128, 128)
input_cropped = torch.FloatTensor(Batch_Size, 3, 128, 128)
label = torch.FloatTensor(Batch_Size)
real_label = 1
fake_label = 0
real_center = torch.FloatTensor(Batch_Size, 3, 64, 64)
input_real = Variable(input_real)
input_cropped = Variable(input_cropped)
label = Variable(label)
real_center = Variable(real_center)
optimizerD = optim.Adam(netD.parameters(), lr = lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr = lr, betas = (beta1, 0.999))
over = 4
for epoch in range(resume_epoch, epochs):
    for i, data in enumerate(dataloader, 0):
        real_cpu, _ = data
        real_center_cpu = real_cpu[:, :, int(128/4):int(128/4)+int(128/2), int(128/4):int(128/4)+int(128/2)]
        batch_size = real_cpu.size(0)
        with torch.no_grad():
            input_real.resize_(real_cpu.size()).copy_(real_cpu)
            input_cropped.resize_(real_cpu.size()).copy_(real_cpu)
            real_center.resize_(real_center_cpu.size()).copy_(real_center_cpu)
            input_cropped[:, 0, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 117.0 / 255.0 - 1.0
            input_cropped[:, 1, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 104.0 / 255.0 - 1.0
            input_cropped[:, 2, int(128 / 4 + over):int(128 / 4 + 128 / 2 - over), int(128 / 4 + over):int(128 / 4 + 128 / 2 - over)] = 2 * 123.0 / 255.0 - 1.0

        netD.zero_grad()
        with torch.no_grad():
            label.resize_(batch_size).fill_(real_label)

        output = netD(real_center)
        # output = torch.unsqueeze(output[0, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        output = torch.squeeze(output, 1)
        print(output.shape)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        # label = label.unsqueeze(1)
        print(label.shape)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.data.mean()

        print(input_cropped.shape)
        fake = netG(input_cropped)
        label.data.fill_(fake_label)
        output = netD(fake.detach())
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.data.mean()
        errD = errD_real + errD_fake
        optimizerD.step()

        netG.zero_grad()
        label.data.fill_(real_label)  # fake labels are real for generator cost
        output = netD(fake)
        errG_D = criterion(output, label)

        wtl2Matrix = real_center.clone()
        wtl2Matrix.data.fill_(wtl2 * 10)
        wtl2Matrix.data[:, :, int(over):int(128 / 2 - over), int(over):int(128 / 2 - over)] = wtl2

        errG_l2 = (fake - real_center).pow(2)
        errG_l2 = errG_l2 * wtl2Matrix
        errG_l2 = errG_l2.mean()

        errG = (1 - wtl2) * errG_D + wtl2 * errG_l2
        errG.backward()

        D_G_z2 = output.data.mean()
        optimizerG.step()

        print('[%d / %d][%d / %d] Loss_D: %.4f Loss_G: %.4f / %.4f l_D(x): %.4f l_D(G(z)): %.4f'
              % (epoch, epochs, i, len(dataloader),
                 errD.data, errG_D.data, errG_l2.data, D_x, D_G_z1,))

        if i % 100 == 0:
            vutils.save_image(real_cpu,
                              'result/train/real/real_samples_epoch_%03d.png' % (epoch))
            vutils.save_image(input_cropped.data,
                              'result/train/cropped/cropped_samples_epoch_%03d.png' % (epoch))
            recon_image = input_cropped.clone()
            recon_image.data[:, :, int(128 / 4):int(128 / 4 + 128 / 2), int(128 / 4):int(128 / 4 + 128 / 2)] = fake.data
            vutils.save_image(recon_image.data,
                              'result/train/recon/recon_center_samples_epoch_%03d.png' % (epoch))
utils file
import torch
from PIL import Image
from torch.autograd import Variable
def load_image(filename, size=None, scale=None):
    img = Image.open(filename)
    if size is not None:
        img = img.resize((size, size), Image.ANTIALIAS)
    elif scale is not None:
        img = img.resize((int(img.size[0]/scale), int(img.size[1]/scale)), Image.ANTIALIAS)
    return img

def save_image(filename, data):
    img = data.clone().add(1).div(2).mul(255).clamp(0, 255).numpy()
    img = img.transpose(1, 2, 0).astype('uint8')
    img = Image.fromarray(img)
    img.save(filename)

def gram_matrix(y):
    (b, ch, h, w) = y.size()
    features = y.view(b, ch, w*h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t) / (ch*h*w)
    return gram

def normalize_batch(batch):
    mean = batch.data.new(batch.data.size())
    std = batch.data.new(batch.data.size())
    mean[:, 0, :, :] = 0.485
    mean[:, 1, :, :] = 0.456
    mean[:, 2, :, :] = 0.406
    std[:, 0, :, :] = 0.229
    std[:, 1, :, :] = 0.224
    std[:, 2, :, :] = 0.225
    batch = torch.div(batch, 255.0)
    batch -= Variable(mean)
    # batch /= Variable(std)
    batch = torch.div(batch, Variable(std))
    return batch
Error message
(impaint_env) vivek@Viveks-MacBook-Pro image_impainter % python main.py
/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torchvision/transforms/transforms.py:310: UserWarning: The use of the transforms.Scale transform is deprecated, please use transforms.Resize instead.
warnings.warn("The use of the transforms.Scale transform is deprecated, " +
torch.Size([7])
torch.Size([7])
torch.Size([7, 3, 128, 128])
Traceback (most recent call last):
File "main.py", line 114, in <module>
fake = netG(input_cropped)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/generator.py", line 70, in forward
x = self.t7(x)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/container.py", line 139, in forward
input = module(input)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/Users/vivek/DSwork/image_impainter/impaint_env/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 916, in forward
return F.conv_transpose2d(
RuntimeError: Given transposed=1, weight of size [512, 256, 4, 4], expected input[7, 4000, 1, 1] to have 512 channels, but got 4000 channels instead
You have a "gap" between layer t6 and t7 of your generatorG:
# ...
self.t6 = nn.Sequential(
    nn.Conv2d(in_channels=512, out_channels=4000, kernel_size=(4, 4)),
    nn.BatchNorm2d(4000),
    nn.ReLU()
)
self.t7 = nn.Sequential(
    nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)
# ...
Your t6 layer expects an input with 512 channels and outputs a tensor with 4000 channels. However, the next layer, t7, expects its input to have only 512 channels.
You need to adjust t6 and/or t7 so that t6 outputs exactly the number of channels t7 expects; that is, t6's out_channels must equal t7's in_channels.
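For example, one way to line them up is the following sketch (you could equally keep t7 as it is and change t6's out_channels to 512 instead):

# sketch: let t7 accept the 4000 channels that t6 produces
self.t7 = nn.Sequential(
    nn.ConvTranspose2d(in_channels=4000, out_channels=256, kernel_size=4, stride=2, padding=1),
    nn.BatchNorm2d(256),
    nn.ReLU()
)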
I am a freshman in neural networks and I have built a VGG16 network, but in every batch all the inputs lead to the same outputs. So I checked the output of every layer and finally found that x = x.view(batch_size, -1) gives the same outputs! I have no idea why this happens. Here is part of my code:
class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=1, kernel_size=3)  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=1, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(64, 128, padding=1, kernel_size=3)
        self.conv4 = torch.nn.Conv2d(128, 128, padding=1, kernel_size=3)
        self.conv5 = torch.nn.Conv2d(128, 256, padding=1, kernel_size=3)
        self.conv6 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv7 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv8 = torch.nn.Conv2d(256, 512, padding=1, kernel_size=3)
        self.conv9 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv10 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv11 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv12 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv13 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.conv1(x))                  # layer1
        x = self.pooling(F.relu(self.conv2(x)))    # layer2
        x = F.relu(self.conv3(x))                  # layer3
        x = self.pooling(F.relu(self.conv4(x)))    # layer4
        x = F.relu(self.conv5(x))                  # layer5
        x = F.relu(self.conv6(x))                  # layer6
        x = self.pooling(F.relu(self.conv7(x)))    # layer7
        x = F.relu(self.conv8(x))                  # layer8
        x = F.relu(self.conv9(x))                  # layer9
        x = self.pooling(F.relu(self.conv10(x)))   # layer10
        x = F.relu(self.conv11(x))                 # layer11
        x = F.relu(self.conv12(x))                 # layer12
        x = self.pooling(F.relu(self.conv13(x)))   # layer13
        x = x.view(batch_size, -1)  # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
this is the training part:
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, true_labels = data
        optimizer.zero_grad()  # clear the optimizer to avoid accumulating of grad
        # forward
        outputs = model(inputs)
        loss = criterion(outputs, true_labels)
        # backward
        loss.backward()
        # update
        optimizer.step()
        running_loss += loss.item()
        # output the train result every 10 loop
        if (batch_idx + 1) % 10 == 0:
            print('[%d %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss/10))
            running_loss = 0.0
These are the outputs of layer 13 (before view): [screenshot]
These are the outputs of x.view: [screenshot]
I have been searching the net for a long time, but to no avail. Any ideas?
Thanks in advance.
Use of view() method
import torch
torch.tensor([[1,2,3],[4,5,6]]).view(3,2)
# tensor([[1, 2],
#         [3, 4],
#         [5, 6]])
Hence there is no change in the tensor values; view() only changes the shape.
My data has the following shapes:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
(942, 32, 32, 1) (236, 32, 32, 1) (942, 3, 3) (236, 3, 3)
And whenever I try to run my CNN I get the following error:
from tensorflow.keras import layers
from tensorflow.keras import Model
img_input = layers.Input(shape=(32, 32, 1))
x = layers.Conv2D(16, (3,3), activation='relu', strides = 1, padding = 'same')(img_input)
x = layers.Conv2D(32, (3,3), activation='relu', strides = 2)(x)
x = layers.Conv2D(128, (3,3), activation='relu', strides = 2)(x)
x = layers.MaxPool2D(pool_size=2)(x)
x = layers.Conv2D(3, 3, activation='linear', strides = 2)(x)
output = layers.Flatten()(x)
model = Model(img_input, output)
model.summary()
model.compile(loss='mean_squared_error',optimizer= 'adam', metrics=['mse'])
history = model.fit(X_train,Y_train,validation_data=(X_test, Y_test), epochs = 100,verbose=1)
Error:
InvalidArgumentError: Incompatible shapes: [32,3] vs. [32,3,3]
[[node BroadcastGradientArgs_2 (defined at /usr/local/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_7567]
Function call stack:
distributed_function
What am I missing here?
You don't handle the dimensionality inside your network properly. First, expand the dimension of your y so that it is in the format (n_sample, 3, 3, 1). Then adjust the network accordingly (I removed the flatten and max-pooling layers and adjusted the last conv output):
import numpy as np
from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.models import Model

# create dummy data
n_sample = 10
X = np.random.uniform(0,1, (n_sample, 32, 32, 1))
y = np.random.uniform(0,1, (n_sample, 3, 3))
# expand y dim
y = y[...,np.newaxis]
print(X.shape, y.shape)
img_input = Input(shape=(32, 32, 1))
x = Conv2D(16, (3,3), activation='relu', strides = 1, padding = 'same')(img_input)
x = Conv2D(32, (3,3), activation='relu', strides = 2)(x)
x = Conv2D(128, (3,3), activation='relu', strides = 2)(x)
x = Conv2D(1, (3,3), activation='linear', strides = 2)(x)
model = Model(img_input, x)
model.summary()
model.compile(loss='mean_squared_error',optimizer= 'adam', metrics=['mse'])
model.fit(X,y, epochs=3)
I want to create a custom model from ResNet101 by retrieving one of its layers, called 'avg_pool', and replacing it with my custom layer. I have done a similar thing with another pre-trained ImageNet model, ResNet50, but I am getting an error with ResNet101. I am a newbie in transfer learning, so please point out my mistake.
def resnet101_model(weights_path=None):
    eps = 1.1e-5

    # Handle Dimension Ordering for different backends
    global bn_axis
    if K.image_dim_ordering() == 'tf':
        bn_axis = 3
        img_input = Input(shape=(224, 224, 3), name='data')
    else:
        bn_axis = 1
        img_input = Input(shape=(3, 224, 224), name='data')

    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name='bn_conv1')(x)
    x = Scale(axis=bn_axis, name='scale_conv1')(x)
    x = Activation('relu', name='conv1_relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for i in range(1, 3):
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b'+str(i))

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    for i in range(1, 23):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b'+str(i))

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    x_fc = AveragePooling2D((7, 7), name='avg_pool')(x)
    x_fc = Flatten()(x_fc)
    x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc)

    model = Model(img_input, x_fc)

    # load weights
    if weights_path:
        model.load_weights(weights_path, by_name=True)

    return model

im = cv2.resize(cv2.imread('human.jpg'), (224, 224)).astype(np.float32)

# Remove train image mean
im[:,:,0] -= 103.939
im[:,:,1] -= 116.779
im[:,:,2] -= 123.68

# Transpose image dimensions (Theano uses the channels as the 1st dimension)
if K.image_dim_ordering() == 'th':
    im = im.transpose((2,0,1))
    weights_path = 'resnet101_weights_th.h5'
else:
    weights_path = 'resnet101_weights_tf.h5'

im = np.expand_dims(im, axis=0)

image_input = Input(shape=(224, 224, 3))
model = resnet101_model(weights_path)
model.summary()

last_layer = model.get_layer('avg_pool').output
x = Flatten(name='flatten')(last_layer)
out = Dense(num_classes, activation='softmax', name='fc1000')(x)
custom_resnet_model = Model(inputs=image_input, outputs=out)
custom_resnet_model.summary()
"Graph disconnected" happens when your inputs and outputs are not connected. In your case, image_input is not connected to out; you should build the custom head on top of the ResNet model's own graph (i.e., pass the input through the ResNet model), and then it should work.
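A minimal sketch of that fix (it reuses the loaded model's own input tensor so the graph stays connected; the Dense layer name is changed here only to avoid clashing with the existing fc1000 layer):

last_layer = model.get_layer('avg_pool').output
x = Flatten(name='flatten')(last_layer)
out = Dense(num_classes, activation='softmax', name='fc_custom')(x)

# build the custom model from the ResNet's own input instead of a fresh, unused Input
custom_resnet_model = Model(inputs=model.input, outputs=out)
custom_resnet_model.summary()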