I am attempting to create a near-identical model architecture to AlexNet, except that each channel (red, green, and blue) is processed by its own disconnected branch, and the branches are all concatenated at the end for the classifier.
The base network:
import torch
import torch.nn as nn
from torchvision.utils import _log_api_usage_once

class AlexNet(nn.Module):
    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        _log_api_usage_once(self)
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
Training
def train_epoch(self, epoch, total):
    self.model.train()
    for batch_idx, (features, targets) in enumerate(self.train_loader):
        features = features.to(self.device)
        targets = targets.to(self.device)

        logits = self.model(features)
        loss = self.loss_func(logits, targets)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
I would like each channel to go through its own feature extractor, but have the features combined for classification.
red = features[:, 0:1, :, :]
green = features[:, 1:2, :, :]
blue = features[:, 2:3, :, :]
logits = self.model(red, green, blue)
I have seen people use groups, but I am not sure how to implement it fully.
Any help is greatly appreciated.
Since each branch/head would take an image with one channel, you could start by just replacing the 3 in the first conv layer with 1:
nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
Now you can send the three single-channel images through the self.features layers and concatenate the results before passing them to the self.classifier layers:
import torch
import torch.nn as nn

class AlexNet(nn.Module):
    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(6912, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x_r: torch.Tensor, x_g: torch.Tensor, x_b: torch.Tensor) -> torch.Tensor:
        # Run each single-channel image through the feature extractor.
        x_r = self.features(x_r)
        x_r = torch.flatten(self.avgpool(x_r), 1)
        x_g = self.features(x_g)
        x_g = torch.flatten(self.avgpool(x_g), 1)
        x_b = self.features(x_b)
        x_b = torch.flatten(self.avgpool(x_b), 1)
        # Concatenate the three flattened feature vectors for the classifier.
        x = torch.cat((x_r, x_g, x_b), -1)
        x = self.classifier(x)
        return x
model = AlexNet()
img = torch.rand(1, 3, 256, 256)
img_r = img[:, 0:1, :, :]
img_g = img[:, 1:2, :, :]
img_b = img[:, 2:3, :, :]
output = model(img_r, img_g, img_b)
Note that I changed self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) to self.avgpool = nn.AdaptiveAvgPool2d((3, 3)) because the flattened output of each branch was really big (256 * 6 * 6 = 9216). Now each branch is 256 * 3 * 3 = 2304, and by concatenating the three of them you get a tensor of size 6912. Hope this helps :)
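As an aside, since the question mentions groups: below is a minimal sketch (my addition, not part of the answer above) of the grouped-convolution alternative. Giving every conv groups=3 and tripling the channel counts keeps the R, G, and B feature maps separate inside a single features stack, because group i only ever sees features that originated from input channel i. Unlike the shared self.features above, each colour also gets its own weights here:

import torch
import torch.nn as nn

# Hypothetical grouped-convolution variant: every conv uses groups=3, so the
# red, green, and blue feature maps never mix until the classifier.
branched_features = nn.Sequential(
    nn.Conv2d(3, 3 * 64, kernel_size=11, stride=4, padding=2, groups=3),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(3 * 64, 3 * 192, kernel_size=5, padding=2, groups=3),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(3 * 192, 3 * 384, kernel_size=3, padding=1, groups=3),
    nn.ReLU(inplace=True),
    nn.Conv2d(3 * 384, 3 * 256, kernel_size=3, padding=1, groups=3),
    nn.ReLU(inplace=True),
    nn.Conv2d(3 * 256, 3 * 256, kernel_size=3, padding=1, groups=3),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
)

img = torch.rand(1, 3, 256, 256)
feats = branched_features(img)  # (1, 768, 7, 7): 256 channels per colour branch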
Related
I was trying to implement an autoencoder with a periodic activation function, SIREN (Link), but I have only found examples that use linear layers. In the paper they mention a custom hypernetwork that uses a CNN, but I didn't understand how to use it for my autoencoder.
Is it possible to implement SIREN for a CNN?
How would I implement it for my autoencoder code below? Simply adding sin() as the activation function doesn't work, because the authors mention a custom initialization and some other tricks to make the periodic activation work.
I am attaching the paper here - Link
import torch
import torch.nn as nn
import torch.nn.functional as F

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        # encoder layers
        self.enc1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)
        self.enc2 = nn.Conv2d(64, 32, kernel_size=3, padding=1)
        self.enc3 = nn.Conv2d(32, 16, kernel_size=3, padding=1)
        self.enc4 = nn.Conv2d(16, 8, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder layers
        self.dec1 = nn.ConvTranspose2d(8, 8, kernel_size=3, stride=2)
        self.dec2 = nn.ConvTranspose2d(8, 16, kernel_size=3, stride=2)
        self.dec3 = nn.ConvTranspose2d(16, 32, kernel_size=2, stride=2)
        self.dec4 = nn.ConvTranspose2d(32, 64, kernel_size=2, stride=2)
        self.out = nn.Conv2d(64, 1, kernel_size=3, padding=1)

    def forward(self, x):
        # encode
        x = F.relu(self.enc1(x))
        x = self.pool(x)
        x = F.relu(self.enc2(x))
        x = self.pool(x)
        x = F.relu(self.enc3(x))
        x = self.pool(x)
        x = F.relu(self.enc4(x))
        x = self.pool(x)  # the latent space representation

        # decode
        x = F.relu(self.dec1(x))
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        x = F.relu(self.dec4(x))
        x = torch.sigmoid(self.out(x))  # F.sigmoid is deprecated
        return x

net = Autoencoder()
print(net)
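For what it's worth, here is a minimal sketch of how the paper's initialization could be carried over to conv layers (my own assumption: the paper defines the scheme for linear layers with fan-in n, and I am reusing it with the conv's fan-in in_ch * k * k; SineConv2d is a hypothetical name). The first layer draws weights from U(-1/fan_in, 1/fan_in), hidden layers from U(-sqrt(6/fan_in)/w0, sqrt(6/fan_in)/w0), and the activation is sin(w0 * x) with w0 = 30, as in the paper:

import math
import torch
import torch.nn as nn

class SineConv2d(nn.Module):
    """Conv2d + sin(w0 * x) with SIREN-style initialization (a hypothetical
    extension of the paper's linear-layer scheme to convolutions)."""

    def __init__(self, in_ch, out_ch, kernel_size=3, padding=1,
                 w0=30.0, is_first=False):
        super().__init__()
        self.w0 = w0
        self.conv = nn.Conv2d(in_ch, out_ch, kernel_size, padding=padding)
        fan_in = in_ch * kernel_size * kernel_size
        # First layer: U(-1/fan_in, 1/fan_in); hidden: U(-sqrt(6/fan_in)/w0, ...)
        bound = 1.0 / fan_in if is_first else math.sqrt(6.0 / fan_in) / w0
        with torch.no_grad():
            self.conv.weight.uniform_(-bound, bound)
            self.conv.bias.uniform_(-bound, bound)

    def forward(self, x):
        return torch.sin(self.w0 * self.conv(x))

You could then swap, e.g., F.relu(self.enc1(x)) for a SineConv2d(1, 64, is_first=True) layer, and so on through the encoder and decoder.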
I am a freshman in neural networks and I have built a VGG16 network. But in every batch, all the inputs lead to the same outputs. So I checked the output of every layer and finally found that x = x.view(batch_size, -1) gives the same outputs! I have no idea why this happens. Here is part of my code:
import torch
import torch.nn.functional as F

class VGG16(torch.nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 64, padding=1, kernel_size=3)  # kernel
        self.conv2 = torch.nn.Conv2d(64, 64, padding=1, kernel_size=3)
        self.conv3 = torch.nn.Conv2d(64, 128, padding=1, kernel_size=3)
        self.conv4 = torch.nn.Conv2d(128, 128, padding=1, kernel_size=3)
        self.conv5 = torch.nn.Conv2d(128, 256, padding=1, kernel_size=3)
        self.conv6 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv7 = torch.nn.Conv2d(256, 256, padding=1, kernel_size=3)
        self.conv8 = torch.nn.Conv2d(256, 512, padding=1, kernel_size=3)
        self.conv9 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv10 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv11 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv12 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.conv13 = torch.nn.Conv2d(512, 512, padding=1, kernel_size=3)
        self.pooling = torch.nn.MaxPool2d(2)  # pool
        self.fc1 = torch.nn.Linear(25088, 4096)  # 7 * 7 * 512 = 25088
        self.fc2 = torch.nn.Linear(4096, 4096)
        self.fc3 = torch.nn.Linear(4096, 2)

    def forward(self, x):
        batch_size = x.size(0)
        x = F.relu(self.conv1(x))                 # layer1
        x = self.pooling(F.relu(self.conv2(x)))   # layer2
        x = F.relu(self.conv3(x))                 # layer3
        x = self.pooling(F.relu(self.conv4(x)))   # layer4
        x = F.relu(self.conv5(x))                 # layer5
        x = F.relu(self.conv6(x))                 # layer6
        x = self.pooling(F.relu(self.conv7(x)))   # layer7
        x = F.relu(self.conv8(x))                 # layer8
        x = F.relu(self.conv9(x))                 # layer9
        x = self.pooling(F.relu(self.conv10(x)))  # layer10
        x = F.relu(self.conv11(x))                # layer11
        x = F.relu(self.conv12(x))                # layer12
        x = self.pooling(F.relu(self.conv13(x)))  # layer13
        x = x.view(batch_size, -1)                # flatten
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
This is the training part:
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, true_labels = data
        optimizer.zero_grad()  # clear the optimizer to avoid accumulation of grad
        # forward
        outputs = model(inputs)
        loss = criterion(outputs, true_labels)
        # backward
        loss.backward()
        # update
        optimizer.step()
        running_loss += loss.item()
        # output the train result every 10 loops
        if (batch_idx + 1) % 10 == 0:
            print('[%d %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 10))
            running_loss = 0.0
(Screenshots were attached here of the layer-13 outputs before the view, and of the x.view outputs.)
I have been searching the net for a long time, but with no luck. Any ideas?
Thanks in advance.
Use of view() method
import torch

torch.tensor([[1, 2, 3], [4, 5, 6]]).view(3, 2)
# tensor([[1, 2],
#         [3, 4],
#         [5, 6]])

Hence there is no change in the tensor's values; view() just changes the shape.
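To connect this to the question: view() cannot make distinct rows identical, so if x.view(batch_size, -1) returns the same row for every sample, the layer-13 activations were already identical per sample. A quick check with hypothetical tensors (my addition, not from the original answer):

import torch

# view() only reshapes; element values and their order are preserved.
x = torch.rand(4, 512, 7, 7)
flat = x.view(4, -1)
print(torch.equal(flat[0], x[0].reshape(-1)))  # True: same values, new shape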
I am trying to use AlexNet on the CIFAR-10 dataset. I have resized my images to 224x224, which I'm guessing is the issue. Nevertheless, I get the following error:
<ipython-input-11-34884668038d> in forward(self, x)
     37     def forward(self, x):
     38         x = self.features(x)
---> 39         x = x.view(x.size(0), 256 * 2 * 2)
     40         x = self.classifier(x)
     41         return x

RuntimeError: shape '[1, 1024]' is invalid for input of size 50176
My AlexNet model code is as follows:
import torch.nn as nn

NUM_CLASSES = 10

class AlexNet(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 2 * 2)
        x = self.classifier(x)
        return x
Any help would be appreciated :)
CIFAR nets expect input data much smaller than 224x224, usually 32x32. With 32x32 inputs, this architecture's feature extractor ends at a 2x2 map, which is exactly what the 256 * 2 * 2 in the view assumes; with 224x224 inputs it ends at 14x14, i.e. 256 * 14 * 14 = 50176 values per sample, the size reported in the error.
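To see where the 50176 comes from, you can trace the feature-map shapes (a quick sanity check, assuming the AlexNet class from the question):

import torch

model = AlexNet()
# 32x32 halves at each of the four stride-2 stages: 32 -> 16 -> 8 -> 4 -> 2.
print(model.features(torch.rand(1, 3, 32, 32)).shape)    # torch.Size([1, 256, 2, 2])
# 224x224 ends at 14x14 instead: 256 * 14 * 14 = 50176, the size in the error.
print(model.features(torch.rand(1, 3, 224, 224)).shape)  # torch.Size([1, 256, 14, 14])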
I have a UNET-style autoencoder below, with a filter I wrote in PyTorch at the end. The network seems to be converging faster than it should, and I don't know why. I have a dataset of 4000 images, and I take a 128x128 crop every time. I'm employing a learning rate schedule and weight decay. I've tried fiddling with my parameters on a tiny dataset to see improvements, but nothing seems to work. Once the learning rate goes down, the loss just bounces around and doesn't hit a floor, and in some cases goes back up. My network is as follows:
import torch
import torch.nn as nn
from wiener_3d import wiener_3d
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import random

def np_to_pil(np_imgs):
    img_num = np_imgs.shape[0]
    channel_num = np_imgs.shape[1]
    ar = np.clip(np_imgs * 255, 0, 255).astype(np.uint8)
    pil_imgs = []
    for i in range(img_num):
        if channel_num == 1:
            img = ar[i][0]
        else:
            img = ar[i].transpose(1, 2, 0)
        pil_imgs.append(Image.fromarray(img))
    return pil_imgs
class WienerFilter(nn.Module):
    def __init__(self, param_b=16):
        super(WienerFilter, self).__init__()
        # self.register_parameter("param_a", nn.Parameter(torch.tensor(param_a)))
        # self.param_a = nn.Parameter(torch.tensor(param_a))
        # self.param_a.requires_grad = True
        self.param_b = param_b

    def forward(self, input, std):
        tensors = input.shape[0]
        for i in range(tensors):
            tensor = input[i]
            tensor = torch.squeeze(tensor)
            # tensor = wiener_3d(tensor, self.param_a, self.param_b)
            tensor = wiener_3d(tensor, 2 * std, self.param_b)
            tensor = torch.unsqueeze(tensor, 0)
            input[i] = tensor
        return input
class AutoEncoder(nn.Module):
    """Autoencoder simple implementation"""

    def __init__(self):
        super(AutoEncoder, self).__init__()
        # Encoder
        # conv layer
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 96, 3, padding=1),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1),
            nn.Conv2d(96, 96, 3, padding=1),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1)
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(96, 96, 3, padding=1),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1)
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(96, 96, 3, padding=1),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(96, 96, 2, 2),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1)
        )
        self.block4 = nn.Sequential(
            nn.Conv2d(192, 192, 3, padding=1),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1),
            nn.Conv2d(192, 192, 3, padding=1),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(192, 192, 2, 2),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1)
        )
        self.block5 = nn.Sequential(
            nn.Conv2d(288, 192, 3, padding=1),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1),
            nn.Conv2d(192, 192, 3, padding=1),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1),
            nn.ConvTranspose2d(192, 192, 2, 2),
            nn.BatchNorm2d(192),
            nn.LeakyReLU(0.1)
        )
        self.block6 = nn.Sequential(
            nn.Conv2d(193, 96, 3, padding=1),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1),
            nn.Conv2d(96, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.1),
            nn.Conv2d(64, 32, 3, padding=1),
            nn.LeakyReLU(0.1),
            nn.Conv2d(32, 1, 3, padding=1),
            nn.LeakyReLU(0.1)
        )
        self.wiener_filter = WienerFilter()
    def forward(self, x, std):
        # torch.autograd.set_detect_anomaly(True)
        # print("input: ", x.shape)
        pool1 = self.block1(x)
        # print("pool1: ", pool1.shape)
        pool2 = self.block2(pool1)
        # print("pool2: ", pool2.shape)
        pool3 = self.block2(pool2)
        # print("pool3: ", pool3.shape)
        pool4 = self.block2(pool3)
        # print("pool4: ", pool4.shape)
        pool5 = self.block2(pool4)
        # print("pool5: ", pool5.shape)
        upsample5 = self.block3(pool5)
        # print("upsample5: ", upsample5.shape)
        concat5 = torch.cat((upsample5, pool4), 1)
        # print("concat5: ", concat5.shape)
        upsample4 = self.block4(concat5)
        # print("upsample4: ", upsample4.shape)
        concat4 = torch.cat((upsample4, pool3), 1)
        # print("concat4: ", concat4.shape)
        upsample3 = self.block5(concat4)
        # print("upsample3: ", upsample3.shape)
        concat3 = torch.cat((upsample3, pool2), 1)
        # print("concat3: ", concat3.shape)
        upsample2 = self.block5(concat3)
        # print("upsample2: ", upsample2.shape)
        concat2 = torch.cat((upsample2, pool1), 1)
        # print("concat2: ", concat2.shape)
        upsample1 = self.block5(concat2)
        # print("upsample1: ", upsample1.shape)
        concat1 = torch.cat((upsample1, x), 1)
        # print("concat1: ", concat1.shape)
        output = self.block6(concat1)

        path = "test"
        t_map = x - output
        filtering = self.wiener_filter(t_map, std)
        filtered_output = output + filtering
        return filtered_output
My current parameters are:
Adam optimizer,
learning rate decay by 0.1 if no improvement for 7 epochs,
intial learning rate 0.001,
0.0001 weight decay,
No batches.
I feel like I've tried everything at this stage. Could someone give me some advice on how to improve my network? Thank you.
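For reference, here is a sketch of the training setup described above in code (my reading of the listed settings, not the author's actual script; the loss function is an assumption):

import torch.nn as nn
import torch.optim as optim

model = AutoEncoder()
criterion = nn.MSELoss()  # assumption: some reconstruction loss
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
# Decay the learning rate by 0.1 if the loss has not improved for 7 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.1, patience=7)
# ...after each epoch: scheduler.step(epoch_loss)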
I want to create a custom model from ResNet101 by retrieving one of its layers, called 'avg_pool', and converting it to my custom layer. I have done a similar thing with another pre-trained ImageNet model, ResNet50, but I am getting an error with ResNet101. I am a newbie in transfer learning, so please point out my mistake.
# (Requires the usual Keras imports, plus the custom Scale layer and the
# conv_block/identity_block helpers from the ResNet-101 implementation.)
def resnet101_model(weights_path=None):
    eps = 1.1e-5

    # Handle Dimension Ordering for different backends
    global bn_axis
    if K.image_dim_ordering() == 'tf':
        bn_axis = 3
        img_input = Input(shape=(224, 224, 3), name='data')
    else:
        bn_axis = 1
        img_input = Input(shape=(3, 224, 224), name='data')

    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name='bn_conv1')(x)
    x = Scale(axis=bn_axis, name='scale_conv1')(x)
    x = Activation('relu', name='conv1_relu')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')

    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for i in range(1, 3):
        x = identity_block(x, 3, [128, 128, 512], stage=3, block='b' + str(i))

    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    for i in range(1, 23):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b' + str(i))

    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')

    x_fc = AveragePooling2D((7, 7), name='avg_pool')(x)
    x_fc = Flatten()(x_fc)
    x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc)

    model = Model(img_input, x_fc)

    # load weights
    if weights_path:
        model.load_weights(weights_path, by_name=True)

    return model
im = cv2.resize(cv2.imread('human.jpg'), (224, 224)).astype(np.float32)

# Remove train image mean
im[:, :, 0] -= 103.939
im[:, :, 1] -= 116.779
im[:, :, 2] -= 123.68

# Transpose image dimensions (Theano uses the channels as the 1st dimension)
if K.image_dim_ordering() == 'th':
    im = im.transpose((2, 0, 1))
    weights_path = 'resnet101_weights_th.h5'
else:
    weights_path = 'resnet101_weights_tf.h5'

im = np.expand_dims(im, axis=0)

image_input = Input(shape=(224, 224, 3))
model = resnet101_model(weights_path)
model.summary()

last_layer = model.get_layer('avg_pool').output
x = Flatten(name='flatten')(last_layer)
out = Dense(num_classes, activation='softmax', name='fc1000')(x)
custom_resnet_model = Model(inputs=image_input, outputs=out)
custom_resnet_model.summary()
"Graph disconnected" happens when your inputs and outputs are not connected. In your case, image_input is not connected to out: the new head is built on the output of a model that was constructed from its own img_input, while image_input is never used. You should pass it through the ResNet model (or build the head on the model's own input), and then it should work.
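A sketch of that fix (my assumption of what was meant: reuse the loaded model's own input tensor; I also rename the new Dense layer so it does not clash with the existing fc1000 layer):

last_layer = model.get_layer('avg_pool').output
x = Flatten(name='flatten')(last_layer)
out = Dense(num_classes, activation='softmax', name='fc_custom')(x)
# Build the new Model from the ResNet's own input instead of the unused image_input.
custom_resnet_model = Model(inputs=model.input, outputs=out)
custom_resnet_model.summary()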