Related
I am attempting to create a nearly identical model architecture to AlexNet, except that each channel (red, green, and blue) is processed by its own branch, and the branches are all concatenated at the end for the classifier.
Similar architecture to this
The base network:
class AlexNet(nn.Module):
    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        _log_api_usage_once(self)
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
Training
def train_epoch(self, epoch, total):
    self.model.train()
    for batch_idx, (features, targets) in enumerate(self.train_loader):
        features = features.to(self.device)
        targets = targets.to(self.device)
        logits = self.model(features)
        loss = self.loss_func(logits, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
I would like each channel to have its own feature-extraction branch, but be combined for classification.
red = features[:,0:1,:,:]
green = features[:,1:2,:,:]
blue = features[:,2:3,:,:]
logits = self.model([red, green, blue])
I have seen people use the groups argument of Conv2d, but I am not sure how to implement it fully.
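For reference, this is roughly what I understand groups to do (just a sketch; I used 96 output channels instead of 64 because out_channels has to be divisible by groups):

import torch
import torch.nn as nn

# Sketch of a grouped convolution: with groups=3, each of the 3 input channels
# is convolved with its own set of 96/3 = 32 filters, so the channels never mix.
grouped_conv = nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2, groups=3)

x = torch.rand(1, 3, 256, 256)   # dummy RGB batch
out = grouped_conv(x)            # channels 0-31 depend only on R, 32-63 on G, 64-95 on B
print(out.shape)                 # torch.Size([1, 96, 63, 63])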
Any help is greatly appreciated
Since each branch/head would take an image with one channel, you could start by just replacing the 3 in the first conv layer with 1:
nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
Now you can send the three single-channel images through the self.features layers and concatenate them before passing them to the self.classifier layers:
import torch
import torch.nn as nn
class AlexNet(nn.Module):
    def __init__(self, num_classes: int = 1000, dropout: float = 0.5) -> None:
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.avgpool = nn.AdaptiveAvgPool2d((3, 3))
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(6912, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x_r: torch.Tensor, x_g: torch.Tensor, x_b: torch.Tensor) -> torch.Tensor:
        x_r = self.features(x_r)
        x_r = torch.flatten(self.avgpool(x_r), 1)
        x_g = self.features(x_g)
        x_g = torch.flatten(self.avgpool(x_g), 1)
        x_b = self.features(x_b)
        x_b = torch.flatten(self.avgpool(x_b), 1)
        x = torch.concat((x_r, x_g, x_b), -1)
        x = self.classifier(x)
        return x
model = AlexNet()
img = torch.rand(1, 3, 256, 256)  # full RGB image, shown for reference; the model takes the channels separately
img_r = torch.rand(1, 1, 256, 256)
img_g = torch.rand(1, 1, 256, 256)
img_b = torch.rand(1, 1, 256, 256)
output = model(img_r, img_g, img_b)
Note that I changed self.avgpool = nn.AdaptiveAvgPool2d((6, 6)) to self.avgpool = nn.AdaptiveAvgPool2d((3, 3)) because the flattened output of each branch was really big (9216). Now it is 2304, and by concatenating the three branches you get a tensor of size 6912, which is why the first Linear layer takes 6912 inputs. Hope this helps :)
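As a follow-up, the training step from the question could then split each batch into its channels before calling the model. A minimal sketch, assuming the AlexNet class above has been defined:

import torch

# Split a dummy batch of RGB images into per-channel tensors and feed
# them to the three-branch model defined above.
model = AlexNet()
batch = torch.rand(4, 3, 256, 256)   # dummy batch of RGB images

red = batch[:, 0:1, :, :]            # slicing with 0:1 keeps the channel dim: (4, 1, 256, 256)
green = batch[:, 1:2, :, :]
blue = batch[:, 2:3, :, :]

logits = model(red, green, blue)     # matches forward(x_r, x_g, x_b)
print(logits.shape)                  # torch.Size([4, 1000])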
My data has the following shapes:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)
(942, 32, 32, 1) (236, 32, 32, 1) (942, 3, 3) (236, 3, 3)
And whenever I try to run my CNN I get the following error:
from tensorflow.keras import layers
from tensorflow.keras import Model
img_input = layers.Input(shape=(32, 32, 1))
x = layers.Conv2D(16, (3,3), activation='relu', strides = 1, padding = 'same')(img_input)
x = layers.Conv2D(32, (3,3), activation='relu', strides = 2)(x)
x = layers.Conv2D(128, (3,3), activation='relu', strides = 2)(x)
x = layers.MaxPool2D(pool_size=2)(x)
x = layers.Conv2D(3, 3, activation='linear', strides = 2)(x)
output = layers.Flatten()(x)
model = Model(img_input, output)
model.summary()
model.compile(loss='mean_squared_error',optimizer= 'adam', metrics=['mse'])
history = model.fit(X_train,Y_train,validation_data=(X_test, Y_test), epochs = 100,verbose=1)
Error:
InvalidArgumentError: Incompatible shapes: [32,3] vs. [32,3,3]
[[node BroadcastGradientArgs_2 (defined at /usr/local/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_7567]
Function call stack:
distributed_function
What am I missing here?
You don't handle the dimensionality inside your network properly. First, expand the dimensions of your y so that it is in the format (n_sample, 3, 3, 1). Then adjust the network to match (I removed the flatten and max-pooling layers and adjusted the output of the last conv layer):
import numpy as np
from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.models import Model

# create dummy data
n_sample = 10
X = np.random.uniform(0,1, (n_sample, 32, 32, 1))
y = np.random.uniform(0,1, (n_sample, 3, 3))
# expand y dim
y = y[...,np.newaxis]
print(X.shape, y.shape)
img_input = Input(shape=(32, 32, 1))
x = Conv2D(16, (3,3), activation='relu', strides = 1, padding = 'same')(img_input)
x = Conv2D(32, (3,3), activation='relu', strides = 2)(x)
x = Conv2D(128, (3,3), activation='relu', strides = 2)(x)
x = Conv2D(1, (3,3), activation='linear', strides = 2)(x)
model = Model(img_input, x)
model.summary()
model.compile(loss='mean_squared_error',optimizer= 'adam', metrics=['mse'])
model.fit(X,y, epochs=3)
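Applied to the data from the question, the same expansion would look like this (a sketch using the variable names printed above):

import numpy as np

# Add a trailing channel axis so the targets match the (batch, 3, 3, 1)
# output of the last conv layer.
Y_train = Y_train[..., np.newaxis]   # (942, 3, 3) -> (942, 3, 3, 1)
Y_test = Y_test[..., np.newaxis]     # (236, 3, 3) -> (236, 3, 3, 1)
print(Y_train.shape, Y_test.shape)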
So I'm building a denoiser with an autoencoder. The idea is that before computing my loss (after the autoencoder), I apply an empirical Wiener filter to a texture map of the image and add it back to my autoencoder output (adding back 'lost detail'). I've coded this filter in PyTorch.
My first attempt worked by adding the filter to the end of my autoencoder’s forward function. I can train this network and it backpropagates through my filter in training. However, if I print my network, the filter is not listed, and torchsummary doesn’t include it when calculating parameters.
This has me thinking that I am only training the autoencoder and my filter is filtering the same way every time and not learning.
Is what I’m trying to do possible?
Below is my Autoencoder:
class AutoEncoder(nn.Module):
    """Autoencoder simple implementation"""

    def __init__(self):
        super(AutoEncoder, self).__init__()
        # Encoder
        # conv layer
        self.block1 = nn.Sequential(
            nn.Conv2d(1, 48, 3, padding=1),
            nn.Conv2d(48, 48, 3, padding=1),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(48),
            nn.LeakyReLU(0.1)
        )
        self.block2 = nn.Sequential(
            nn.Conv2d(48, 48, 3, padding=1),
            nn.MaxPool2d(2),
            nn.BatchNorm2d(48),
            nn.LeakyReLU(0.1)
        )
        self.block3 = nn.Sequential(
            nn.Conv2d(48, 48, 3, padding=1),
            nn.ConvTranspose2d(48, 48, 2, 2, output_padding=1),
            nn.BatchNorm2d(48),
            nn.LeakyReLU(0.1)
        )
        self.block4 = nn.Sequential(
            nn.Conv2d(96, 96, 3, padding=1),
            nn.Conv2d(96, 96, 3, padding=1),
            nn.ConvTranspose2d(96, 96, 2, 2),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1)
        )
        self.block5 = nn.Sequential(
            nn.Conv2d(144, 96, 3, padding=1),
            nn.Conv2d(96, 96, 3, padding=1),
            nn.ConvTranspose2d(96, 96, 2, 2),
            nn.BatchNorm2d(96),
            nn.LeakyReLU(0.1)
        )
        self.block6 = nn.Sequential(
            nn.Conv2d(97, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 1, 3, padding=1),
            nn.LeakyReLU(0.1)
        )
        # self.blockNorm = nn.Sequential(
        #     nn.BatchNorm2d(1),
        #     nn.LeakyReLU(0.1)
        # )

    def forward(self, x):
        # torch.autograd.set_detect_anomaly(True)
        # print("input: ", x.shape)
        pool1 = self.block1(x)
        # print("pool1: ", pool1.shape)
        pool2 = self.block2(pool1)
        # print("pool2: ", pool2.shape)
        pool3 = self.block2(pool2)
        # print("pool3: ", pool3.shape)
        pool4 = self.block2(pool3)
        # print("pool4: ", pool4.shape)
        pool5 = self.block2(pool4)
        # print("pool5: ", pool5.shape)
        upsample5 = self.block3(pool5)
        # print("upsample5: ", upsample5.shape)
        concat5 = torch.cat((upsample5, pool4), 1)
        # print("concat5: ", concat5.shape)
        upsample4 = self.block4(concat5)
        # print("upsample4: ", upsample4.shape)
        concat4 = torch.cat((upsample4, pool3), 1)
        # print("concat4: ", concat4.shape)
        upsample3 = self.block5(concat4)
        # print("upsample3: ", upsample3.shape)
        concat3 = torch.cat((upsample3, pool2), 1)
        # print("concat3: ", concat3.shape)
        upsample2 = self.block5(concat3)
        # print("upsample2: ", upsample2.shape)
        concat2 = torch.cat((upsample2, pool1), 1)
        # print("concat2: ", concat2.shape)
        upsample1 = self.block5(concat2)
        # print("upsample1: ", upsample1.shape)
        concat1 = torch.cat((upsample1, x), 1)
        # print("concat1: ", concat1.shape)
        output = self.block6(concat1)
        t_map = x - output
        for i in range(4):
            tensor = t_map[i, :, :, :]  # Take each item in batch separately. Could account for this in Wiener instead
            tensor = torch.squeeze(tensor)  # Squeeze for Wiener input format
            tensor = wiener_3d(tensor, 0.05, 10)  # Apply Wiener with specified std and block size
            tensor = torch.unsqueeze(tensor, 0)  # unsqueeze to put back into block
            t_map[i, :, :, :] = tensor  # put back into block
        filtered_output = output + t_map
        return filtered_output
The for loop at the end applies the filter to each image in the batch. I get that this isn't parallelisable, so if anyone has ideas for that I'd appreciate it. I can post the wiener_3d() filter function if that helps; I just want to keep the post short.
I've tried to define a custom layer class with the filter inside it, but I got lost very quickly.
Any help would be greatly appreciated!
If all you want is to turn your Wiener filter into a module, the following would do:
import torch
import torch.nn as nn

class WienerFilter(nn.Module):
    def __init__(self, param_a=0.05, param_b=10):
        super(WienerFilter, self).__init__()
        # This can be accessed like any other member via self.param_a
        self.register_parameter("param_a", nn.Parameter(torch.tensor(param_a)))
        self.param_b = param_b

    def forward(self, input):
        for i in range(4):
            tensor = input[i]
            tensor = torch.squeeze(tensor)
            tensor = wiener_3d(tensor, self.param_a, self.param_b)
            tensor = torch.unsqueeze(tensor, 0)
            input[i] = tensor
        return input
You can apply this by adding the line
self.wiener_filter = WienerFilter()
to the __init__ method of your AutoEncoder.
In the forward method you then call it by replacing the for loop with:
filtered_output = output + self.wiener_filter(t_map)
PyTorch knows that wiener_filter is a member module, so it will list it if you print your AutoEncoder's modules.
If you want to parallelize your wiener filter, you need to do that in PyTorch's terms, meaning using its operations on tensors. Those operations are implemented in a parallel fashion.
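In the meantime, here is a sketch of a smaller change (assuming wiener_3d keeps the signature used in the question). It is still a Python loop, not truly parallel, but it loops over the actual batch size and stacks the results instead of hard-coding range(4) and assigning into the tensor in place, which also tends to play nicer with autograd:

import torch

def apply_wiener_batched(t_map, param_a=0.05, param_b=10):
    # Apply the user's wiener_3d (assumed to take an (H, W) tensor) to each
    # image in the batch and stack the results, rather than writing into
    # t_map in place with a hard-coded batch size.
    filtered = [
        torch.unsqueeze(wiener_3d(torch.squeeze(img), param_a, param_b), 0)
        for img in t_map                  # iterates over the batch dim; img is (1, H, W)
    ]
    return torch.stack(filtered, dim=0)   # (N, 1, H, W)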
I'm using an ImageDataGenerator to input batches of images to a neural network, but can't work out the correct way to feed it. Running the following:
train_datagen = ImageDataGenerator(rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)
training_set = train_datagen.flow_from_directory('/home/Training', target_size=(256,256), batch_size=32, class_mode='binary', color_mode = 'grayscale')
test_set = test_datagen.flow_from_directory('/home/Test', target_size=(256,256), batch_size=32, class_mode='binary',color_mode = 'grayscale' )
input_size = (256, 256, 1)
inputs = Input(input_size)
conv1 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(inputs)
conv2 = Conv2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv1)
conv3 = Conv2D(1, 1, activation = 'sigmoid')(conv2)
model1 = Model(inputs = inputs, outputs = conv3)
model1.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy'])
model1.fit_generator(training_set, steps_per_epoch=160, epochs=10, validation_data=test_set, validation_steps=800)
Results in:
Error when checking target: expected conv2d_198 to have 4 dimensions,
but got array with shape (14, 1)
It seems to use the batches as the input tensor, since removing all layers but the input layer results in a similar error. How can I correctly input them into the network?
Basically Keras is expecting you to pass your input dimensions and rows. It looks like you are passing an array with two dimensions. Can you make sure you are passing something like (-1, dimension 1, dimension 2, channels)? You may need to use reshape; the -1 should tell Keras to infer the rows/observations. I'm pretty new to Keras, so I'm sure someone else will have a better answer, but you might be able to just do myinputarray.reshape(), as sketched below.
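For example, a plain NumPy reshape along those lines could look like this (the array and its shape here are made up purely for illustration):

import numpy as np

# Hypothetical example of the suggested reshape: -1 lets NumPy infer the
# number of samples, the remaining dims are (height, width, channels).
my_input_array = np.random.rand(14, 256 * 256)            # made-up flat array
my_input_array = my_input_array.reshape(-1, 256, 256, 1)  # -> (14, 256, 256, 1)
print(my_input_array.shape)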
I got the error: ValueError: Input 0 is incompatible with layer lstm_1: expected ndim=3, found ndim=2 with the following code:
def make_model():
    model = Sequential()
    model.add(Conv2D(20, (5,5), input_shape = (24,48,30), activation = "relu", strides = 1, padding = "valid"))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Conv2D(50, (5,5), use_bias = 50))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Flatten())
    model.add(Dense(20, activation = "relu"))
    model.add(LSTM(50, activation="relu", return_sequences=True))
    return model
My input is 30 matrices of size 24*48 each.
The problem lies in the fact that after the last Dense layer (before the LSTM layer) the output shape is (?, 20), and the LSTM layer expects a 3D tensor, not a 2D one. So you can expand the dimensions to add one more dimension before feeding the output to the LSTM layer.
You can expand dimensions using tf.expand_dims (assuming you use TensorFlow as the backend).
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, LSTM

input_layer = Input((30,24,48))
model = Conv2D(20,(5,5), input_shape = (30,24,48), activation = "relu", strides = 1, padding = "valid")(input_layer)
model = MaxPooling2D(pool_size=(2,2))(model)
model = Conv2D(50, (5,5), use_bias = 50)(model)
model = MaxPooling2D(pool_size=(2,2))(model)
model = Flatten()(model)
model = Dense(20, activation = "relu")(model)
model = tf.expand_dims(model, axis=-1)
model = LSTM(50, activation="relu", return_sequences=True)(model)
(I didn't use the Sequential model; I am using the functional API since it is more flexible.)
If you want to use sequential model:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Lambda

model = Sequential()
model.add(Conv2D(20,(5,5), input_shape = (30, 24, 48), activation = "relu", strides = 1, padding = "valid"))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(50, (5,5), use_bias = 50))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(20, activation = "relu"))
model.add(Lambda(lambda x: tf.expand_dims(x, axis=-1)))
model.add(LSTM(50, activation="relu", return_sequences=True))
Note that with the Sequential API you must apply tf.expand_dims inside a Lambda layer.
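As a quick sanity check (assuming the Sequential model above has been built), the summary should show the Lambda layer producing (None, 20, 1) and the LSTM producing (None, 20, 50):

# Shape check for the Sequential model defined above.
model.summary()
print(model.output_shape)   # expected: (None, 20, 50)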