Building layer wise model for Encoding-Decoding Image - image-processing

I am writing an autoencoder model for an image encoding-decoding problem.
I want to understand the node distribution in each layer of the model suitable for images.
For the below code I am using 10 images of shape (21*28*3).
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1764,882)
self.l2 = L.Linear(882,441)
# decoder part
self.l3 = L.Linear(441,882)
self.l4 = L.Linear(882,1764)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x):
h = F.dropout(self.activation(self.l1(x)))
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
gpu_id = 0
n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs,batch_size)
valid_iter = iterators.SerialIterator(xs,batch_size)
updater = training.StandardUpdater(train_iter,optimizer)
trainer = training.Trainer(updater,(n_epoch,"epoch"),out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
While running trainer.run():
InvalidType:
Invalid operation is performed in: LinearFunction (Forward)
Expect: x.shape[1] == W.shape[1]
Actual: 1764 != 882
I want to understand how node distribution works layer wise in a model. Please suggest any resource. Also how to assign nodes in layers in case of small number of training images.

Related

How to set the initial weights to Xavier?

I would like to set the initial weights in Pytorch to Xavier.
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
# self.linear1 = nn.Linear(1, 516*4*4)
self.linear1 = nn.Linear(2, 512*8)
self.linear2 = nn.Linear(256*16, 256*8)
self.linear3 = nn.Linear(256*8, 1)
def forward(self, x):
x = self.linear1(x)
x = torch.sigmoid(x)
x = self.linear2(x)
x = torch.sigmoid(x)
x = self.linear3(x)
You can loop over each layer using the children generator and initialize the layer weights using the built-in xavier_uniform_ initializer provided by nn.init module.
For example, you could put this in your __init__ definition:
for m in self.children():
if isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight)
m.bias.data.fill_(0.01)

Could not get batch from DataLoader

I'm trying to implement a model using lstm to generate sonnects in pytorch. When I tested the dataLoader, It took a lot of time and could not return data as expected. I review it and don't know where error locates. Please help me, here is the code(more detail in(https://paste.ubuntu.com/p/vspS3msNVW/))
(10th cell)
class SonnetDataset(Dataset):
def __init__(self, sonnet_in_ids: list, vocab: list, max_seq_length: int):
super().__init__()
self.data = sonnet_in_ids
self.vocab = vocab
self.vocab_size = len(vocab)
self.pad_id = self.vocab.index('<PAD>')
self.start_id = self.vocab.index('<START>')
self.end_id = self.vocab.index('<END>')
self.max_seq_length = max_seq_length + 2
print('init successfully')
def __len__(self):
return len(self.data)
def __getitem__(self, index) -> torch.LongTensor:
print('get item')
x = self.data[index]
x = [self.start_id] + x + [self.end_id]
# padding
x += [self.pad_id] * (self.max_seq_length - len(x))
x = torch.LongTensor(x)
print(x)
return x
batch_size = 4
train_set = SonnetDataset(sonnet_in_ids=sonnets_in_ids, vocab=vocab, max_seq_length=max_length)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
next(iter(train_loader))
Could not get data

Pytorch custom model automatically stored in cuda

I built a custom NN model like so:
class MyNNet(torch.nn.Module):
def __init__(self, inp_dim, n_classes):
super(MyNNet, self).__init__()
self.flat = torch.nn.Flatten()
self.l1 = torch.nn.Linear(inp_dim * inp_dim, 32)
self.l2 = torch.nn.Linear(32, 16)
self.l3 = torch.nn.Linear(16, n_classes)
def forward(self, X):
out = self.flat(X)
out = F.relu(self.l1(out))
out = F.relu(self.l2(out))
return self.l3(out)
And a simple training script that updates the model parameters:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyNNet(28, 10)
model.to(device)
optimizer = torch.optim.Adam(model.parameters())
loss = torch.nn.CrossEntropyLoss()
epochs = 20
for e in range(epochs):
train_l = 0.
for i, (s, c) in enumerate(train_loader):
s.to(device)
c.to(device)
y_hat = model(s)
l = loss(y_hat, c)
train_l += l
l.backward()
optimizer.step()
optimizer.zero_grad()
print(f'Epoch: {e}, AvgLoss: {train_l / len(train_loader)}')
As in the script I store the model to cuda and so I do with each batch of the dataset (MNIST). However the folllowing error appears: Expected all tensors to be on the same device, but found at least two devices
but when I comment model.to(device), then the script works. Does this mean PyTorch stores the custom models automatically into cuda?
Thanks.
Unlike Modules (where .to(...) works in-place), when moving Tensors to a device, you need to reassign them:
s = s.to(device)
c = c.to(device)

How to use Pytorch to create a custom EfficientNet with the last layer written correctly

I have a classification problem to predict 8 classes for example, I am using EfficientNetB3 in pytorch from here. However, I got confused on whether my custom class is correctly written. I think I want to strip the last layer of the pre-trained model to suit the 8 outputs right? Did I do it correctly? Because when I print y_preds = model(images) in my DataLoader, it seems to give me 1536 predictions. Is this an expected behavior?
!pip install geffnet
import geffnet
class EfficientNet(nn.Module):
def __init__(self, config):
super().__init__()
self.config = config
self.model = geffnet.create_model(config.effnet, pretrained=True)
n_features = self.model.classifier.in_features
# does the name fc matter?
self.fc = nn.Linear(n_features, config.num_classes)
self.model.classifier = nn.Identity()
def extract(self, x):
x = self.model(x)
return x
def forward(self, x):
x = self.extract(x).squeeze(-1).squeeze(-1)
return x
model = EfficientNet(config=config)
if torch.cuda.is_available():
model.cuda()
Sample code for printing y_pred:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for step, (images, labels) in enumerate(sample_loader):
images = images.to(device)
labels = labels.to(device)
batch_size = images.shape[0]
y_preds = model(images)
print('The predictions of the 4 images is as follows\n', y_preds)
break
You're not even using self.fc in forward pass.
Either just introduce it as:
def forward(self, x):
....
x = extract(x)...
x = fc(x)
return x
Or you can simply replace the layer named classifier (this way you don't need Identity layer):
self.model.classifier = nn.Linear(n_features, config.num_classes)
Also, here config.num_classes should be 8.

Chainer Autoencoder

I am trying to write a vanilla autoencoder for compressing 13 images. However I am getting the following error:
ValueError: train argument is not supported anymore. Use chainer.using_config
The shape of images is (21,28,3).
filelist = 'ex1.png', 'ex2.png',...11 other images
x = np.array([np.array(Image.open(fname)) for fname in filelist])
xs = x.astype('float32')/255.
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1764,800)
self.l2 = L.Linear(800,300)
# decoder part
self.l3 = L.Linear(300,800)
self.l4 = L.Linear(800,1764)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x, train=True):
h = F.dropout(self.activation(self.l1(x)), train=train)
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs,batch_size)
valid_iter = iterators.SerialIterator(xs,batch_size)
updater = training.StandardUpdater(train_iter,optimizer)
trainer = training.Trainer(updater,(n_epoch,"epoch"),out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
trainer.run()
Is the issue because of the number of nodes in the model or otherwise?
You need to wirte "decoder" part.
When you take mean_squared_error loss, the shape of h and x must be same.
AutoEncoder will encode original x to small space (100-dim) h, but after that we need to reconstruct x' from this h by adding decoder part.
Then loss can be calculated on this reconstructed x'.
For example, as follows (sorry i have not test it to run)
For Chainer v2~
train argument is handled by global configs, so you do not need train argument in dropout function.
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1308608,500)
self.l2 = L.Linear(500,100)
# decoder part
self.l3 = L.Linear(100,500)
self.l4 = L.Linear(500,1308608)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x):
h = F.dropout(self.activation(self.l1(x)))
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
For Chainer v1
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1308608,500)
self.l2 = L.Linear(500,100)
# decoder part
self.l3 = L.Linear(100,500)
self.l4 = L.Linear(500,1308608)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x, train=True):
h = F.dropout(self.activation(self.l1(x)), train=train)
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
You can also refer official Variational Auto Encoder example for the next step:
https://github.com/chainer/chainer/tree/master/examples/vae

Resources