I would like to set the initial weights in Pytorch to Xavier.
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
# self.linear1 = nn.Linear(1, 516*4*4)
self.linear1 = nn.Linear(2, 512*8)
self.linear2 = nn.Linear(256*16, 256*8)
self.linear3 = nn.Linear(256*8, 1)
def forward(self, x):
x = self.linear1(x)
x = torch.sigmoid(x)
x = self.linear2(x)
x = torch.sigmoid(x)
x = self.linear3(x)
You can loop over each layer using the children generator and initialize the layer weights using the built-in xavier_uniform_ initializer provided by nn.init module.
For example, you could put this in your __init__ definition:
for m in self.children():
if isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight)
m.bias.data.fill_(0.01)
Related
I built a custom NN model like so:
class MyNNet(torch.nn.Module):
def __init__(self, inp_dim, n_classes):
super(MyNNet, self).__init__()
self.flat = torch.nn.Flatten()
self.l1 = torch.nn.Linear(inp_dim * inp_dim, 32)
self.l2 = torch.nn.Linear(32, 16)
self.l3 = torch.nn.Linear(16, n_classes)
def forward(self, X):
out = self.flat(X)
out = F.relu(self.l1(out))
out = F.relu(self.l2(out))
return self.l3(out)
And a simple training script that updates the model parameters:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyNNet(28, 10)
model.to(device)
optimizer = torch.optim.Adam(model.parameters())
loss = torch.nn.CrossEntropyLoss()
epochs = 20
for e in range(epochs):
train_l = 0.
for i, (s, c) in enumerate(train_loader):
s.to(device)
c.to(device)
y_hat = model(s)
l = loss(y_hat, c)
train_l += l
l.backward()
optimizer.step()
optimizer.zero_grad()
print(f'Epoch: {e}, AvgLoss: {train_l / len(train_loader)}')
As in the script I store the model to cuda and so I do with each batch of the dataset (MNIST). However the folllowing error appears: Expected all tensors to be on the same device, but found at least two devices
but when I comment model.to(device), then the script works. Does this mean PyTorch stores the custom models automatically into cuda?
Thanks.
Unlike Modules (where .to(...) works in-place), when moving Tensors to a device, you need to reassign them:
s = s.to(device)
c = c.to(device)
I get an error saying that the input should be of type Tensor, not tuple. I do not know how to work around this problem, as I am already implementing the return_dict=False method as stated in the migration plan.
My model is as follows:
class XLNetClassifier(torch.nn.Module):
def __init__(self, dropout_rate=0.1):
super(XLNetClassifier, self).__init__()
self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased', return_dict=False)
self.d1 = torch.nn.Dropout(dropout_rate)
self.l1 = torch.nn.Linear(768, 64)
self.bn1 = torch.nn.LayerNorm(64)
self.d2 = torch.nn.Dropout(dropout_rate)
self.l2 = torch.nn.Linear(64, 3)
def forward(self, input_ids, attention_mask):
x = self.XLNet(input_ids=input_ids, attention_masks = attention_mask)
x = self.d1(x)
x = self.l1(x)
x = self.bn1(x)
x = torch.nn.Tanh()(x)
x = self.d2(x)
x = self.l2(x)
return x
The error occurs when calling the dropout.
The XLNetModel returns two output values:
last_hidden_state
mems
That means you get a tuple and not a single tensor as the error message says. Your class definition should therefore be:
from transformers import XLNetModel, XLNetTokenizerFast
import torch
class XLNetClassifier(torch.nn.Module):
def __init__(self, dropout_rate=0.1):
super(XLNetClassifier, self).__init__()
self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased', return_dict=False)
self.d1 = torch.nn.Dropout(dropout_rate)
self.l1 = torch.nn.Linear(768, 64)
self.bn1 = torch.nn.LayerNorm(64)
self.d2 = torch.nn.Dropout(dropout_rate)
self.l2 = torch.nn.Linear(64, 3)
def forward(self, input_ids, attention_mask):
x = self.XLNet(input_ids=input_ids, attention_masks = attention_mask)
x = self.d1(x[0])
x = self.l1(x)
x = self.bn1(x)
x = torch.nn.Tanh()(x)
x = self.d2(x)
x = self.l2(x)
return x
tokenizer = XLNetTokenizerFast.from_pretrained('xlnet-base-cased')
model = XLNetClassifier()
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt", return_token_type_ids=False)
outputs = model(**inputs)
or even better without return_dict=False
class XLNetClassifier(torch.nn.Module):
def __init__(self, dropout_rate=0.1):
super(XLNetClassifier, self).__init__()
self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased')
self.d1 = torch.nn.Dropout(dropout_rate)
self.l1 = torch.nn.Linear(768, 64)
self.bn1 = torch.nn.LayerNorm(64)
self.d2 = torch.nn.Dropout(dropout_rate)
self.l2 = torch.nn.Linear(64, 3)
def forward(self, input_ids, attention_mask):
x = self.XLNet(input_ids=input_ids, attention_masks = attention_mask)
x = self.d1(x.last_hidden_state)
x = self.l1(x)
x = self.bn1(x)
x = torch.nn.Tanh()(x)
x = self.d2(x)
x = self.l2(x)
return x
I have a classification problem to predict 8 classes for example, I am using EfficientNetB3 in pytorch from here. However, I got confused on whether my custom class is correctly written. I think I want to strip the last layer of the pre-trained model to suit the 8 outputs right? Did I do it correctly? Because when I print y_preds = model(images) in my DataLoader, it seems to give me 1536 predictions. Is this an expected behavior?
!pip install geffnet
import geffnet
class EfficientNet(nn.Module):
def __init__(self, config):
super().__init__()
self.config = config
self.model = geffnet.create_model(config.effnet, pretrained=True)
n_features = self.model.classifier.in_features
# does the name fc matter?
self.fc = nn.Linear(n_features, config.num_classes)
self.model.classifier = nn.Identity()
def extract(self, x):
x = self.model(x)
return x
def forward(self, x):
x = self.extract(x).squeeze(-1).squeeze(-1)
return x
model = EfficientNet(config=config)
if torch.cuda.is_available():
model.cuda()
Sample code for printing y_pred:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for step, (images, labels) in enumerate(sample_loader):
images = images.to(device)
labels = labels.to(device)
batch_size = images.shape[0]
y_preds = model(images)
print('The predictions of the 4 images is as follows\n', y_preds)
break
You're not even using self.fc in forward pass.
Either just introduce it as:
def forward(self, x):
....
x = extract(x)...
x = fc(x)
return x
Or you can simply replace the layer named classifier (this way you don't need Identity layer):
self.model.classifier = nn.Linear(n_features, config.num_classes)
Also, here config.num_classes should be 8.
I am trying to write a vanilla autoencoder for compressing 13 images. However I am getting the following error:
ValueError: train argument is not supported anymore. Use chainer.using_config
The shape of images is (21,28,3).
filelist = 'ex1.png', 'ex2.png',...11 other images
x = np.array([np.array(Image.open(fname)) for fname in filelist])
xs = x.astype('float32')/255.
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1764,800)
self.l2 = L.Linear(800,300)
# decoder part
self.l3 = L.Linear(300,800)
self.l4 = L.Linear(800,1764)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x, train=True):
h = F.dropout(self.activation(self.l1(x)), train=train)
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs,batch_size)
valid_iter = iterators.SerialIterator(xs,batch_size)
updater = training.StandardUpdater(train_iter,optimizer)
trainer = training.Trainer(updater,(n_epoch,"epoch"),out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
trainer.run()
Is the issue because of the number of nodes in the model or otherwise?
You need to wirte "decoder" part.
When you take mean_squared_error loss, the shape of h and x must be same.
AutoEncoder will encode original x to small space (100-dim) h, but after that we need to reconstruct x' from this h by adding decoder part.
Then loss can be calculated on this reconstructed x'.
For example, as follows (sorry i have not test it to run)
For Chainer v2~
train argument is handled by global configs, so you do not need train argument in dropout function.
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1308608,500)
self.l2 = L.Linear(500,100)
# decoder part
self.l3 = L.Linear(100,500)
self.l4 = L.Linear(500,1308608)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x):
h = F.dropout(self.activation(self.l1(x)))
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
For Chainer v1
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1308608,500)
self.l2 = L.Linear(500,100)
# decoder part
self.l3 = L.Linear(100,500)
self.l4 = L.Linear(500,1308608)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x, train=True):
h = F.dropout(self.activation(self.l1(x)), train=train)
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
You can also refer official Variational Auto Encoder example for the next step:
https://github.com/chainer/chainer/tree/master/examples/vae
I am writing an autoencoder model for an image encoding-decoding problem.
I want to understand the node distribution in each layer of the model suitable for images.
For the below code I am using 10 images of shape (21*28*3).
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1764,882)
self.l2 = L.Linear(882,441)
# decoder part
self.l3 = L.Linear(441,882)
self.l4 = L.Linear(882,1764)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x):
h = F.dropout(self.activation(self.l1(x)))
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
gpu_id = 0
n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs,batch_size)
valid_iter = iterators.SerialIterator(xs,batch_size)
updater = training.StandardUpdater(train_iter,optimizer)
trainer = training.Trainer(updater,(n_epoch,"epoch"),out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
While running trainer.run():
InvalidType:
Invalid operation is performed in: LinearFunction (Forward)
Expect: x.shape[1] == W.shape[1]
Actual: 1764 != 882
I want to understand how node distribution works layer wise in a model. Please suggest any resource. Also how to assign nodes in layers in case of small number of training images.