Custom weight initialisation causing error - pytorch - machine-learning

%reset -f
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import torch.utils.data as data_utils

num_epochs = 200
x1 = np.array([0,0])
x2 = np.array([0,1])
x3 = np.array([1,0])
x4 = np.array([1,1])

x = torch.tensor([x1,x2,x3,x4]).float()
y = torch.tensor([0,1,1,0]).long()
train = data_utils.TensorDataset(x,y)
train_loader = data_utils.DataLoader(train, batch_size=2, shuffle=True)

device = 'cpu'
input_size = 2
hidden_size = 100
num_classes = 2
learning_rate = .0001

torch.manual_seed(24)

def weights_init(m):
    m.weight.data.normal_(0.0, 1)

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

model = NeuralNet(input_size, hidden_size, num_classes).to(device)
model.apply(weights_init)

criterionCE = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = criterionCE(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

outputs = model(x)
print(outputs.data.max(1)[1])
I'm using the following to initialize the weights:

def weights_init(m):
    m.weight.data.normal_(0.0, 1)

But the following error is thrown:
~/anaconda3/envs/pytorch/lib/python3.7/site-packages/torch/nn/modules/module.py in __getattr__(self, name)
533 return modules[name]
534 raise AttributeError("'{}' object has no attribute '{}'".format(
--> 535 type(self).__name__, name))
536
537 def __setattr__(self, name, value):
AttributeError: 'ReLU' object has no attribute 'weight'
Is this the correct way to initialize the weights?
Also, should the object be of type nn.Module, not ReLU?

In addition to what Fabio mentioned about checking the layer type (ReLU is an activation, not a trainable layer), you can also do the weight initialization in the __init__ method itself, as is done in torchvision's VGG:
https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
def __init__(self, features, num_classes=1000, ...):
    ----snip---
    self._initialize_weights()

def _initialize_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Linear):
            m.weight.data.normal_(0.0, 1)
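Applied to the NeuralNet from the question, that pattern would look roughly like this (a sketch, keeping the question's N(0, 1) initialization):

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self._initialize_weights()

    def _initialize_weights(self):
        # self.modules() yields every sub-module, including ReLU,
        # so the isinstance check skips the weight-free ones
        for m in self.modules():
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0.0, 1)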

You are trying to set the weights of a weight-free layer (ReLU).
Inside weights_init, you should check the type of layers before initializing weights. For instance:
def weights_init(m):
    if type(m) == nn.Linear:
        m.weight.data.normal_(0.0, 1)
See How to initialize weights in PyTorch?.
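A related sketch (an addition, not from either answer): the same type guard written with the torch.nn.init helpers, the usual alternative to mutating .data in place:

def weights_init(m):
    # ReLU (and other activations) have no weight attribute, so guard by type
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0.0, std=1.0)
        nn.init.zeros_(m.bias)

model.apply(weights_init)  # .apply() visits every sub-module recursively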

Related

I am new to PyTorch; why am I getting the attribute error even after adding super(ClassName, self).__init__()?

I am working on a chatbot built with PyTorch.
I am unable to figure out the reason for the attribute error, even after adding super to the NeuralNet class.
I am using Jupyter Notebook for this project.
Here is my code so far (parts from both model.ipynb and train.ipynb):
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()

# Train
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from ipynb.fs.full.model import NeuralNet

class ChatDataset(Dataset):
    def __init__(self):
        self.n_samples = len(x_train)
        self.x_data = x_train
        self.y_data = y_train

# Hyperparameters
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(all_words)
# print(input_size, len(all_words))
# print(output_size, tags)
learning_rate = 0.001
num_epochs = 1000

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=2)
model = NeuralNet(input_size, hidden_size, output_size)
ERROR:
~\Untitled Folder\model.ipynb in __init__(self, input_size, hidden_size, num_classes)
9 "source": [
10 "import torch\n",
---> 11 "import torch.nn as nn"
12 ]
13 },
~\anaconda3a\lib\site-packages\torch\nn\modules\module.py in __setattr__(self, name, value)
1234 if isinstance(value, Module):
1235 if modules is None:
-> 1236 raise AttributeError(
1237 "cannot assign module before Module.__init__() call")
1238 remove_from(self.__dict__, self._parameters, self._buffers, self._non_persistent_buffers_set)
AttributeError: cannot assign module before Module.__init__() call
This is most probably because you haven't called super().__init__() in the __init__ method of NeuralNet before registering sub-modules to it. See here for additional details.
The only missing component is a __len__ method on ChatDataset; other than that, the provided code runs fine.
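For reference, a minimal sketch of a complete ChatDataset (assuming x_train and y_train are passed in rather than read from globals; note a map-style Dataset needs __getitem__ as well as __len__):

from torch.utils.data import Dataset

class ChatDataset(Dataset):
    def __init__(self, x_train, y_train):
        self.n_samples = len(x_train)
        self.x_data = x_train
        self.y_data = y_train

    def __getitem__(self, index):
        # DataLoader fetches samples one index at a time
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        # DataLoader needs the dataset size to form batches
        return self.n_samples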

How to properly train a PyTorch Geometric GNN model on my custom toy dataset?

I created my own custom toy dataset of graphs in order to learn graph neural networks in PyTorch Geometric (PyG).
The data looks like the following:
Data(x=[20, 1], edge_index=[2, 20], y=[1])
I also created a dataloader as follows:
from torch_geometric.loader import DataLoader
train_dataloader = DataLoader(dataset[0:8000], batch_size=32, shuffle=True)
test_dataloader = DataLoader(dataset[8000:10000], batch_size=32, shuffle=True)
Therefore, a batch will look like:
DataBatch(x=[640, 1], edge_index=[2, 640], y=[32], batch=[640], ptr=[33])
My attempt to make a Graph-CNN:
import torch
from torch import nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset[0].num_node_features, 16)
        self.conv2 = GCNConv(16, 16)
        self.out = nn.Linear(16, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        out = self.out(x)
        return out

model = GCN()
When I do something like:
criterion = torch.nn.CrossEntropyLoss()
target = batch.y.to(torch.float32)
loss = criterion(out, target)
loss
I get the error:
ValueError: Expected input batch_size (640) to match target batch_size (32).
Full code is in my github repo here:
https://github.com/amine179/myGNN-learning/blob/main/My%20first%20GCNN.ipynb
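For context (this sketch is an assumption about the intended fix, not from the question): the model returns one row per node, [640, 1] for a 32-graph batch, while y holds one label per graph, [32]. With graph-level targets, the usual pattern is to pool node embeddings per graph with global_mean_pool before the output layer:

import torch
from torch import nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset[0].num_node_features, 16)
        self.conv2 = GCNConv(16, 16)
        self.out = nn.Linear(16, 2)  # assuming 2 classes for CrossEntropyLoss

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        # collapse node embeddings to one vector per graph: [640, 16] -> [32, 16]
        x = global_mean_pool(x, data.batch)
        return self.out(x)  # [32, 2], matching y of shape [32]

Note also that nn.CrossEntropyLoss expects integer class indices as targets (batch.y.long()), not float32.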

How to use variable learning rate that decreases with loss using pytorch-geometric?

I have the following code snippet from a PyTorch Geometric example. I want to use a learning rate that decreases as the loss decreases during training. I tried using a scheduler, but that didn't work for me.
A clean code snippet is below. Can anyone provide suggestions or help on this matter?
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import AGNNConv

dataset = 'Cora'
path = 'Cora'
dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
data = dataset[0]

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.lin1 = torch.nn.Linear(dataset.num_features, 16)
        self.prop1 = AGNNConv(requires_grad=False)
        self.prop2 = AGNNConv(requires_grad=True)
        self.lin2 = torch.nn.Linear(16, dataset.num_classes)

    def forward(self):
        x = F.dropout(data.x, training=self.training)
        x = F.relu(self.lin1(x))
        x = self.prop1(x, data.edge_index)
        x = self.prop2(x, data.edge_index)
        x = F.dropout(x, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()

def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs

best_val_acc = test_acc = 0
for epoch in range(1, 201):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))
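For the loss-driven schedule the question asks about, one standard tool is torch.optim.lr_scheduler.ReduceLROnPlateau, which lowers the learning rate when a monitored metric stops improving. A minimal sketch against the loop above (returning the loss from train() is an addition, not in the original code):

from torch.optim.lr_scheduler import ReduceLROnPlateau

scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

def train():
    model.train()
    optimizer.zero_grad()
    loss = F.nll_loss(model()[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

for epoch in range(1, 201):
    loss = train()
    scheduler.step(loss)  # lowers lr once the loss plateaus for `patience` epochs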

Multi GPU training in Pytorch causes my system to freeze

When I wrap my model in nn.DataParallel(model) and start training my screen freezes and I have to manually restart the computer every time.
I've tried a few variations, like not adding .to(device) to every x and y, but whenever nn.DataParallel is used I seem to cause the computer to freeze.
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import models, datasets, transforms
import torch.utils.data

DataLoader = torch.utils.data.DataLoader
random_split = torch.utils.data.random_split
global_rank = 0
MNIST = datasets.MNIST

class MLPClassifier(nn.Module):
    def __init__(self):
        super(MLPClassifier, self).__init__()
        self.layer_1 = torch.nn.Linear(28 * 28, 128)
        self.layer_2 = torch.nn.Linear(128, 444)
        self.layer_3 = torch.nn.Linear(444, 333)
        self.layer_4 = torch.nn.Linear(333, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.layer_1(x)
        x = F.relu(x)
        x = self.layer_2(x)
        x = F.relu(x)
        x = self.layer_3(x)
        x = F.relu(x)
        x = self.layer_4(x)
        return x

# Download data
if global_rank == 0:
    mnist_train = MNIST(os.getcwd(), train=True, download=True)
    mnist_test = MNIST(os.getcwd(), train=False, download=True)
    # dist.barrier()

# Transforms
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])
mnist_train = MNIST(os.getcwd(), train=True, transform=transform)

# Split dataset
mnist_train, mnist_val = random_split(mnist_train, [55000, 5000])
mnist_test = MNIST(os.getcwd(), train=False, download=True)

# Build dataloaders
mnist_train = DataLoader(mnist_train, batch_size=256)
mnist_val = DataLoader(mnist_val, batch_size=256)
mnist_test = DataLoader(mnist_test, batch_size=256)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MLPClassifier()
model = nn.DataParallel(model)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train loop
model.train()
num_epochs = 1
for epoch in range(num_epochs):
    for train_batch in mnist_train:
        x, y = train_batch
        logits = model(x.to(device))
        loss = F.cross_entropy(logits, y.to(device))
        print('train loss: ', loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Eval loop
    model.eval()
    with torch.no_grad():
        val_loss_a = []
        for val_batch in mnist_val:
            x, y = val_batch
            logits = model(x.to(device))
            val_loss = F.cross_entropy(logits, y.to(device))
            val_loss_a.append(val_loss)
        avg_val_loss = torch.stack(val_loss_a).mean()
    model.train()

How to fix this loss is NaN problem in PyTorch of this RNN with GRU?

I'm completely new to PyTorch and have tried out some models. I wanted to build a simple RNN for stock-market price prediction and found the following code:
I load the dataset with pandas, then split it into training and test data and load it into a PyTorch DataLoader for later use in the training process. The model is defined in the GRU class. But the actual problem seems to be the optimisation. I think the problem could be gradient explosion. I thought about adding gradient clipping, but the GRU design should actually prevent gradient explosion, or am I wrong? What could cause the loss to be NaN right away (already in the first epoch)?
from sklearn.preprocessing import MinMaxScaler
import time
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

batch_size = 200
input_dim = 1
hidden_dim = 32
num_layers = 2
output_dim = 1
num_epochs = 10

nvda = pd.read_csv('dataset/stocks/NVDA.csv')
price = nvda[['Close']]
scaler = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler.fit_transform(price['Close'].values.reshape(-1, 1))

def split_data(stock, lookback):
    data_raw = stock.to_numpy()  # convert to numpy array
    data = []
    # create all possible sequences of length lookback
    for index in range(len(data_raw) - lookback):
        data.append(data_raw[index: index + lookback])
    data = np.array(data)
    test_set_size = int(np.round(0.2 * data.shape[0]))
    train_set_size = data.shape[0] - test_set_size
    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :]
    x_test = data[train_set_size:, :-1]
    y_test = data[train_set_size:, -1, :]
    return [x_train, y_train, x_test, y_test]

lookback = 20  # choose sequence length
x_train, y_train, x_test, y_test = split_data(price, lookback)

train_data = TensorDataset(torch.from_numpy(x_train).float(), torch.from_numpy(y_train).float())
train_data = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)
test_data = TensorDataset(torch.from_numpy(x_test).float(), torch.from_numpy(y_test).float())
test_data = DataLoader(test_data, shuffle=True, batch_size=batch_size, drop_last=True)

class GRU(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(GRU, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x, h):
        out, h = self.gru(x, h)
        out = self.fc(self.relu(out[:, -1]))
        return out, h

    def init_hidden(self, batch_size):
        weight = next(self.parameters()).data
        hidden = weight.new(self.num_layers, batch_size, self.hidden_dim).zero_()
        return hidden

model = GRU(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, num_layers=num_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0000000001)

model.train()
start_time = time.time()
h = model.init_hidden(batch_size)
for epoch in range(1, num_epochs + 1):
    for x, y in train_data:
        h = h.data
        model.zero_grad()
        y_train_pred, h = model(x, h)
        loss = criterion(y_train_pred, y)
        print("Epoch ", epoch, "MSE: ", loss.item())
        loss.backward()
        optimizer.step()
training_time = time.time() - start_time
print("Training time: {}".format(training_time))
This is the dataset which I used.
Not sure if it is the case, but did you preprocess and clean the data? Maybe there are some missing values or something strange about it. I checked it here: https://ca.finance.yahoo.com/quote/NVDA/history?p=NVDA and it seems that every couple of rows there is some inconsistency. Like I said, I do not know if that is the cause, but it may be.
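A quick sanity check along those lines (a sketch, assuming the same NVDA.csv path as in the question):

import pandas as pd

nvda = pd.read_csv('dataset/stocks/NVDA.csv')

# Any NaN that survives into the tensors turns the MSE loss into NaN
# on the very first batch, which matches the symptom described.
print(nvda['Close'].isna().sum())

# Drop incomplete rows before scaling and windowing
nvda = nvda.dropna(subset=['Close']).reset_index(drop=True)

If gradient explosion really were the cause, torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) between loss.backward() and optimizer.step() would be the usual guard, but a NaN in the very first epoch at such a tiny learning rate points at the data instead.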
