Assign custom weight in pytorch - machine-learning

I'm trying to assign some custom weight to my PyTorch model but it doesn't work correctly.
class Mod(nn.Module):
    """Minimal module whose only child is a Sequential wrapping one Linear(1, 5)."""

    def __init__(self):
        super().__init__()
        # The Linear layer is element 0 of the Sequential container.
        layers = [nn.Linear(1, 5)]
        self.linear = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the wrapped layer stack and return the result."""
        return self.linear(x)
# Instantiate the model defined above.
mod = Mod()
# NOTE: mod.linear is the nn.Sequential container, not the nn.Linear inside it,
# so this stores a plain tensor attribute named `weight` on the container.
mod.linear.weight = torch.tensor([1. ,2. ,3. ,4. ,5.], requires_grad=True)
# Likewise this attaches a new Parameter named `bias` to the container; the
# Linear layer's own bias is left untouched.
mod.linear.bias = torch.nn.Parameter(torch.tensor(0., requires_grad=True))
# Prints the tensor assigned above, which is not the weight used by forward().
print(mod.linear.weight)
>>> tensor([1., 2., 3., 4., 5.], requires_grad=True)
output = mod(torch.ones(1))
print(output)
>>> tensor([ 0.2657, 0.3220, -0.0726, -1.6987, 0.3945], grad_fn=<AddBackward0>)
The output is expected to be [1., 2., 3., 4., 5.] but it doesn't work as expected. What am I missing here?

You are not updating the weights in the right place. Your self.linear is not an nn.Linear layer, but rather an nn.Sequential container. Your nn.Linear is the first layer in the sequential. To access it you need to index self.linear:
# Overwrite the parameters of the Linear layer itself, which is element 0 of
# the Sequential container. copy_() and zero_() write into the existing
# Parameter objects (so an optimizer that already holds them keeps working),
# and no_grad() keeps autograd from recording the writes.
with torch.no_grad():
    # nn.Linear(1, 5) stores its weight as (out_features, in_features) = (5, 1),
    # hence the extra trailing axis on the new values.
    mod.linear[0].weight.copy_(torch.tensor([1., 2., 3., 4., 5.])[:, None])
    mod.linear[0].bias.zero_()  # bias is not a scalar here: it has 5 entries

Related

PyTorch batch size issue when comparing outputs from model and labels

I'm having issues with the batch size being halved from 16 to 8 when running inputs through my model. I've tried tweaking the input/output size between the maxpool and linear layer, but that doesn't work. I was wondering if it has something to do with my loss criterion inputs or if the model should be outputting 16 instead of 8.
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from os import listdir
import os
from os.path import isdir
from torchsummary import summary
# Preprocessing applied to every frame: resize to 32x32, convert to a tensor,
# then normalise each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)
# Define the custom dataset
class VideoDataset(Dataset):
    """Dataset of fixed-length video clips stored as per-frame JPEG files.

    Each entry of ``data_dir`` is a directory holding ``frame_0.jpg`` up to
    ``frame_{num_frames - 1}.jpg``. A clip is labelled 1 when its directory
    path contains ``"squat"`` and 0 otherwise.

    Args:
        data_dir: Sequence of clip directory paths.
        transform: Optional callable applied to every loaded frame.
        num_frames: Number of frames read per clip (defaults to 10).
    """

    def __init__(self, data_dir, transform=None, num_frames=10):
        self.data_dir = data_dir
        self.transform = transform
        self.num_frames = num_frames

    def __len__(self):
        """Return the number of clips."""
        return len(self.data_dir)

    @staticmethod
    def _label_for(video_dir):
        """Return 1 for a squat clip and 0 for any other exercise."""
        # str.find() returns -1 (truthy) on a miss and 0 (falsy) for a match at
        # index 0, so it cannot be used as a boolean; test membership instead.
        return 1 if "squat" in video_dir else 0

    def __getitem__(self, idx):
        """Load clip ``idx`` and return ``{'video': stacked frames, 'label': int}``."""
        video_dir = self.data_dir[idx]
        print(video_dir)
        frames = []
        for i in range(self.num_frames):
            img = Image.open(f"{video_dir}/frame_{i}.jpg")
            if self.transform:
                img = self.transform(img)
            frames.append(img)
        # torch.stack needs tensors, so the transform is expected to yield them.
        video = torch.stack(frames)
        return {'video': video, 'label': self._label_for(video_dir)}
# Load the data
path = "videos/squat/"
path_pullups = "videos/pull ups/"
path_situp = "videos/situp/"
# Gather every clip sub-directory, one class root after another, so the
# resulting list is ordered squat -> pull ups -> situp.
data_dir = []
for class_root in (path, path_pullups, path_situp):
    data_dir.extend(
        class_root + entry.name
        for entry in os.scandir(class_root)
        if entry.is_dir()
    )
print(len(data_dir)/2)
# Split the data into training and validation sets
train_data = VideoDataset(data_dir[:243], transform=transform) # Use first two classes for training
#print("train" + str(train_data.data_dir))
#valid_data = VideoDataset(data_dir[165:], transform=transform) # Use last class for validation
# Define the data loaders
train_loader = DataLoader(train_data, shuffle=True, batch_size=16)
#valid_loader = DataLoader(valid_data, batch_size=16, shuffle=False)
# Define the CNN model
class Net(torch.nn.Module):
    """3D-convolutional binary classifier for clips shaped (batch, 10, 3, 32, 32).

    The 10 frames are treated as Conv3d input channels, so the convolution
    runs over the (3, 32, 32) colour/height/width volume. The output is a
    sigmoid probability of shape (batch, 1).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv3d(10, 16, kernel_size=(3, 3, 3), stride=1, padding=1)
        self.pool = torch.nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2, padding=0)
        # conv1 keeps (3, 32, 32); pooling with kernel 2 / stride 2 gives
        # (1, 16, 16), so each sample flattens to 16 * 1 * 16 * 16 = 4096 values.
        self.fc1 = torch.nn.Linear(16 * 1 * 16 * 16, 32)
        self.fc2 = torch.nn.Linear(32, 3)
        self.fc3 = torch.nn.Linear(3, 1)

    def forward(self, x):
        """Return per-sample probabilities of shape (batch, 1)."""
        x = self.pool(torch.nn.functional.relu(self.conv1(x)))
        # Flatten everything except the batch axis. Pinning the batch size here
        # means a wrong feature count raises in fc1 instead of silently
        # reshaping samples into each other and changing the batch size.
        x = x.view(x.size(0), -1)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.fc3(x)
        return torch.sigmoid(x)
# Initialize the model, loss function, and optimizer
model = Net()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Print the layer summary once, before training, rather than on every batch.
summary(model, (10, 3, 32, 32), device='cpu')
# Train the model
for epoch in range(10): # Train for 10 epochs
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs = data['video']
        # BCELoss needs floating-point targets with the same shape as the
        # model output; the model's last layer is Linear(3, 1), so reshape the
        # collated integer labels from (batch,) to (batch, 1).
        labels = data['label'].float().view(-1, 1)
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch + 1} loss: {running_loss / (i + 1)}")
# Evaluate the model
# correct = 0
# total = 0
# with torch.no_grad():
# for data in valid_loader:
# inputs, labels = data['video'], data['label']
# outputs = model(inputs)
# _, predicted = torch.max(outputs.data, 1)
# total += labels.size(0)
# correct += (predicted == labels).sum().item()
# print(f"Accuracy of the model on the validation set: {100 * correct / total}%")
Sample inputs are frames from video clips like this:
described by their exercise category such as squats, situps, pullups, etc.
Desired outputs for this model would be a binary representation of either 1 or 0 that each exercise given is a squat or not as labeled and indicated in the dataset customization function.

How to check if any of the gradients in a PyTorch model is nan?

I have a toy model:
import torch
import torch.nn as nn
import torch.optim as optim
class Model(nn.Module):
    """Toy MLP: Linear(1, 2) -> ReLU -> Linear(2, 3) -> ReLU -> Linear(3, 1)."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(1, 2)
        self.fc2 = nn.Linear(2, 3)
        self.fc3 = nn.Linear(3, 1)

    def forward(self, x):
        """Return the network output for ``x``; no activation after the last layer."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Build the toy network and an Adam optimizer over all of its parameters
# (Adam's default hyper-parameters are used).
net = Model()
opt = optim.Adam(net.parameters())
The training loop is
# One fixed batch of three random scalar inputs, reused on every step.
features = torch.rand(3, 1)
for i in range(10):
    opt.zero_grad()
    out = net(features)
    # Squared difference between 5 and the summed outputs; the mean of that
    # single value is the value itself.
    residual = torch.tensor(5) - out.sum()
    loss = residual.square().mean()
    loss.backward()
    opt.step()
How can I check if any of the gradients is nan? That is, if just 1 of the gradients is nan print something/break
pseudocode:
for i in range(10):
opt.zero_grad()
out = net(features)
loss = torch.mean(torch.square(torch.tensor(5) - torch.sum(out)))
loss.backward()
if_gradients_nan:
print("NAN")
opt.step()
You can check as shown below. This approach only checks the gradients with respect to the model parameters. It does not look at intermediate gradients; in fact, those intermediate gradients no longer exist after loss.backward() is called without the retain_graph=True argument. For demonstration purposes, I have multiplied the output of the first torch.relu(x) by float("inf") so that some of the gradients become nan.
...
def forward(self, x):
    """Forward pass that deliberately injects inf after the first ReLU."""
    hidden = torch.relu(self.fc1(x))
    # Demonstration only: scaling by inf makes later gradients come out as nan.
    hidden = hidden * float("inf")
    hidden = torch.relu(self.fc2(hidden))
    return self.fc3(hidden)
...
loss = (torch.tensor(5) - out.sum()).square().mean()
loss.backward()
# Show, for each parameter, which entries of its gradient are nan.
for name, param in net.named_parameters():
    nan_mask = param.grad.isnan()
    print(name, nan_mask)
opt.step()
This prints
fc1.weight tensor([[False],
[False]])
fc1.bias tensor([False, False])
fc2.weight tensor([[True, True],
[True, True],
[True, True]])
fc2.bias tensor([True, True, True])
fc3.weight tensor([[True, True, True]])
fc3.bias tensor([True])
fc1.weight tensor([[False],
[False]])
fc1.bias tensor([False, False])
fc2.weight tensor([[True, True],
[True, True],
[True, True]])
...
To check if any of the gradients is nan, you can use
# Stop as soon as any parameter gradient contains a nan.
for name, param in net.named_parameters():
    # A parameter that took no part in the backward pass (frozen or unused)
    # has grad None, which torch.isnan() cannot accept, so skip it.
    if param.grad is not None and torch.isnan(param.grad).any():
        print("nan gradient found")
        raise SystemExit

Initialize using a previous .pth and train for further epochs in PyTorch

How do I initialize a UNet model from its previous weights and further train the model for more epochs?
This is current model and after training I am saving the state_dict() as .pth file
# U-Net style network: five double_conv stages on the way down, four
# transposed-convolution + double_conv stages on the way up, and a final 1x1
# convolution producing a single output channel.
# NOTE(review): double_conv is defined elsewhere; it is presumably two stacked
# convolutions mapping in_channels -> out_channels -- confirm.
class UNet(nn.Module):
def __init__(self):
super(UNet,self).__init__()
# encoder
# 2x2 max pooling with stride 2; channel widths grow 3 -> 64 -> 128 -> 256 -> 512 -> 1024.
self.max_pool_2x2 = nn.MaxPool2d(kernel_size=2,stride=2)
self.down_conv_1 = double_conv(3,64)
self.down_conv_2 = double_conv(64,128)
self.down_conv_3 = double_conv(128,256)
self.down_conv_4 = double_conv(256,512)
self.down_conv_5 = double_conv(512,1024)
# decoder
# Each up_trans halves the channel count with a kernel-2 / stride-2 transposed
# convolution. The following up_conv takes twice that many input channels,
# presumably because the matching encoder feature map is concatenated in
# forward() -- TODO confirm, forward is not shown here.
self.up_trans_1= nn.ConvTranspose2d(in_channels=1024,
out_channels=512,
kernel_size=2,
stride=2)
self.up_conv_1 = double_conv(1024,512)
self.up_trans_2= nn.ConvTranspose2d(in_channels=512,
out_channels=256,
kernel_size=2,
stride=2)
self.up_conv_2 = double_conv(512,256)
self.up_trans_3= nn.ConvTranspose2d(in_channels=256,
out_channels=128,
kernel_size=2,
stride=2)
self.up_conv_3 = double_conv(256,128)
self.up_trans_4 = nn.ConvTranspose2d(in_channels=128,
out_channels=64,
kernel_size=2,
stride=2)
self.up_conv_4 = double_conv(128,64)
# 1x1 convolution mapping the 64 decoder channels to one output channel.
self.out = nn.Conv2d(64,1,kernel_size=1)
def forward(self, image):
##code for forward prop
# Build the network and move its parameters to the GPU.
model=UNet()
model.cuda()
# y= model(image)
# model.train()
# optimizer = optim.SGD(model.parameters(), lr = 0.1, momentum=0.9)
Now I want to initialize the same model with that .pth file and train further?
# Read the saved state_dict from disk and copy its tensors into the
# already-constructed model.
# NOTE(review): only the model weights are restored here; optimizer state
# (e.g. SGD momentum buffers) starts fresh unless it was saved and is reloaded
# separately -- confirm that is acceptable for resuming training.
state= torch.load("/content/model_WandB_12.pth")
model.load_state_dict(state)
used this to load the model and
then run the training loop

Variational Autoencoder's sampling problem

My vae class looks like this:
class Encoder(nn.Module):
    """Convolutional encoder returning the latent mean and log-variance."""

    def __init__(self):
        super().__init__()
        c = capacity
        flat_features = c * 2 * 7 * 7
        # Two stride-2 convolutions; assuming 1 x 28 x 28 inputs the feature
        # maps are c x 14 x 14 after conv1 and 2c x 7 x 7 after conv2.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=c, kernel_size=4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=c, out_channels=c * 2, kernel_size=4, stride=2, padding=1)
        self.fc_mu = nn.Linear(in_features=flat_features, out_features=latent_dims)
        self.fc_logvar = nn.Linear(in_features=flat_features, out_features=latent_dims)

    def forward(self, x):
        """Return ``(mu, logvar)`` for a batch of images."""
        features = F.relu(self.conv2(F.relu(self.conv1(x))))
        # Flatten each sample's feature maps into a single vector.
        flat = features.view(features.size(0), -1)
        return self.fc_mu(flat), self.fc_logvar(flat)
class Decoder(nn.Module):
    """Maps latent vectors back to single-channel images with values in (0, 1)."""

    def __init__(self):
        super().__init__()
        c = capacity
        self.fc = nn.Linear(in_features=latent_dims, out_features=c * 2 * 7 * 7)
        self.conv2 = nn.ConvTranspose2d(in_channels=c * 2, out_channels=c, kernel_size=4, stride=2, padding=1)
        self.conv1 = nn.ConvTranspose2d(in_channels=c, out_channels=1, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        """Decode a batch of latent vectors into images."""
        flat = self.fc(x)
        # Unflatten each feature vector into (2 * capacity) maps of size 7 x 7.
        feature_maps = flat.view(flat.size(0), capacity * 2, 7, 7)
        hidden = F.relu(self.conv2(feature_maps))
        # Sigmoid output, since binary cross-entropy is the reconstruction loss.
        return torch.sigmoid(self.conv1(hidden))
class VariationalAutoencoder(nn.Module):
    """Encoder/decoder pair with reparameterised latent sampling."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        """Return ``(reconstruction, latent_mu, latent_logvar)`` for ``x``."""
        latent_mu, latent_logvar = self.encoder(x)
        latent = self.latent_sample(latent_mu, latent_logvar)
        return self.decoder(latent), latent_mu, latent_logvar

    def latent_sample(self, mu, logvar):
        """Sample a latent code in training mode; return ``mu`` itself otherwise."""
        if not self.training:
            return mu
        # The reparameterization trick: z = mu + std * eps with eps ~ N(0, I).
        std = torch.exp(logvar * 0.5)
        eps = torch.empty_like(std).normal_()
        return eps * std + mu
def vae_loss(recon_x, x, mu, logvar):
    """Return summed reconstruction BCE plus ``variational_beta`` times the KL term.

    ``recon_x`` holds per-pixel Bernoulli probabilities, so the negative
    log-likelihood of ``x`` is the pixel-wise binary cross-entropy. It is
    summed, not averaged: summing is the direct negative log-likelihood,
    whereas averaging would make the weight of the KL term independent of the
    image resolution and change the suitable weight by orders of magnitude.
    """
    # Both tensors are flattened to rows of 784 pixels before comparison.
    flat_recon = recon_x.view(-1, 784)
    flat_target = x.view(-1, 784)
    recon_loss = F.binary_cross_entropy(flat_recon, flat_target, reduction='sum')
    kl_terms = 1 + logvar - mu.pow(2) - logvar.exp()
    kldivergence = -0.5 * torch.sum(kl_terms)
    return recon_loss + variational_beta * kldivergence
I train it on MNIST dataset.
I want to sample it, or generate an array and give it to the decoder and see what the output will be.
The problem is that I don't really understand, what my z array should look like and what shape should it need.
Here is the code for sampling:
vae.eval()
with torch.no_grad():
    # The KL term in vae_loss pulls the latent codes towards a standard normal,
    # so new images are generated by drawing z ~ N(0, I). The decoder's first
    # layer takes latent_dims features, so z is (n_samples, latent_dims);
    # 25 samples fill the 5 x 5 gallery below.
    z = torch.randn(25, latent_dims).to(device)
    output = vae.decoder(z)
# NOTE(review): plot_gallery is defined elsewhere; the 24, 24 arguments look
# like an image height/width, which may not match the decoder output size --
# confirm.
plot_gallery(output.cpu().numpy(), 24, 24, n_row=5, n_col=5)

Trying to understand Pytorch's implementation of LSTM

I have a dataset containing 1000 examples where each example has 5 features (a,b,c,d,e). I want to feed 7 examples to an LSTM so it predicts the feature (a) of the 8th day.
Reading Pytorchs documentation of nn.LSTM() I came up with the following:
# Five features per example, ten hidden units, one LSTM layer, one predicted value.
input_size, hidden_size = 5, 10
num_layers, output_size = 1, 1
lstm = nn.LSTM(input_size, hidden_size, num_layers)
fc = nn.Linear(hidden_size, output_size)
out, hidden = lstm(X)  # Where X's shape is ([7,1,5])
# Feed only the last time step's hidden output to the linear layer.
output = fc(out[-1])
output  # shape reported by the asker: ([7,1])
According to the docs:
The input of the nn.LSTM is "input of shape (seq_len, batch, input_size)" with "input_size – The number of expected features in the input x",
And the output is: "output of shape (seq_len, batch, num_directions * hidden_size): tensor containing the output features (h_t) from the last layer of the LSTM, for each t."
In this case, I thought seq_len would be the sequence of 7 examples, batch is 1 and input_size is 5. So the LSTM would consume each example containing 5 features, re-feeding the hidden state at every iteration.
What am I missing?
When I extend your code to a full example -- I also added some comments that may help -- I get the following:
import torch
import torch.nn as nn
input_size = 5
hidden_size = 10
num_layers = 1
output_size = 1

lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
fc = nn.Linear(in_features=hidden_size, out_features=output_size)

# Seven identical time steps, a batch of one, five features per step.
X = torch.tensor([[[1, 2, 3, 4, 5]] for _ in range(7)], dtype=torch.float32)
print(X.shape)  # (seq_len, batch_size, input_size) = (7, 1, 5)

out, hidden = lstm(X)  # Where X's shape is ([7,1,5])
print(out.shape)  # (seq_len, batch_size, hidden_size) = (7, 1, 10)

out = out[-1]  # Keep only the output of the last time step
print(out.shape)  # (batch, hidden_size) = (1, 10)

out = fc(out)  # Push through the linear layer
print(out.shape)  # (batch_size, output_size) = (1, 1)
This makes sense to me, given your batch_size = 1 and output_size = 1 (I assume you're doing regression). I don't know where your output.shape = (7, 1) comes from.
Are you sure that your X has the correct dimensions? Did you maybe create nn.LSTM with batch_first=True? There are a lot of little things that can sneak in.

Resources