I am using 187 samples as the training set, each with 68 features,
and would like to extract 10 latent features, then use PCA to plot them in 2D.
My original data is right-skewed, but the latent space comes out roughly normal.
Even though the loss decreases well, the model doesn't seem to be learning.
[model] variational autoencoder
Layers 68 - 30 - 10 - 30 - 68, using leaky_relu as the activation function and tanh in the final layer.
I added L1 regularization to the loss function, and dropout in the encoder.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class VAE(nn.Module):
    def __init__(self):
        super(VAE, self).__init__()
        self.fc1 = nn.Linear(68, 30)
        self.fc21 = nn.Linear(30, 10)  # mu head
        self.fc22 = nn.Linear(30, 10)  # logvar head
        self.fc3 = nn.Linear(10, 30)
        self.fc4 = nn.Linear(30, 68)
        self.dropout = nn.Dropout(0.5)

    def encode(self, x):
        h1 = F.leaky_relu(self.fc1(x))
        h1 = self.dropout(h1)
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        # z = mu + std * eps with eps ~ N(0, I); randn_like replaces the
        # deprecated Variable / cuda.FloatTensor pattern and handles the device
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = F.leaky_relu(self.fc3(z))
        return torch.tanh(self.fc4(h3))  # tanh bounds reconstructions to [-1, 1]

    def forward(self, x):
        mu, logvar = self.encode(x.view(-1, 68))
        z = self.reparameterize(mu, logvar)
        return self.decode(z), z, mu, logvar
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = VAE().to(device)
# note: lr alone adds no regularization; Adam's weight_decay argument would give L2
optimizer = optim.Adam(model.parameters(), lr=1e-3)

def loss_function(recon_x, x, mu, logvar):
    mse = nn.MSELoss(reduction='sum')
    recon_loss = mse(recon_x, x)  # summed squared error (named BCE in the original)
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    regularization_loss = 0  # L1 regularization over all parameters, coefficient 1
    for param in model.parameters():
        regularization_loss += torch.sum(torch.abs(param))
    return recon_loss + KLD + regularization_loss
I have no clue why this is not working.
This is the desired output:
This is the one I get -- very random:
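One thing I still need to rule out (an assumption on my part, since I haven't shown my preprocessing): with tanh as the final layer, reconstructions live in [-1, 1], so the right-skewed raw features would need to be scaled into that range before the summed MSE can be meaningful. A sketch of the scaling I could try, where X_train stands for my 187 x 68 array:

import numpy as np

# Hypothetical preprocessing sketch: X_train is a (187, 68) NumPy array.
# Scale each feature into [-1, 1] so the tanh decoder can reach the targets.
X_min = X_train.min(axis=0)
X_max = X_train.max(axis=0)
X_scaled = 2 * (X_train - X_min) / (X_max - X_min + 1e-8) - 1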
I'm new to PyTorch and wrote some simple code, shown below, to classify some inputs. The model input is 8x2 with a batch size of 2, and the input layer of the model has 2 nodes. I don't know what is wrong!
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

X1 = np.array([[2, 1], [3, 2], [-4, -1], [-1, -3], [2, -1], [3, -3], [-2, 1], [-4, -2]])
Y1 = np.array([0, 0, 0, 0, 1, 1, 1, 1])
X = torch.tensor(X1)
Y = torch.tensor(Y1)
BATCH_SIZE = 2

trainset = torch.utils.data.TensorDataset(X, Y)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=1)
from torch.nn.modules import flatten

learning_rate = 0.01
num_epochs = 20

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MyModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

## compute accuracy
def get_accuracy(logit, target, batch_size):
    ''' Obtain accuracy for training round '''
    corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
    accuracy = 100.0 * corrects / batch_size
    return accuracy.item()

for epoch in range(num_epochs):
    train_running_loss = 0.0
    train_acc = 0.0
    ## training step
    for i, (inputs, labels) in enumerate(trainloader):
        #inputs = torch.flatten(inputs)
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        ## forward + backprop + loss
        print(inputs)
        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        ## update model params
        optimizer.step()
        train_running_loss += loss.detach().item()
        train_acc += get_accuracy(outputs, labels, BATCH_SIZE)
    model.eval()
    print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f'
          % (epoch, train_running_loss / (i + 1), train_acc / (i + 1)))
And my model is as below:
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.d1 = nn.Linear(2, 3)
        self.d2 = nn.Linear(3, 1)
        self.init_weights()

    def init_weights(self):
        k1 = torch.tensor([0.1, -0.72, 0.94, -0.29, 0.12, 0.44])
        k1 = torch.unsqueeze(torch.unsqueeze(k1, 0), 0)
        self.d1.weight.data = k1
        k2 = torch.tensor([1, -1.16, -0.26])
        k2 = torch.unsqueeze(torch.unsqueeze(k2, 0), 0)
        self.d2.weight.data = k2

    def forward(self, x):
        x = self.d1(x)
        x = torch.tanh(x)
        x = self.d2(x)
        out = torch.sigmoid(x)
        return out
Then I got an error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-27-196d819d3ccd> in <module>
101 print(inputs)
102
--> 103 outputs = model.forward(inputs)
104 loss = criterion(outputs, labels)
105
2 frames
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) -> str:
RuntimeError: t() expects a tensor with <= 2 dimensions, but self is 3D
I flattened the input, but nothing changed. What should I do to fix it?
First of all, you don't need to invoke your model's forward pass with model.forward(x); calling model(x) directly is the right way.
Second of all, what exactly are you trying to achieve via the init_weights method? You're unsqueezing k1 and k2 twice, giving them a shape of (1, 1, x), which is 3D, and that is exactly what the error is telling you: torch.nn.Linear performs a matrix multiplication with a 2D weight matrix, so you can't use a 3D one. Note that torch.nn.Linear already initializes its weights via Kaiming initialization [1], so I'm not sure what you're trying to achieve here.
Changing the init_weights method to:
def init_weights(self):
    k1 = torch.tensor([0.1, -0.72, 0.94, -0.29, 0.12, 0.44])
    k1 = k1.reshape(self.d1.weight.shape)  # (3, 2), matching the Linear layer
    self.d1.weight.data = k1
    k2 = torch.tensor([1.0, -1.16, -0.26])
    k2 = k2.reshape(self.d2.weight.shape)  # (1, 3)
    self.d2.weight.data = k2
and changing the type of inputs from Long to Float (i.e., model(inputs.float())) should solve your problem.
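To sanity-check both fixes, a quick sketch (reusing X1 from the question):

model = MyModel()
print(model.d1.weight.shape)  # torch.Size([3, 2]) -- 2D, as F.linear expects
print(model.d2.weight.shape)  # torch.Size([1, 3])
x = torch.tensor(X1, dtype=torch.float32)  # Float inputs instead of Long
out = model(x)                # forward pass now runs without the t() error
print(out.shape)              # torch.Size([8, 1])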
References
[1] https://github.com/pytorch/pytorch/blob/0dceaf07cd1236859953b6f85a61dc4411d10f87/torch/nn/modules/linear.py#L103
I am attempting to train a transformer network on EEG data. The input dimensions are 50x16684x60 (seq x batch x features) and the output is 16684x2. Right now I am simply trying to run a basic transformer, and I keep getting an error telling me:
RuntimeError: the feature number of src and tgt must be equal to d_model
Why would the source and target feature number ever be equal? Is it possible to run such a dataset through a transformer?
Here is my basic model:
input_size = 60 # seq x batch x features
hidden_size = 32
num_classes = 2
learning_rate = 0.001
batch_size = 64
num_epochs = 2
sequence_length = 50
num_layers = 2
dropout = 0.5
class Transformer(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(Transformer, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.transformer = nn.Transformer(60, 2)
self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
def forward(self, x, y):
# Forward Propogation
out, _ = self.transformer(x,y)
out = out.reshape(out.shape[0], -1)
out = self.fc(out)
return out
model = Transformer(input_size, hidden_size, num_layers, num_classes)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
for index in tqdm(range(16684)):
X, y = (X_train[index], Y_train[index])
print(X.shape, y.shape)
output = model(X, y)
loss = criterion(output, y)
model.zero_grad()
loss.backward()
optimizer.step()
if index % 500 == 0:
print(f"Epoch {epoch}, Batch: {index}, Loss: {loss}")
nn.Transformer is a sequence-to-sequence model: both the source sequence fed to the encoder and the target sequence fed to the decoder must have d_model features (60 in your constructor), because both pass through attention layers of that width. That is a requirement of the architecture, not of backpropagation.
Your target is a pair of class scores (16684x2), not a sequence with 60 features, which is why the check fails. For sequence classification you don't need the decoder at all: run the input through nn.TransformerEncoder and put a linear classification head on top.
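A minimal encoder-only sketch of that idea (my assumptions: shapes as described in the question, nhead=2 as in your nn.Transformer(60, 2) call, and integer class labels so you can train with nn.CrossEntropyLoss instead of MSELoss):

import torch
import torch.nn as nn

class EEGClassifier(nn.Module):
    def __init__(self, d_model=60, nhead=2, num_layers=2, seq_len=50, num_classes=2):
        super().__init__()
        layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model * seq_len, num_classes)

    def forward(self, x):                    # x: (seq, batch, features)
        out = self.encoder(x)                # (seq, batch, d_model)
        out = out.permute(1, 0, 2)           # (batch, seq, d_model)
        out = out.reshape(out.size(0), -1)   # flatten the whole sequence per sample
        return self.fc(out)                  # (batch, num_classes) logits

model = EEGClassifier()
x = torch.randn(50, 4, 60)                   # toy batch of 4 sequences
logits = model(x)                            # shape: (4, 2)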
I tried to implement a simple demo that fits a polynomial regression, but the linear model's loss fails to decrease.
I am confused about where I went wrong.
If I train the model on one sample at a time (batch size = 1), it works fine, but when I feed the model many samples at once, the loss increases and reaches inf.
import numpy as np
import torch
import math
from matplotlib import pyplot as plt

def rand_series(size):
    x = np.linspace(-100, 100, size)
    np.random.shuffle(x)
    base_y = 20 * np.sin(2 * math.pi / 200 * x)
    y = base_y + 10 * np.random.rand(size)
    return x, y

def rescale_vec(vector):
    vec_as_tensor = torch.tensor(vector, dtype=torch.float32)
    max_in_vec = torch.max(vec_as_tensor)
    min_in_vec = torch.min(vec_as_tensor)
    if max_in_vec - min_in_vec == 0:
        return torch.ones(vec_as_tensor.size(), dtype=torch.float32)
    else:
        return (vec_as_tensor - min_in_vec) / (max_in_vec - min_in_vec)

def rescale(vectors):
    if len(vectors.shape) == 1:
        return rescale_vec(vectors)
    nor_vecs = torch.empty(vectors.shape)
    for i in range(vectors.shape[0]):
        nor_vecs[i] = rescale_vec(vectors[i])
    return nor_vecs

class LinearRegression(torch.nn.Module):
    def __init__(self, power=4):
        super().__init__()
        self.layer = torch.nn.Linear(power, 1)

    def forward(self, x):
        return self.layer(x)

def regression(x_, y_, learning_rate):
    x = torch.t(torch.tensor(x_, dtype=torch.float32))
    y = torch.tensor(y_, dtype=torch.float32)
    dim_size = x.size()[1]
    print(dim_size, x.size())
    model = LinearRegression(dim_size)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    loss_func = torch.nn.MSELoss(reduction='sum')
    batch_size = 400
    for round in range(50):
        sample_indices = torch.randint(0, len(x), (batch_size,))
        x_samples = torch.index_select(x, 0, sample_indices)
        y_samples = torch.index_select(y, 0, sample_indices)
        optimizer.zero_grad()
        y_hat = model(x_samples.view(-1, dim_size))
        loss = loss_func(y_hat, y_samples)
        print(loss.item())
        loss.backward()
        optimizer.step()
    return model

x_one, y = rand_series(1000)
b = np.ones(len(x_one))
x = np.array([b, x_one, x_one ** 2, x_one ** 3, x_one ** 4, x_one ** 5])
model = regression(rescale(x), torch.tensor(y, dtype=torch.float32), 0.002)

nor_x = rescale(x)
y_hat = model(torch.t(torch.tensor(x, dtype=torch.float32)))
plt.scatter(x_one, y)
plt.scatter(x_one, y_hat.data, c='red')
plt.show()
the loss:
4.7375866968775066e+19
1.6979300048622735e+26
6.0214270068868396e+32
inf
inf
inf
You need to use loss_func = torch.nn.MSELoss(reduction='mean') to solve the divergence problem. With reduction='sum', the loss (and therefore the gradient) grows linearly with the batch size, so with 400 samples per batch the effective step size is far too large and SGD diverges. A batch of one or two seems to work only because the summed loss stays small enough; with more epochs you would still see the loss tend exponentially to infinity.
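Equivalently, for vanilla SGD you can keep the summed loss and shrink the learning rate by the batch size; a minimal sketch of both options (reusing model, learning_rate, and the batch size of 400 from the question):

# Fix 1: average the squared errors so the gradient magnitude no longer
# grows with batch size.
loss_func = torch.nn.MSELoss(reduction='mean')

# Fix 2 (equivalent for plain SGD): keep the summed loss, but divide the
# step size by the batch size used in the question (400).
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate / 400)

# Also worth fixing: y_hat has shape (400, 1) while y_samples has shape
# (400,), so MSELoss broadcasts them to (400, 400); match them explicitly.
loss = loss_func(y_hat, y_samples.view(-1, 1))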
My VAE class looks like this:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        c = capacity  # capacity and latent_dims are hyperparameters defined elsewhere
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=c, kernel_size=4, stride=2, padding=1)    # out: c x 14 x 14
        self.conv2 = nn.Conv2d(in_channels=c, out_channels=c*2, kernel_size=4, stride=2, padding=1)  # out: 2c x 7 x 7
        self.fc_mu = nn.Linear(in_features=c*2*7*7, out_features=latent_dims)
        self.fc_logvar = nn.Linear(in_features=c*2*7*7, out_features=latent_dims)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = x.view(x.size(0), -1)  # flatten batch of multi-channel feature maps to a batch of feature vectors
        x_mu = self.fc_mu(x)
        x_logvar = self.fc_logvar(x)
        return x_mu, x_logvar

class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        c = capacity
        self.fc = nn.Linear(in_features=latent_dims, out_features=c*2*7*7)
        self.conv2 = nn.ConvTranspose2d(in_channels=c*2, out_channels=c, kernel_size=4, stride=2, padding=1)
        self.conv1 = nn.ConvTranspose2d(in_channels=c, out_channels=1, kernel_size=4, stride=2, padding=1)

    def forward(self, x):
        x = self.fc(x)
        x = x.view(x.size(0), capacity*2, 7, 7)  # unflatten batch of feature vectors to a batch of multi-channel feature maps
        x = F.relu(self.conv2(x))
        x = torch.sigmoid(self.conv1(x))  # last layer is sigmoid, since we are using BCE as reconstruction loss
        return x

class VariationalAutoencoder(nn.Module):
    def __init__(self):
        super(VariationalAutoencoder, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, x):
        latent_mu, latent_logvar = self.encoder(x)
        latent = self.latent_sample(latent_mu, latent_logvar)
        x_recon = self.decoder(latent)
        return x_recon, latent_mu, latent_logvar

    def latent_sample(self, mu, logvar):
        if self.training:
            # the reparameterization trick
            std = logvar.mul(0.5).exp_()
            eps = torch.empty_like(std).normal_()
            return eps.mul(std).add_(mu)
        else:
            return mu
def vae_loss(recon_x, x, mu, logvar):
    # recon_x is the probability of a multivariate Bernoulli distribution p.
    # -log(p(x)) is then the pixel-wise binary cross-entropy.
    # Averaging or not averaging the binary cross-entropy over all pixels here
    # is a subtle detail with a big effect on training, since it changes the weight
    # we need to pick for the other loss term by several orders of magnitude.
    # Not averaging is the direct implementation of the negative log-likelihood,
    # but averaging makes the weight of the other loss term independent of the image resolution.
    recon_loss = F.binary_cross_entropy(recon_x.view(-1, 784), x.view(-1, 784), reduction='sum')
    kldivergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return recon_loss + variational_beta * kldivergence
I train it on the MNIST dataset.
I want to sample from it: generate a latent array, give it to the decoder, and see what the output will be.
The problem is that I don't really understand what my z array should look like and what shape it needs.
Here is the code for sampling:
z = ...
input = torch.FloatTensor(z).to(device)
vae.eval()
output = vae.decoder(input)
plot_gallery(output.data.cpu().numpy(), 24, 24, n_row=5, n_col=5)
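My current guess (unverified): since the decoder's first layer is nn.Linear(latent_dims, ...) and training samples latents around a standard-normal prior, z should be a batch of standard-normal vectors of shape (n_samples, latent_dims). A sketch:

vae.eval()
with torch.no_grad():
    # 25 latent vectors drawn from the standard-normal prior, one per gallery cell
    z = torch.randn(25, latent_dims, device=device)
    output = vae.decoder(z)  # shape: (25, 1, 28, 28) for MNIST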
I am trying to implement a Bayesian CNN using MC Dropout in PyTorch. The main idea is that by applying dropout at test time and running many forward passes, you get predictions from a variety of different models. I need to obtain the uncertainty; does anyone have an idea of how I can do it, please?
This is how I defined my CNN:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.dropout = nn.Dropout(p=0.3)
        nn.init.xavier_uniform_(self.conv1.weight)
        nn.init.constant_(self.conv1.bias, 0.0)
        nn.init.xavier_uniform_(self.conv2.weight)
        nn.init.constant_(self.conv2.bias, 0.0)
        nn.init.xavier_uniform_(self.fc1.weight)
        nn.init.constant_(self.fc1.bias, 0.0)
        nn.init.xavier_uniform_(self.fc2.weight)
        nn.init.constant_(self.fc2.bias, 0.0)
        nn.init.xavier_uniform_(self.fc3.weight)
        nn.init.constant_(self.fc3.bias, 0.0)

    def forward(self, x):
        x = self.pool(F.relu(self.dropout(self.conv1(x))))  # recommended to add the relu
        x = self.pool(F.relu(self.dropout(self.conv2(x))))  # recommended to add the relu
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(self.dropout(x)))
        x = self.fc3(self.dropout(x))  # no activation function needed for the last layer
        return x
model = Net().to(device)
train_accuracies = np.zeros(num_epochs)
test_accuracies = np.zeros(num_epochs)

dataiter = iter(trainloader)
images, labels = next(dataiter)  # dataiter.next() was removed in recent PyTorch

# initializing variables
loss_acc = []
class_acc_mcdo = []
start_train = True

# defining the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
    loss_vals = []
    acc_vals = []
    for epoch in range(num_epochs):  # loop over the dataset multiple times
        n_correct = 0    # number of correct predictions
        acc = 0          # accuracy of each epoch
        somme = 0        # sum of losses of each epoch
        epoch_loss = []
        for i, (images, labels) in enumerate(trainloader):
            # origin shape: [4, 3, 32, 32] = 4, 3, 1024
            # input_layer: 3 input channels, 6 output channels, 5 kernel size
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            outputs = model.train()(images)
            loss = criterion(outputs, labels)
            # Backward and optimize
            optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()
            epoch_loss.append(loss.item())  # add the loss to the epoch_loss list
            optimizer.step()
            # max returns (value, index)
            _, predicted = torch.max(outputs, 1)
            n_correct += (predicted == labels).sum().item()
            # print statistics
            if (i + 1) % 2000 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], '
                      f'Loss: {loss.item():.4f}')
        somme = sum(epoch_loss) / len(epoch_loss)
        loss_vals.append(somme)  # add the epoch's loss to loss_vals
        print("Loss = {}".format(somme))
        acc = 100 * n_correct / len(trainset)
        acc_vals.append(acc)  # add the epoch's accuracy to acc_vals
        print("Accuracy = {}".format(acc))
    # SAVE
    PATH = './cnn.pth'
    torch.save(model.state_dict(), PATH)
    loss_acc.append(loss_vals)
    loss_acc.append(acc_vals)
    return loss_acc
And here is the code for the MC Dropout test:
def enable_dropout(model):
    """ Function to enable the dropout layers during test time """
    for m in model.modules():
        if m.__class__.__name__.startswith('Dropout'):
            m.train()

def test():
    # set non-dropout layers to eval mode
    model.eval()
    # set dropout layers to train mode
    enable_dropout(model)
    test_loss = 0
    correct = 0
    n_samples = 0
    n_class_correct = [0 for i in range(10)]
    n_class_samples = [0 for i in range(10)]
    T = 100  # number of stochastic forward passes
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            output_list = []
            # getting outputs for T forward passes
            for i in range(T):
                output_list.append(torch.unsqueeze(model(images), 0))
            # calculating the mean over the T passes
            output_mean = torch.cat(output_list, 0).mean(0)
            test_loss += F.nll_loss(F.log_softmax(output_mean, dim=1), labels,
                                    reduction='sum').data  # sum up batch loss
            _, predicted = torch.max(output_mean, 1)  # get the index of the max log-probability
            correct += (predicted == labels).sum().item()  # sum up correct predictions
            n_samples += labels.size(0)
            for i in range(labels.size(0)):  # actual batch length; the last batch may be smaller
                label = labels[i]
                predi = predicted[i]
                if label == predi:
                    n_class_correct[label] += 1
                n_class_samples[label] += 1
    test_loss /= len(testloader.dataset)
    # PRINT TO HTML PAGE
    print('\n Average loss: {:.4f}, Accuracy: ({:.3f}%)\n'.format(
        test_loss, 100. * correct / n_samples))
    # Accuracy for each class
    acc_classes = []
    for i in range(10):
        acc = 100.0 * n_class_correct[i] / n_class_samples[i]
        print(f'Accuracy of {classes[i]}: {acc} %')
        acc_classes.append(acc)
    class_acc_mcdo.extend(acc_classes)
    print('Finished Testing')
You can compute the statistics, such as the sample mean or the sample variance, of different stochastic forward passes at test time (i.e. with the test or validation data), when the dropout is enabled. These statistics can be used to represent uncertainty. For example, you can compute the entropy, which is a measure of uncertainty, from the sample mean.
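A minimal sketch of that procedure, building on the test() code above (stacking softmax probabilities rather than raw logits is my choice here, so the statistics are taken over proper probability distributions):

import torch
import torch.nn.functional as F

model.eval()
enable_dropout(model)  # dropout stays stochastic at test time
T = 100

with torch.no_grad():
    # probs: (T, batch, num_classes), one softmax distribution per forward pass
    probs = torch.stack([F.softmax(model(images), dim=1) for _ in range(T)])

mean_probs = probs.mean(dim=0)   # predictive mean, (batch, num_classes)
var_probs = probs.var(dim=0)     # per-class sample variance across the T passes
# predictive entropy: one uncertainty score per input, highest when the mean
# distribution is uniform, zero when it is one-hot
entropy = -(mean_probs * torch.log(mean_probs + 1e-12)).sum(dim=1)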