It's a 15-class classification model, OUTPUT_DIM = 15. I'm trying to input a frequency vector like this one 'hi my name is' => [1,43,2,56].
When I call predictions = model(x_train[0]) I get an array of size torch.Size([100, 15]), instead of just a 1D array with 15 classes like this: torch.Size([15]). What's happening? Why is this the output? How can I fix it? Thank you in advance, more info below.
The model (from main docs) is the following:
import torch.nn as nn
class RNN(nn.Module):
def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
super().__init__()
self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim, hidden_dim)
self.hidden2tag = nn.Linear(hidden_dim, output_dim)
def forward(self, text):
embeds = self.word_embeddings(text)
lstm_out, _ = self.lstm(embeds.view(len(text), 1, -1))
tag_space = self.hidden2tag(lstm_out.view(len(text), -1))
tag_scores = F.log_softmax(tag_space, dim=1)
return tag_scores
Parameters:
INPUT_DIM = 62288
EMBEDDING_DIM = 64
HIDDEN_DIM = 128
OUTPUT_DIM = 15
The LSTM function in Pytorch returns not just the output of the last timestep but all outputs instead (this is useful in some cases). So in your example you seem to have exactly 100 timesteps (the amount of timesteps is just your sequence length).
But since you are doing classification you just care about the last output. You can normally get it like this:
outputs, _ = self.lstm(embeddings)
# shape: batch_size x 100 x 15
output = outputs[:, -1]
# shape: batch_size x 1 x 15
Related
I'm new to pytorch and wrote a simple code as following to classify some inputs. The model input has 8*2 with batch size of 2 and the input layer in the model has 2 nodes. I don't know what is wrong!
X1=np.array([[2,1],[3,2],[-4,-1],[-1,-3],[2,-1],[3,-3],[-2,1],[-4,-2]])
Y1=np.array([0,0,0,0,1,1,1,1])
X=torch.tensor(X1)
Y=torch.tensor(Y1)
BATCH_SIZE=2
trainset= torch.utils.data.TensorDataset(X, Y)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
shuffle=True, num_workers=1)
from torch.nn.modules import flatten
learning_rate = 0.01
num_epochs = 20
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = MyModel()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
## compute accuracy
def get_accuracy(logit, target, batch_size):
''' Obtain accuracy for training round '''
corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
accuracy = 100.0 * corrects/batch_size
return accuracy.item()
model = MyModel()
# Commented out IPython magic to ensure Python compatibility.
for epoch in range(num_epochs):
train_running_loss = 0.0
train_acc = 0.0
## training step
for inputs, labels in trainloader:
#inputs=torch.flatten(inputs)
inputs,labels=inputs.to(device), labels.to(device)
#inputs = inputs.to(device)
#labels = labels.to(device)
optimizer.zero_grad()
## forward + backprop + loss
print(inputs)
outputs = model.forward(inputs)
loss = criterion(outputs, labels)
loss.backward()
## update model params
optimizer.step()
train_running_loss += loss.detach().item()
train_acc += get_accuracy(outputs, labels, BATCH_SIZE)
#model.train()
model.eval()
print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f'%(epoch, train_running_loss / i, train_acc/i))
And my model is as below:
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.d1 = nn.Linear(2,3)
self.d2 = nn.Linear(3,1)
self.init_weights()
def init_weights(self):
k1=torch.tensor([0.1,-0.72,0.94,-0.29,0.12,0.44])
k1=torch.unsqueeze(torch.unsqueeze(k1,0),0)
self.d1.weight.data=k1
k2=torch.tensor([1,-1.16,-0.26])
k2=torch.unsqueeze(torch.unsqueeze(k2,0),0)
self.d2.weight.data=k2
def forward(self, x):
x = self.d1(x)
x = F.tanh(x)
x = self.d2(x)
out = F.sigmoid(x)
return out
Then I got an error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-27-196d819d3ccd> in <module>
101 print(inputs)
102
--> 103 outputs = model.forward(inputs)
104 loss = criterion(outputs, labels)
105
2 frames
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) -> str:
RuntimeError: t() expects a tensor with <= 2 dimensions, but self is 3D
I flatten the input but nothing changed. What should I do to fix it?
First of all, you don't need to invoke your model's forward pass by model.forward(x); using model(x) is good.
Second of all, what exactly are you trying to achieve via the init_weights method? You're unsqueezing k1 and k2 twice, giving them the shape of (1, 1, x) which is 3D which is what the error is telling you. torch.nn.Linear performs a matrix multiplication with a 2D matrix, so you can't use a 3D one. torch.nn.Linear already initializes the weights via Kaiming initialization [1] so I'm not sure what you're trying to achieve here.
Changing the init_weights method to:
def init_weights(self):
k1 = torch.tensor([0.1, -0.72, 0.94, -0.29, 0.12, 0.44])
k1 = k1.reshape(self.d1.weight.shape)
self.d1.weight.data = k1
k2 = torch.tensor([1, -1.16, -0.26])
k2 = k2.reshape(self.d2.weight.shape)
self.d2.weight.data = k2
and changing the type of inputs from Long to Float (i.e., model(inputs.float())) should solve your problem.
References
[1] https://github.com/pytorch/pytorch/blob/0dceaf07cd1236859953b6f85a61dc4411d10f87/torch/nn/modules/linear.py#L103
I have a (244, 108) numpy array. It contains percentage change of close value of a trade for each minute in one day ie 108 values and like that for 244 days. Basically its a 1D vector. So in order to do 1D CNN how should I shape my data?
What i have done:
x.shape = (244, 108)
x = np.expand_dims(x, axis=2)
x.shape = (243, 108, 1)
y.shape = (243,)
Model:
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.layer1 = torch.nn.Conv1d(in_channels=108, out_channels=1, kernel_size=1, stride=1)
self.act1 = torch.nn.ReLU()
self.act2 = torch.nn.MaxPool1d(kernel_size=1, stride=1)
self.layer2 = torch.nn.Conv1d(in_channels=1, out_channels=1, kernel_size=1, stride=1)
self.act3 = torch.nn.ReLU()
self.act4 = torch.nn.MaxPool1d(kernel_size=1, stride=1)
self.linear_layers = nn.Linear(1, 1)
# Defining the forward pass
def forward(self, x):
x = self.layer1(x)
x = self.act1(x)
x = self.act2(x)
x = self.layer2(x)
x = self.act3(x)
x = self.act4(x)
x = self.linear_layers(x)
return x
If each day should be separate instance for convolution your data should have the shape (248, 1, 108). This seems more reasonable.
If you want all your days and minutes to be a continuum for network to learn it should be of shape (1, 1, 248*108).
Basically first dimension is batch (how many training samples), second is the number of channels or features of sample (only one in your case) and last is the number of timesteps.
Edit
Your pooling layer should be torch.nn.AdaptiveMaxPool1d(1). You should also reshape output from this layer like this: pooled.reshape(x.shape[0], -1) before pushing it through torch.nn.Linear layer.
I have a dataset containing 1000 examples where each example has 5 features (a,b,c,d,e). I want to feed 7 examples to an LSTM so it predicts the feature (a) of the 8th day.
Reading Pytorchs documentation of nn.LSTM() I came up with the following:
input_size = 5
hidden_size = 10
num_layers = 1
output_size = 1
lstm = nn.LSTM(input_size, hidden_size, num_layers)
fc = nn.Linear(hidden_size, output_size)
out, hidden = lstm(X) # Where X's shape is ([7,1,5])
output = fc(out[-1])
output # output's shape is ([7,1])
According to the docs:
The input of the nn.LSTM is "input of shape (seq_len, batch, input_size)" with "input_size – The number of expected features in the input x",
And the output is: "output of shape (seq_len, batch, num_directions * hidden_size): tensor containing the output features (h_t) from the last layer of the LSTM, for each t."
In this case, I thought seq_len would be the sequence of 7 examples, batchis 1 and input_size is 5. So the lstm would consume each example containing 5 features refeeding the hidden layer every iteration.
What am I missing?
When I extend your code to a full example -- I also added some comments to may help -- I get the following:
import torch
import torch.nn as nn
input_size = 5
hidden_size = 10
num_layers = 1
output_size = 1
lstm = nn.LSTM(input_size, hidden_size, num_layers)
fc = nn.Linear(hidden_size, output_size)
X = [
[[1,2,3,4,5]],
[[1,2,3,4,5]],
[[1,2,3,4,5]],
[[1,2,3,4,5]],
[[1,2,3,4,5]],
[[1,2,3,4,5]],
[[1,2,3,4,5]],
]
X = torch.tensor(X, dtype=torch.float32)
print(X.shape) # (seq_len, batch_size, input_size) = (7, 1, 5)
out, hidden = lstm(X) # Where X's shape is ([7,1,5])
print(out.shape) # (seq_len, batch_size, hidden_size) = (7, 1, 10)
out = out[-1] # Get output of last step
print(out.shape) # (batch, hidden_size) = (1, 10)
out = fc(out) # Push through linear layer
print(out.shape) # (batch_size, output_size) = (1, 1)
This makes sense to me, given your batch_size = 1 and output_size = 1 (I assume, you're doing regression). I don't know where your output.shape = (7, 1) come from.
Are you sure that your X has the correct dimensions? Did you create nn.LSTM maybe with batch_first=True? There are lot of little things that can sneak in.
I'm trying to train a simple RNN model with a trivial goal where the output matches a fixed vector regardless of the input
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
class RNN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(RNN, self).__init__()
self.hidden_size = hidden_size
self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
print "i2h WEIGHT size ", list(self.i2h.weight.size())
print "i2h bias size ", list(self.i2h.bias.size())
self.i2o = nn.Linear(hidden_size, output_size)
print "i2o WEIGHT size ", list(self.i2o.weight.size())
print "i2o bias size ", list(self.i2o.bias.size())
self.softmax = nn.LogSoftmax(dim=1)
def forward(self, input, hidden):
combined = torch.cat((input, hidden), 1)
hidden = self.i2h(combined)
output = self.i2o(hidden)
output = self.softmax(output)
return output, hidden
def initHidden(self):
return Variable(torch.zeros(1, self.hidden_size))
n_hidden = 20
rnn = RNN(10, n_hidden, 3)
learning_rate = 1e-3
loss_fn = torch.nn.MSELoss(size_average=False)
out_target = Variable( torch.FloatTensor([[0.0 , 1.0, 0.0]] ) , requires_grad=False)
print "target output::: ", out_target
def train(category_tensor, line_tensor):
hidden = rnn.initHidden()
rnn.zero_grad()
for i in range(line_tensor.size()[0]):
#print "train iteration ", i, ": input data: ", line_tensor[i]
output, hidden = rnn(line_tensor[i], hidden)
loss = loss_fn(output, out_target)
loss.backward()
# Add parameters' gradients to their values, multiplied by learning rate
for p in rnn.parameters():
#print "parameter: ", p, " gradient: ", p.grad.data
p.data.add_(-learning_rate, p.grad.data)
return output, loss.data[0]
current_loss = 0
n_iters = 500
for iter in range(1, n_iters + 1):
inp = Variable(torch.randn(100,1,10) + 5)
output, loss = train(out_target, inp)
current_loss += loss
if iter % 1 == 0:
print "weights: ",rnn.i2h.weight
print "LOSS: ", loss
print output
As it shows, the loss stays above 6 and never goes down. Notice also that I am biasing all the random inputs normal distributions by 5, so they are mostly positive numbers, so there should exist a weight distribution that approaches the goal output
What am I doing wrong in this example that is failing to output to approach the goal?
Your fixed output is:
torch.FloatTensor([[0.0, 1.0, 0.0]])
But you are using the following as the final layer in your RNN:
self.softmax = nn.LogSoftmax(dim=1)
Does LogSoftmax returns value in [0, 1]? Althouhgh, you can use the Softmax but I would recommend you to use the sign function and transform -1 to 0.
I'm having a difficult time visualizing what this Tensorflow class creates. I want to implement a LSTM RNN that handles 3D data.
class Grid3LSTMCell(GridRNNCell):
"""3D BasicLSTM cell
This creates a 2D cell which receives input and gives output in the first dimension.
The first dimension can optionally be non-recurrent if `non_recurrent_fn` is specified.
The second and third dimensions are LSTM.
"""
def __init__(self, num_units, tied=False, non_recurrent_fn=None,
use_peepholes=False, forget_bias=1.0):
super(Grid3LSTMCell, self).__init__(num_units=num_units, num_dims=3,
input_dims=0, output_dims=0, priority_dims=0, tied=tied,
non_recurrent_dims=None if non_recurrent_fn is None else 0,
cell_fn=lambda n, i: rnn_cell.LSTMCell(
num_units=n, input_size=i, forget_bias=forget_bias,
use_peepholes=use_peepholes),
non_recurrent_fn=non_recurrent_fn)
The class is found in `from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell`.
This is difficult to explain, so I've provided a drawing. Here is what I want it to do...
However the comment sounds like it isn't doing this. The comment makes it sound like the RNN is still a flat RNN, where the first dimension is outputting to, what is commonly called, the outputs variable (see below). The second dimension is outputting to the next step in the RNN, and the third dimension is outputting to the next hidden layer.
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
If this is the case, what is the point in having the first and second dimensions? Aren't they essentially the same thing? The BasicLSTMCell sends the output to the next step into outputs -- in other words they are one in the same.
Clarity?
For reference, here is my example code...
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
import numpy as np
#define parameters
learning_rate = 0.01
batch_size = 2
n_input_x = 10
n_input_y = 10
n_input_z = 10
n_hidden = 128
n_classes = 2
n_output = n_input_x * n_classes
x = tf.placeholder("float", [n_input_x, n_input_y, n_input_z])
y = tf.placeholder("float", [n_input_x, n_input_y, n_input_z, n_classes])
weights = {}
biases = {}
for i in xrange(n_input_y * n_input_z):
weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
biases[i] = tf.Variable(tf.random_normal([n_output]))
#generate random data
input_data = np.random.rand(n_input_x, n_input_y, n_input_z)
ground_truth = np.random.rand(n_input_x, n_input_y, n_input_z, n_classes)
#build GridLSTM
def GridLSTM_network(x):
x = tf.reshape(x, [-1,n_input_x])
x = tf.split(0, n_input_y * n_input_z, x)
lstm_cell = grid_rnn_cell.Grid3LSTMCell(n_hidden)
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
output = []
for i in xrange(n_input_y * n_input_z):
output.append(tf.matmul(outputs[i], weights[i]) + biases[i])
return output
#initialize network, cost, optimizer and all variables
pred = GridLSTM_network(x)
# import pdb
# pdb.set_trace()
pred = tf.pack(pred)
pred = tf.transpose(pred,[1,0,2])
pred= tf.reshape(pred, [-1, n_input_x, n_input_y, n_input_z, n_classes])
temp_pred = tf.reshape(pred, [-1,n_classes])
temp_y = tf.reshape(y,[-1, n_classes])
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(0,tf.cast(tf.sub(tf.nn.sigmoid(temp_pred),temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 0
while 1:
print step
step = step + 1
# pdb.set_trace
sess.run(optimizer, feed_dict={x: input_data, y: ground_truth})