I am trying to predict features using a pretrained model.
I am getting the output shown below; how can I use torch.max() to get the classes of interest? Code which I tried:
# ... load the model
input = transformation_sequence(sample).unsqueeze(0)
outputs = model(input)
_, predicted = torch.max(outputs, 1)  # this line raises the error
# printing the `outputs` variable gives:
[tensor([[ 3.0654, -3.0650]]), tensor([[ 1.5634, -1.5672]]), tensor([[ 1.2867, -1.2888]]), tensor([[ 1.2974, -1.2928]]), tensor([[ 6.4537, -6.4487]]), tensor([[ 2.4851, -2.4710]]), tensor([[ 0.9855, -0.9809]]), tensor([[ 0.3995, -0.4033]]), tensor([[ 0.6301, -0.6276]]), tensor([[ 5.7082, -5.6931]]), tensor([[ 1.9354, -1.9365]]), tensor([[ 0.6091, -0.6074]]), tensor([[ 5.4509, -5.4417]]), tensor([[ 3.7231, -3.7115]]), tensor([[ 4.4494, -4.4361]]), tensor([[ 0.8867, -0.8902]]), tensor([[ 2.7410, -2.7402]]), tensor([[ 5.4919, -5.4909]]), tensor([[ 2.2687, -2.2744]]), tensor([[-0.9695, 0.9723]]), tensor([[ 1.5100, -1.5114]]), tensor([[-2.7077, 2.7140]]), tensor([[ 4.4661, -4.4734]]), tensor([[ 0.4846, -0.4821]]), tensor([[-2.9743, 2.9643]]), tensor([[ 1.3900, -1.3874]]), tensor([[ 7.6764, -7.6742]]), tensor([[ 0.5173, -0.5118]]), tensor([[ 1.3513, -1.3503]]), tensor([[ 2.5381, -2.5356]]), tensor([[ 4.9850, -5.0074]]), tensor([[-2.8397, 2.8484]]), tensor([[ 3.1010, -3.1137]]), tensor([[-0.2374, 0.2406]]), tensor([[ 0.5338, -0.5358]]), tensor([[ 3.4912, -3.4979]]), tensor([[ 1.1957, -1.1876]]), tensor([[ 1.1189, -1.1163]]), tensor([[ 3.6400, -3.6365]]), tensor([[-1.3123, 1.3132]])]
# the error traceback:
_, predicted = torch.max(outputs,1)
TypeError: max() received an invalid combination of arguments - got (list, int), but expected one of:
* (Tensor input)
* (Tensor input, Tensor other, Tensor out)
* (Tensor input, int dim, bool keepdim, tuple of Tensors out)
Your model returns a list of tensors, not a single tensor. You can fix this with torch.cat:
torch.max(torch.cat(outputs),1)
>>> torch.return_types.max(
values=tensor([3.0654, 1.5634, 1.2867]),
indices=tensor([0, 0, 0]))
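For completeness, a minimal sketch of getting the predicted class for every sample, assuming each element of outputs is a 1x2 tensor of class scores for one input (as in the printout above):
import torch
scores = torch.cat(outputs)               # shape: (num_samples, 2)
values, predicted = torch.max(scores, 1)  # predicted class index (0 or 1) per sample
print(predicted)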
Related
def data():
    xTrain, xVal, yTrain, yVal = train_test_split(X_data, Y_Labels, test_size=0.1, random_state=42)  # checked
    return xTrain, yTrain, xVal, yVal
# Define the model (CNN) for single source localization
input_shape = xTrain.shape[1:]
kern_size1 = 3
kern_size2 = 2
model = Sequential() # kernel_regularizer=l1(0.00001),
model.add(Conv2D(256, kernel_size=(kern_size1,kern_size1), activation=None, input_shape=input_shape, name="Conv2D_1",padding="valid", strides=(2,2)))
model.add(BatchNormalization(trainable=True))
model.add(ReLU())
model.add(Conv2D(256, kernel_size=(kern_size2,kern_size2), activation=None,name="Conv2D_2", padding="valid"))
model.add(BatchNormalization(trainable=True))
model.add(ReLU())
model.add(Conv2D(256, kernel_size=(kern_size2,kern_size2), activation=None,name="Conv2D_3", padding="valid"))
model.add(BatchNormalization(trainable=True))
model.add(ReLU())
model.add(Flatten())
model.add(Dense(4096, activation="relu",name="Dense_Layer1"))
model.add(Dropout(0.2,name="Dropout1"))
model.add(Dense(2048, activation="relu",name="Dense_Layer2"))
model.add(Dropout(0.2,name="Dropout2"))
model.add(Dense(1024, activation="relu",name="Dense_Layer3"))
model.add(Dropout(0.2,name="Dropout3"))
model.add(Dense(DNN_outp, activation="sigmoid", kernel_initializer=glorot_normal(seed=None),name="Classif_Layer"))
model.summary()
This is the code. I defined xTrain, but it gives me an error at the line input_shape = xTrain.shape[1:] saying NameError: name 'xTrain' is not defined. Why does this happen?
You have to either remove the function data() and keep only xTrain, xVal, yTrain, yVal = train_test_split(X_data, Y_Labels, test_size=0.1, random_state=42) at the top level, or call the function, e.g. input_shape = data()[0].shape[1:].
This happens because xTrain is a local variable of data(): it only exists inside the function, so the name is undefined in the scope where you build the model unless you call data() and assign its return values.
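A minimal sketch of the second option, assuming X_data and Y_Labels are already defined (note that data() returns the values in the order xTrain, yTrain, xVal, yVal):
xTrain, yTrain, xVal, yVal = data()   # unpack the returned splits into this scope
input_shape = xTrain.shape[1:]        # xTrain is now defined here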
Here is the model I'm using:
X_test, X_valid, y_test, y_valid = train_test_split(testReview, testLabel, test_size=0.4, shuffle=True, random_state=42, stratify=testLabel)
print(len(X_test), len(X_valid))
print((X_valid.shape), X.shape)
def read_glove_vector(glove_vec):
    with open(glove_vec, 'r', encoding='UTF-8') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            w_line = line.split()
            curr_word = w_line[0]
            word_to_vec_map[curr_word] = np.array(w_line[1:], dtype='float32')
    return word_to_vec_map
word_to_vec_map = read_glove_vector('/content/drive/MyDrive/Colab Notebooks/AmazonCustomerReview/glove.6B.100d.txt')
maxLen = 123
vocab_len = len(word_to_index)
embed_vector_len = word_to_vec_map['moon'].shape[0]
emb_matrix = np.zeros((vocab_len, embed_vector_len))
for word, index in word_to_index.items():
    embedding_vector = word_to_vec_map.get(word)
    if embedding_vector is not None:
        emb_matrix[index] = embedding_vector
print('Build model...')
model = Sequential()
model.add(Embedding(vocab_len, embed_vector_len, input_length=maxLen))
model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Load GloVe embedding
model.layers[0].set_weights([emb_matrix])
model.layers[0].trainable = False
print(model.summary())
# Training and evaluation
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(X, y, epochs=2, batch_size=32,
          validation_data=(X_valid, y_valid))
print("Result: ", model.metrics_names, model.evaluate(X_test, y_test))
After this, the error I encounter is:
Epoch 1/2
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-62-4152e138ca8e> in <module>()
----> 1 model.fit(X, y, batch_size=32, epochs=2, verbose=2)
2 # model.fit(X, y, epochs=2, batch_size=32)
3 # # validation_data=(X_valid,y_valid))
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
The issue is that I ran the full code an hour ago and it completed perfectly.
Then I tried to fine-tune the model to see whether I could get a better result, and it unexpectedly got stuck on this error. If there were a mistake in the code, it would not have run fully the first time, yet the error only appeared on the third run.
What is the solution to this?
I'm trying to assign some custom weight to my PyTorch model but it doesn't work correctly.
class Mod(nn.Module):
    def __init__(self):
        super(Mod, self).__init__()
        self.linear = nn.Sequential(
            nn.Linear(1, 5)
        )

    def forward(self, x):
        x = self.linear(x)
        return x
mod = Mod()
mod.linear.weight = torch.tensor([1. ,2. ,3. ,4. ,5.], requires_grad=True)
mod.linear.bias = torch.nn.Parameter(torch.tensor(0., requires_grad=True))
print(mod.linear.weight)
>>> tensor([1., 2., 3., 4., 5.], requires_grad=True)
output = mod(torch.ones(1))
print(output)
>>> tensor([ 0.2657, 0.3220, -0.0726, -1.6987, 0.3945], grad_fn=<AddBackward0>)
The output is expected to be [1., 2., 3., 4., 5.] but it doesn't work as expected. What am I missing here?
You are not updating the weights in the right place. Your self.linear is not an nn.Linear layer, but rather an nn.Sequential container. Your nn.Linear is the first layer in that sequential, so you need to index self.linear to reach it:
with torch.no_grad():
    mod.linear[0].weight.data = torch.tensor([1., 2., 3., 4., 5.], requires_grad=True)[:, None]
    mod.linear[0].bias.data = torch.zeros((5, ), requires_grad=True)  # bias is not a scalar here
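Alternatively, a minimal sketch of the same fix using nn.Parameter directly; note that the weight of nn.Linear(1, 5) has shape (5, 1), so the custom values need a column shape:
mod = Mod()
mod.linear[0].weight = nn.Parameter(torch.tensor([[1.], [2.], [3.], [4.], [5.]]))
mod.linear[0].bias = nn.Parameter(torch.zeros(5))
print(mod(torch.ones(1)))  # tensor([1., 2., 3., 4., 5.], grad_fn=<AddBackward0>)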
I am training a neural network on video frames (converted to greyscale) to output a tensor with two values. The first iteration always gives an acceptable loss (mean squared error generally between 15-40), followed by an exponential rise in the second pass, and then infinity.
The net is quite vanilla:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(100 * 291, 29100),
            nn.ReLU(),
            nn.Linear(29100, 29100),
            nn.ReLU(),
            nn.Linear(29100, 2),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
As is the training loop:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to("cpu"), y.to("cpu")

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
Example of loss function growth:
ITERATION 1
prediction: tensor([[-1.2239, -8.2337]], grad_fn=<AddmmBackward>)
actual: tensor([[0.0321, 0.0325]])
loss: tensor(34.9545, grad_fn=<MseLossBackward>)
ITERATION 2
prediction: tensor([[ 314636.5625, 2063098.2500]], grad_fn=<AddmmBackward>)
actual: tensor([[0.0330, 0.0323]])
loss: tensor(2.1777e+12, grad_fn=<MseLossBackward>)
ITERATION 3
prediction: tensor([[-8.0924e+22, -5.3062e+23]], grad_fn=<AddmmBackward>)
actual: tensor([[0.0334, 0.0317]])
loss: tensor(inf, grad_fn=<MseLossBackward>)
Here is an example of the video data: it's a 291x100 greyscale image and there are 1100 of them in the training dataset:
dataset.video_frames.size()
> torch.Size([1100, 100, 291])
dataset.video_frames[0]
> tensor([[21., 29., 28., ..., 33., 27., 26.],
[22., 27., 25., ..., 25., 25., 30.],
[23., 26., 26., ..., 24., 24., 28.],
...,
[24., 33., 31., ..., 41., 40., 42.],
[26., 34., 31., ..., 26., 20., 22.],
[25., 32., 32., ..., 21., 20., 18.]])
And the labeled training data:
dataset.y.size()
> torch.Size([1100, 2])
dataset.y[0]
> tensor([0.0335, 0.0315], dtype=torch.float)
I've fiddled with the learning rate and the number of hidden layers, and nothing seems to keep the loss from going to infinity.
Properly scaling the inputs is crucial for proper training.
Weights are initialized based on assumptions about the way the inputs are scaled.
See this part of a lecture on weight initialization for how critical it is for proper convergence.
More details on the mathematical analysis of the influence of weight initialization can be found in Sec. 2 of this paper:
Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification (ICCV 2015).
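For illustration, a minimal sketch of scaling the inputs, assuming the dataset shown in the question (greyscale pixel values in [0, 255] stored in dataset.video_frames):
frames = dataset.video_frames.float()
# option 1: scale pixel values from [0, 255] into [0, 1]
frames_scaled = frames / 255.0
# option 2: standardize to zero mean and unit variance
frames_standardized = (frames - frames.mean()) / frames.std()
Either scaled version would then be fed to the network in place of the raw frames.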
I started a very simple RNN project to solidify my knowledge of TF: basically a simple sequence generator using LSTMs and TF. The project is just a many-to-one sequence generation; the input is a 4-integer window and the output has only one float for each window. The minimum input number is 1 and the maximum is 61, so I can predict from 61 onward. I just used a batch of all inputs, which has shape [58, 4, 1], and the outputs have shape [58, 1]. For better visualization, the inputs and outputs are written below.
Inputs Labels
[[[ 1],[ 2],[ 3],[ 4]], -------> [[0.0493],
[[ 2],[ 3],[ 4],[ 5]], -------> [0.0634],
[[ 3],[ 4],[ 5],[ 6]], -------> [0.0773],
[[ 4],[ 5],[ 6],[ 7]], -------> [0.0909],
.. .. .. .. -------> ... ,
[[55],[56],[57],[58]], -------> [0.5503],
[[56],[57],[58],[59]], -------> [0.5567],
[[57],[58],[59],[60]], -------> [0.5630],
[[58],[59],[60],[61]]] -------> [0.5693]]
The training part went very well and I could achieve something around 0.991 accuracy with 500 epochs, but when I try to predict some values from 61 to 118, the output has a fixed step down for all predicted values but somehow has the right behavior.
Because the purpose of this project is just for learning the basics, I decided to use the simplest functions in TF, so the seq2seq facilities have been left off. The code for the RNN is written below
def build_lstm(cell_lengh, cell_depth, batch_size, keep_prob):
    def lstm_row(cell_length, keep_prob):
        cell_row = tf.contrib.rnn.BasicLSTMCell(cell_lengh)
        cell_row = tf.contrib.rnn.DropoutWrapper(cell_row, keep_prob)
        return cell_row
    cell = tf.contrib.rnn.MultiRNNCell([lstm_row(cell_lengh, keep_prob) for _ in range(cell_depth)])
    initial_state = cell.zero_state(batch_size, tf.float32)
    return cell, initial_state
tf.reset_default_graph()
inputs = tf.placeholder(tf.float32, [None, feature_length, 1], name='inputs')
labels = tf.placeholder(tf.float32, [None, output_length], name='labels')
keep_prob = tf.placeholder(tf.float32, name='kpprob')
lstm_cell, initial_state = build_lstm(40, 2, batch_size=batch_size, keep_prob=keep_prob)
lstm_output, final_state = tf.nn.dynamic_rnn(lstm_cell, inputs, initial_state=initial_state)
lstm_outout_seq = lstm_output[:,-1,:]
dense_0 = tf.layers.dense(inputs=lstm_outout_seq, units=120, activation=tf.nn.relu)
dropout_0 = tf.layers.dropout(dense_0, rate=0.7)
with tf.variable_scope('sigmoid'):
    W = tf.Variable(tf.truncated_normal((120, 1), stddev=0.1), name='weights')
    b = tf.Variable(tf.zeros(1), name='bias')
    logits = tf.matmul(dropout_0, W) + b
    output = tf.nn.sigmoid(logits, name='output')
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))
correct_predictions = tf.abs(output - labels)
total_correct = tf.ones_like(correct_predictions)
accuracy = tf.reduce_mean(total_correct - correct_predictions)
learning_rate = tf.placeholder(tf.float32, name='learning_rate')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
l_rate = 0.001
epochs = 500
kp_prob = 0.7
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for e in range(epochs):
        new_state = session.run([initial_state])
        feeder = {
            inputs: wnd_x,
            labels: wnd_y_scl,
            keep_prob: kp_prob,
            learning_rate: l_rate,
            initial_state: new_state
        }
        session_loss, session_accuracy, session_output, _, last_state = session.run(
            [loss, accuracy, output, optimizer, final_state], feed_dict=feeder)
        print('Epoch {0}/{1}:\t'.format(e, epochs),
              'training loss {0}\t'.format(session_loss),
              'accuracy {0}\t'.format(session_accuracy))

    new_state = session.run([initial_state])
    feeder = {
        inputs: unseen_data_rsp,
        keep_prob: 1.0,
        initial_state: new_state
    }
    session_output = session.run([output], feed_dict=feeder)
As mentioned before, during the inference phase the predictions have a fixed step down but somehow have the right behavior, i.e. the derivative of the curve changes correctly across the time-steps.
During the training phase I have the following output:
Epoch 999/1000: training loss = 0.5913468599319458 | accuracy = 0.9909629225730896
Input Label Output
[[ 1],[ 2],[ 3],[ 4]] --> [0.0493] ... [0.0591]
[[ 2],[ 3],[ 4],[ 5]] --> [0.0634] ... [0.0802]
[[ 3],[ 4],[ 5],[ 6]] --> [0.0773] ... [0.0777]
[[ 4],[ 5],[ 6],[ 7]] --> [0.0909] ... [0.1035]
.. .. .. .. ... ... ...
[[55],[56],[57],[58]] --> [0.5503] ... [0.5609]
[[56],[57],[58],[59]] --> [0.5567] ... [0.5465]
[[57],[58],[59],[60]] --> [0.5630] ... [0.5543]
[[58],[59],[60],[61]] --> [0.5693] ... [0.5614]
And during inference phase I have the following output:
Input Prediction
[[ 58],[ 59],[ 60],[ 61]] --> [0.4408]
[[ 59],[ 60],[ 61],[ 62]] --> [0.4459]
[[ 60],[ 61],[ 62],[ 63]] --> [0.4510]
[[ 61],[ 62],[ 63],[ 64]] --> [0.4559]
... ... ... ... ... ...
[[112],[113],[114],[115]] --> [0.6089]
[[113],[114],[115],[116]] --> [0.6101]
[[114],[115],[116],[117]] --> [0.6113]
[[115],[116],[117],[118]] --> [0.6124]
As you can see, the first input of the inference is the same as the last input of the training. What I don't understand here is why the same input gives me two different outputs, and why these outputs have a fixed step down of around 0.11. Thank you guys for any help, and sorry for the long text; I can make it shorter upon request.
During inference you are resetting the state, so you get two different values for the same input because the state of the network is different in the two cases.
To keep the state after a prediction you would need to do something like this:
# iterate for each prediction
feeder = {
    inputs: unseen_data_rsp,
    keep_prob: 1.0,
    initial_state: last_state
}
session_output, last_state = session.run([output, final_state],
                                         feed_dict=feeder)
Also, to reproduce the training result exactly for the first inference input, you would need to first present all the training examples so that inference starts from the correct state. Another approach would be to save the state of the network after training and reuse it during prediction.
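For illustration, a minimal sketch of that warm-up approach, reusing the placeholders and ops from the question (inputs, keep_prob, initial_state, final_state, output, wnd_x, unseen_data_rsp) and assuming the prediction batch has the same batch size as the training batch:
# replay the training windows once (no optimizer step) just to obtain the
# network state after the last training example
last_state = session.run(final_state, feed_dict={inputs: wnd_x, keep_prob: 1.0})

# start inference from that state instead of a fresh zero state
feeder = {
    inputs: unseen_data_rsp,
    keep_prob: 1.0,
    initial_state: last_state
}
session_output, last_state = session.run([output, final_state], feed_dict=feeder)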