Attribute Error: `loss.backward()` returns None - machine-learning

I'm trying to implement the Learner object and its steps, and I'm running into an issue with the loss.backward() call: it raises an AttributeError: 'NoneType' object has no attribute 'data'.
The entire process works when I follow the Chapter 04 MNIST Basics. However, implementing it within a class raises this error. Could anybody guide me on why this occurs and how to fix it?
Here's the code:
class Basic_Optim:
    def __init__(self, params, lr):
        self.params = list(params)
        self.lr = lr

    def step(self):
        for p in self.params:
            p.data -= self.lr * p.grad.data

    def zero(self):
        for p in self.params:
            p.grad = None

class Learner_self:
    def __init__(self, train, valid, model, loss, metric, params, lr):
        self.x = train
        self.y = valid
        self.model = model
        self.loss = loss
        self.metric = metric
        self.opt_func = Basic_Optim(params, lr)

    def fit(self, epochs):
        for epoch in range(epochs):
            self.train_data()
            score = self.valid_data()
            print(score, end=' | ')

    def train_data(self):
        for x, y in self.x:
            preds = self.model(x)
            loss = self.loss(preds, y)
            loss_b = loss.backward()
            print(f'Loss: {loss:.4f}, Loss Backward: {loss_b}')
            self.opt_func.step()
            self.opt_func.zero()

    def valid_data(self):
        accuracy = [self.metric(xb, yb) for xb, yb in self.y]
        return round(torch.stack(accuracy).mean().item(), 4)

learn = Learner_self(dl, valid_dl, simple_net, mnist_loss, metric=batch_accuracy,
                     params=linear_model.parameters(), lr=1)
learn.fit(10)
The output from the print statement inside train_data is Loss: 0.0516, Loss Backward: None, and then the AttributeError shared above is raised.
Please let me know if you want any more details. Every other function, such as mnist_loss, batch_accuracy, and simple_net, is exactly the same as in the book.
Thank you in advance.

It seems like your optimizer and your trainer do not work on the same model.
You have model=simple_net, while the parameters for the optimizer are those of a different model: params=linear_model.parameters().
Try passing params=simple_net.parameters() -- that is, make sure the params you hand to the learner are those of model.
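A minimal sketch of the fix, assuming the Chapter 4 setup (dl, valid_dl, simple_net, mnist_loss, batch_accuracy) is already defined as in the book: the only change is that the params handed to the learner come from the same simple_net used for the forward pass, so loss.backward() populates .grad on exactly the tensors the optimizer later steps and zeroes.

learn = Learner_self(dl, valid_dl, simple_net, mnist_loss, metric=batch_accuracy,
                     params=simple_net.parameters(),  # same model as model=
                     lr=1)
learn.fit(10)

Note also that loss.backward() always returns None by design; the gradients are written into each parameter's .grad attribute, which is why the print shows Loss Backward: None even when everything is wired correctly.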

Related

How to solve my problem of max_step parameter in pytorch?

I'm trying to train a model with the source code below.
class mymodel(pl.LightningModule):
    def __init__(self, config, learning_rate=1e-4, max_steps=100000//2):
        super(mymodel, self).__init__()
        self.config = config
        self.save_hyperparameters()
        self.training_losses = []
        self.validation_losses = []
        self.max_steps = max_steps

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.hparams['learning_rate'])

    def forward(self, batch_dict):
        return answer_vector

    def calculate_metrics(self, prediction, labels):
        batch_size = len(prediction)
        ac_score = 0
        for (pred, gt) in zip(prediction, labels):
            ac_score += calculate_acc_score(pred.detach().cpu(), gt.detach().cpu())
        ac_score = ac_score / batch_size
        return ac_score

    def training_step(self, batch, batch_idx):
        answer_vector = self.forward(batch)
        loss = nn.CrossEntropyLoss()(answer_vector.reshape(-1, self.config['classes']), batch['answer'].reshape(-1))
        _, preds = torch.max(answer_vector, dim=-1)
        train_acc = self.calculate_metrics(preds, batch['answer'])
        train_acc = torch.tensor(train_acc)
        return loss

    def validation_step(self, batch, batch_idx):
        logits = self.forward(batch)
        loss = nn.CrossEntropyLoss()(logits.reshape(-1, self.config['classes']), batch['answer'].reshape(-1))
        _, preds = torch.max(logits, dim=-1)
        ## Validation Accuracy
        val_acc = self.calculate_metrics(preds.cpu(), batch['answer'].cpu())
        val_acc = torch.tensor(val_acc)
        ## Logging
        self.log('val_ce_loss', loss, prog_bar=True)
        self.log('val_acc', val_acc, prog_bar=True)
        return {'val_loss': loss, 'val_acc': val_acc}

    def optimizer_step(self, epoch_nb, batch_nb, optimizer, optimizer_i, opt_closure=None, on_tpu=False,
                       using_native_amp=False, using_lbfgs=False):
        ## Warmup for 1000 steps
        if self.trainer.global_step < 1000:
            lr_scale = min(1., float(self.trainer.global_step + 1) / 1000.)
            for pg in optimizer.param_groups:
                pg['lr'] = lr_scale * self.hparams.learning_rate
        ## Linear Decay
        else:
            for pg in optimizer.param_groups:
                pg['lr'] = polynomial(self.hparams.learning_rate, self.trainer.global_step, max_iter=self.max_steps)
        optimizer.step(opt_closure)
        optimizer.zero_grad()
Around the 5th epoch (maybe a bit earlier or later) I encountered an error that stopped training, so I increased max_step. But when I increase max_step (max_step == 100K), I get this problem with the loss and accuracy:
loss > 100 and acc == 0. I attached a screenshot of this problem.
What changes should I make to the source code so that the model keeps training without this problem?
Updates:
I see. It looks like your optimizer_step is actually acting as a scheduler, since it adjusts the AdamW learning rate. You should instead set up the scheduler in the configure_optimizers function. See https://pytorch-lightning.readthedocs.io/en/stable/common/lightning_module.html?highlight=configure_optimizers#configure-optimizers
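As a rough illustration of what this suggests (a sketch, not the asker's exact schedule): the warmup and decay can live in a LambdaLR scheduler returned from configure_optimizers, so optimizer_step no longer needs to be overridden. The 1000-step warmup mirrors the question; the linear-decay-to-zero rule and the warmup_steps name are assumptions.

def configure_optimizers(self):
    optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams['learning_rate'])
    warmup_steps = 1000  # assumed, mirrors the warmup length in the question

    def lr_lambda(step):
        # linear warmup for the first warmup_steps, then linear decay to 0 at max_steps
        if step < warmup_steps:
            return float(step + 1) / warmup_steps
        return max(0.0, float(self.max_steps - step) / max(1, self.max_steps - warmup_steps))

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
    }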
===
old answer:
By error, do you mean the line about val_ce_loss? If yes, that's not an error. It means the val_ce_loss of the current epoch is not within the top 1 of historical epochs, so the checkpoint won't be saved to disk. Please refer to the save_top_k argument of the checkpoint callback. https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.callbacks.ModelCheckpoint.html
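For reference, a minimal sketch of the checkpoint callback this refers to; monitor='val_ce_loss' matches the metric logged in validation_step above, while save_top_k=3 is just an example value:

from pytorch_lightning.callbacks import ModelCheckpoint

checkpoint_cb = ModelCheckpoint(
    monitor='val_ce_loss',  # metric logged in validation_step
    mode='min',
    save_top_k=3,           # keep the 3 best checkpoints; -1 keeps every one
)
trainer = pl.Trainer(callbacks=[checkpoint_cb])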

Gradients are None in PyTorch for RL

I am new to PyTorch, and Reinforcement Learning, so I am practicing with the gym library, but I cannot get my model to train. I have narrowed it down to my grads being None, but I cannot figure out why. I suspect that it has something to do with the way I calculate reward/loss, but I cannot understand the problem.
I read that it might be because my parameters are detached from the computational graph, and I have tried everything to prevent that without any luck. I don't think I fully understand the computational graph... Code:
for episode in range(n_episodes):
    done = False
    obs, reward, done, info = env.reset()
    rewards = torch.tensor(0, dtype=torch.float32, requires_grad=True)
    while not done:
        observation = torch.from_numpy(obs)
        observation.requires_grad = True
        y_pred = model(observation)
        obs, reward, done, info = env.step(int(y_pred.clone().detach().numpy()[0]))
        rewards.cat(torch.tensor(-reward, requires_grad=True))
    optimizer.zero_grad()
    loss = rewards.sum()
    loss.retain_grad()
    loss.backward()
    optimizer.step()
My model looks like this:
class Network(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Network, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim),
            nn.ReLU(),
            nn.Softmax(dim=0))

    def forward(self, x):
        return self.model(x)
I printed list(model.parameters())[0].grad is None and found that it is always True.
How can I fix this problem?
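For what it's worth, the detachment the asker suspects can be seen with a minimal check (a sketch, not the asker's code): a loss built only from freshly created tensors with requires_grad=True is a new leaf of the graph, so backpropagating through it never reaches the model's parameters; gradients only flow if the loss is a function of the model's output.

import torch
import torch.nn as nn

model = nn.Linear(4, 2)

# Loss built from a fresh tensor: no path back to the model parameters.
detached_loss = torch.tensor(1.0, requires_grad=True).sum()
detached_loss.backward()
print(model.weight.grad)            # None - the model never entered this graph

# Loss built from the model's output: gradients reach the parameters.
out = model(torch.randn(4))
connected_loss = out.sum()
connected_loss.backward()
print(model.weight.grad is None)    # False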

Reinforcement learning converges for mean loss but not for each training data

Here I show a dummy example that represents my actual problem.
My neural network (NN) receives one input and gives the probabilities for two output nodes. The code for the NN is:
class Net(torch.nn.Module):
    def __init__(self, N, M):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(N, 4)
        self.fc2 = torch.nn.Linear(4, 4)
        self.fc3 = torch.nn.Linear(4, M)

    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = torch.softmax(self.fc3(x), 0)
        return x
The ABM class is our model; it iteratively calls Net::forward and, based on the returned probabilities, draws an action. If the action is the first index, it increments agent_count. The inputs xx are stored in states, which will later be used for the backward pass.
class ABM:
    def __init__(self, _nn, _t_data):
        self.nn = nn
        self.iteration_n = _t_data.iteration_n
        self.target_value = _t_data.target_value

    def run(self):
        for jj in range(self.iteration_n):
            xx = self.generate_input();
            self.states.append(xx);  # store inputs
            ys = nn.forward(xx);
            action = self.draw(ys);
            if (action == 0):
                self.agent_count += 1
        loss = self.calculateReward();
        return loss;

    def generate_input(self):
        return torch.ones((1), requires_grad=True)

    # --some other attributes--
When the run is over, the error is calculated as error = (target_value - agent_count) / target_value, which is a value between -1 and 1.
In order to train the model, the error is applied to the probability of the first output node of the NN. This is meant to correct the NN so that it predicts the right probability for the first output. The code is:
class ABM:
    def calculateReward(self):
        error = (self.target_value - self.agent_count) / self.target_value
        reward = torch.tensor((-error), requires_grad=True)
        # since all states are the same, we just choose the first one
        state = self.states[0]
        ys = nn.forward(state)
        actionProb = ys[0]
        action_reward = actionProb * reward
        return action_reward;

    # --some other members--
The two parameters iteration_n and target_value used in the ABM are defined in the training data class as:
class Train:
    def __init__(self, tt, tv):
        self.iteration_n = tt
        self.target_value = tv

    target_value = 0
    iteration_n = 0
The different parts of the code are tied together as follows:
#### start optimization ####
nn = Net(1, 2)
optimizer = optim.Adam(nn.parameters(), lr=0.01)

# create training data values
training_items = []
training_items.append(Train(1000, 800))
training_items.append(Train(500, 200))

error_record = []
for ii in range(100):
    print("############ start iteration #%d ################" % ii)
    for t_item in training_items:
        model = ABM(nn, t_item)
        loss = model.run()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        error_record.append(loss.item())
Now let's present the problem.
If I only define one training item, Train(1000, 800) (iteration number 1000, target value 800), the NN is optimized as expected;
however, with two training items defined, the average error declines to zero but the error on each individual training item stays high.
Does anyone have an idea how to solve this issue?
I have omitted some parts of the code here to make it more readable. The full running code is available on minimal ABM

Keras KerasClassifier gridsearch TypeError: can't pickle _thread.lock objects

The following code is throwing an error:
TypeError: can't pickle _thread.lock objects
I can see that it likely has to do with passing the previous method in as a function in def fit(self, c_m). But I think this is correct according to the documentation: https://keras.io/scikit-learn-api/
I may be making a rookie mistake; if anyone sees the error in my code, I would appreciate the help.
np.random.seed(7)
y_dic = []

class NN:
    def __init__(self):
        self.X = None
        self.y = None
        self.model = None

    def clean_data(self):
        seed = 7
        np.random.seed(seed)
        dataset = pd.read_csv('/Users/isaac/pca_rfe_tsne_comparisons/Vital_intrusions.csv', delimiter=',', skiprows=0)
        dataset = dataset.iloc[:, 1:6]
        self.X = dataset.iloc[:, 1:5]
        Y = dataset.iloc[:, 0]
        for y in Y:
            if y >= 8:
                y_dic.append(1)
            else:
                y_dic.append(0)
        self.y = y_dic
        self.X = np.asmatrix(stats.zscore(self.X, axis=0, ddof=1))
        self.y = to_categorical(self.y)

    def create_model(self):
        self.model = Sequential()
        self.model.add(Dense(4, input_dim=4, activation='relu'))
        self.model.add(Dense(4, activation='relu'))
        self.model.add(Dense(2, activation='sigmoid'))
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        pass

    def fit(self, c_m):
        model = KerasClassifier(build_fn=c_m, verbose=0)
        batch_size = [10, 20, 40, 60, 80, 100]
        epochs = [10, 50, 100]
        param_grid = dict(batch_size=batch_size, epochs=epochs)
        grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
        pdb.set_trace()
        grid_result = grid.fit(self.X, self.y)
        return (grid_result)

    def results(self, grid_results):
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

def main():
    nn = NN()
    nn.clean_data()
    nn.create_model()
    grid_results = nn.fit(nn.create_model)
    nn.results(grid_results)

if __name__ == "__main__":
    main()
OK, a follow-up to this. Thanks for your comments @MarcinMożejko, you were right about this. There were more errors I should mention: in def fit(), I wrote model = KerasClassifier, not self.model = KerasClassifier. I wanted to mention that in case anyone is looking at the code. I'm now getting a new error on the same line:
AttributeError: 'NoneType' object has no attribute 'loss'.
I can track this back to scikit_learn.py:
loss_name = self.model.loss
if hasattr(loss_name, '__name__'):
    loss_name = loss_name.__name__
if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
    y = to_categorical(y)
I'm not sure how to solve this, as I set the loss term in self.model.compile. I tried changing it to binary_crossentropy, but that had no effect. Any further thoughts?
The problem lies in this line of code:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
Unfortunately, for now Keras does not support pickling your model, which is needed for sklearn to apply multiprocessing (here you may read the discussion on this). In order to make this code work you should set:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
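As a side note on the follow-up AttributeError (which the answer above does not address): judging from the scikit_learn.py snippet, self.model inside the wrapper is whatever build_fn returned, and create_model as written returns None. A sketch, under that assumption, of a build function that returns the compiled model, which is what KerasClassifier can then inspect:

def create_model(self):
    # build_fn must return the compiled model so the wrapper can read model.loss
    model = Sequential()
    model.add(Dense(4, input_dim=4, activation='relu'))
    model.add(Dense(4, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    self.model = model
    return model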

When introducing the Optimizer variables under variable_scope get recreated twice, why?

As the title suggests, when looking at the graph inside TensorBoard, the variables I've created inside a variable_scope get created twice. Why is that? What am I doing wrong?
def weights_biases(weights_shape, biases_shape):
    weights = tf.get_variable("weights", weights_shape, initializer=tf.random_normal_initializer())
    biases = tf.get_variable("biases", biases_shape, initializer=tf.random_normal_initializer())
    return weights, biases

def hl_relu(input_tensor, weights_shape, biases_shape):
    weights, biases = weights_biases(weights_shape, biases_shape)
    regression = tf.matmul(input_tensor, weights) + biases
    return tf.nn.relu(regression)

def neural_network_model(x):
    # W = tf.Variable(
    #     tf.truncated_normal([vocab_size, embedding_size], stddev=1 / math.sqrt(vocab_size)),
    #     name="W")
    # embedded = tf.nn.embedding_lookup(W, x)
    # embedding_aggregated = tf.reduce_sum(embedded, [1])
    with tf.variable_scope("hidden_layer_1"):
        relu1 = hl_relu(x, [max_words_len, n_nodes_hl1], [n_nodes_hl1])
    with tf.variable_scope("hidden_layer_2"):
        relu2 = hl_relu(relu1, [n_nodes_hl1, n_nodes_hl2], [n_nodes_hl2])
    with tf.variable_scope("hidden_layer_3"):
        relu3 = hl_relu(relu2, [n_nodes_hl2, n_nodes_hl3], [n_nodes_hl3])
    with tf.variable_scope("output_layer"):
        weights, biases = weights_biases([n_nodes_hl3, n_classes], [n_classes])
        output_regression = tf.matmul(relu3, weights) + biases
    return output_regression

def train_neural_network(test_x, test_y):
    with tf.device("/cpu:0"):
        custom_runner = CustomRunner()
        x_batch, y_batch = custom_runner.get_inputs()
    with tf.variable_scope("test"):
        testX = tf.constant(test_x, name="testX")
        testX = tf.cast(testX, tf.float32)
        testY = tf.constant(test_y, name="testY")
        testY = tf.cast(testY, tf.float32)
    with tf.variable_scope("nn") as scope:
        global_step = tf.Variable(0, trainable=False, name='global_step')
        logits = neural_network_model(x_batch)
        scope.reuse_variables()
        test_logits = neural_network_model(testX)
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, y_batch), name="cost")
        tf.scalar_summary('cost', cost)
        optimizer = tf.train.AdagradOptimizer(0.01).minimize(cost, global_step=global_step)
This produces the following abnormality:
You can see that the 'nn' scope was created twice, with my hidden layers not connected to any inputs but still affecting the Adagrad optimizer by constantly providing their initialized random weights. I suspect this is also slowing down training.
What have I done wrong in my code?
I believe this is what is causing them to be created twice.
logits = neural_network_model(x_batch)
scope.reuse_variables()
test_logits = neural_network_model(testX)
Can you change that to this:
logits = neural_network_model(x_batch)
without the test_logits and see if you still get the same issue?
Are you sure the variables are re-created? I suspect what you see are just the Adagrad variables created by the optimizer so that it can save what it needs for its computation. Can you try the simplest GradientDescentOptimizer and see if it still happens?
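A minimal sketch of the check suggested here, using the legacy TF1 API and the names from the question: plain gradient descent creates no extra slot variables, so if the duplicated nodes disappear after the swap, what the graph showed were Adagrad's per-variable accumulators rather than recreated model variables.

# swap only the optimizer; everything else stays the same
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost, global_step=global_step)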
