I am making a chatbot using pytorch. I developed this chatbot on a linux system which was working flawlessly. But when i am running the same model in windows 10. There is an error :-
Traceback (most recent call last):
return self.wsgi_app(environ, start_response)
return super(_SocketIOMiddleware, self).call(environ,
return self.wsgi_app(environ, start_response)
response = self.handle_exception(e)
return cors_after_request(app.make_response(f(*args, **kwargs)))
reraise(exc_type, exc_value, tb)
raise value
response = self.full_dispatch_request()
rv = self.handle_user_exception(e)
return cors_after_request(app.make_response(f(*args, **kwargs)))
reraise(exc_type, exc_value, tb)
raise value
rv = self.dispatch_request()
return self.view_functionsrule.endpoint > load(candidateSkillset.get(name))
File
for (words, labels_net) in train_loader:
return _MultiProcessingDataLoaderIter(self)
w.start()
self._popen = self._Popen(self)
return _default_context.get_context().Process._Popen(process_obj)
return Popen(process_obj)
reduction.dump(process_obj, to_child)
ForkingPickler(file, protocol).dump(obj)
AttributeError: Can't pickle local object 'load..ChatDataset'
exitcode = _main(fd, parent_sentinel)
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
I am calling the function with the dataset i want it to train on... Here is my code :-
def load(inp):
global words,labels,doc_x,doc_y,questionsP1,questionsP2,questionsP3,questionsP4,model,questionTag, all_words, tags, xy, questions_list
all_words=[]
tags=[]
xy=[]
questions_list = []
words=[]
labels=[]
docs_x=[]
docs_y=[]
questionsP1=[]
questionsP2=[]
questionsP3=[]
questionsP4=[]
questionTag={}
print( dataFileNames.get(inp) )
with open(dataFileNames.get(inp)) as file:
data = json.load(file)
for intent in data["intents"]:
for proficiency in intent["proficiency"]:
for questions in proficiency["questions"]:
questions_list.append(questions["question"])
for responses in questions["responses"]:
wrds = tokenize(responses)
all_words.extend(wrds)
xy.append((wrds, questions["question"]))
if questions["tag"] in tags:
print(questions["tag"])
if questions["tag"] not in tags:
tags.append(questions["tag"])
if proficiency["level"] == "P1":
questionsP1.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P2":
questionsP2.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P3":
questionsP3.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P4":
questionsP4.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
#PyTorch Implementation
ignore_words = ['?', '!', '.', ',']
all_words = [stem(x) for x in all_words if x not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))
X_train = []
y_train = []
for tokenized_response, question in xy:
bag = bag_of_words(tokenized_response, all_words)
X_train.append(bag)
label = questions_list.index( question)
y_train.append(label)
X_train = np.array(X_train)
y_train = np.array(y_train)
class ChatDataset(Dataset):
def __init__(self):
self.n_samples = len(X_train)
self.x_data = X_train
self.y_data = y_train
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n_samples
#HyperParameters
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(X_train[0])
learning_rate = 0.001
num_epochs = 1000
dataset = ChatDataset()
train_loader = DataLoader(dataset = dataset, batch_size=batch_size, shuffle = True, num_workers = 2)
device = 'cpu'
# print(device)
global model_main
model_main = NeuralNet(input_size, hidden_size, output_size).to(device)
try:
print("Inside Try")
data = torch.load(modelNames.get(inp))
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data["all_words"]
questions_list = data["questions_list"]
model_state = data["model_state"]
model_main = NeuralNet(input_size, hidden_size, output_size).to(device)
model_main.load_state_dict(model_state)
model_main.eval()
except:
#loss and optimizer
print("Inside Except")
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_main.parameters(), lr = learning_rate)
for epoch in range(num_epochs):
for (words, labels_net) in train_loader:
words = words.to(device)
labels_net = labels_net.to(device)
#Forward
outputs = model_main(words)
loss = criterion(outputs, labels_net)
# print(loss)
#backward and optimizer step
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (epoch + 1) % 100 == 0:
print(f'epoch {epoch + 1}/ {num_epochs}, loss={loss.item(): .4f}')
print(f'final loss, loss={loss.item(): .4f}')
############### Accuracy Calculation ##############
correct = 0
total = 0
# print(device)
with torch.no_grad():
for words, labels_net in train_loader:
outputs = model_main(words)
_, predicted = torch.max(outputs.data, 1)
total += labels_net.size(0)
correct += (predicted == labels_net).sum().item()
print('Accuracy : %d %%' % ( 100 * correct / total ))
#Saving the model
data = {
"model_state": model_main.state_dict(),
"input_size": input_size,
"output_size": output_size,
"hidden_size": hidden_size,
"all_words": all_words,
"questions_list": questions_list
}
FILE = modelNames.get(inp)
torch.save(data, FILE)
Related
I have a GNN that works for regression however I have changed the nature of the task from regression to classification. I thought it would be as simple as converting the loss functions and output size but I am getting multiple errors. This is my code:
Class GNN(torch.nn.Module):
def __init__(self, gnn, n_layer, tfeature_len, dim, mlp_hidden_unit, feature_mode):
super(GNN, self).__init__()
self.gnn = gnn
self.n_layer = n_layer
self.tfeature_len = tfeature_len
self.dim = dim
self.gnn_layers = ModuleList([])
if gnn in ['gcn', 'gat', 'sage', 'tag']:
for i in range(n_layer):
if gnn == 'gcn':
self.gnn_layers.append(GraphConv(in_feats=tfeature_len if i == 0 else dim,
out_feats=dim,
activation=None if i == n_layer - 1 else torch.relu))
elif gnn == 'gat':
num_heads = 16 # make sure that dim is dividable by num_heads
self.gnn_layers.append(GATConv(in_feats=tfeature_len if i == 0 else dim,
out_feats=dim // num_heads,
activation=None if i == n_layer - 1 else torch.relu,
num_heads=num_heads))
elif gnn == 'sage':
agg = 'pool'
self.gnn_layers.append(SAGEConv(in_feats=tfeature_len if i == 0 else dim,
out_feats=dim,
activation=None if i == n_layer - 1 else torch.relu,
aggregator_type=agg))
elif gnn == 'tag':
hops = 2
self.gnn_layers.append(TAGConv(in_feats=tfeature_len if i == 0 else dim,
out_feats=dim,
activation=None if i == n_layer - 1 else torch.relu,
k=hops))
elif gnn == 'sgc':
self.gnn_layers.append(SGConv(in_feats=tfeature_len, out_feats=dim, k=n_layer))
else:
raise ValueError('unknown GNN model')
self.factor = None
self.pooling_layer = SumPooling()
self.mlp_hidden_unit = mlp_hidden_unit
self.feature_mode = feature_mode
if self.feature_mode == 'concat':
self.mlp_hidden_layer = torch.nn.Linear(2 * self.dim, self.mlp_hidden_unit)
elif self.feature_mode == 'subtract':
self.mlp_hidden_layer = torch.nn.Linear(self.dim, self.mlp_hidden_unit)
else:
raise ValueError('unknown feature mode')
self.mlp_output_layer = torch.nn.Linear(self.mlp_hidden_unit, 2)
def forward(self, graph1, graph2):
graph1_embedding = self.calculate_embedding(graph1)
graph2_embedding = self.calculate_embedding(graph2)
if self.feature_mode == 'concat':
hidden = relu(self.mlp_hidden_layer(torch.concat([graph1_embedding, graph2_embedding], dim=-1)))
elif self.feature_mode == 'subtract':
hidden = relu(self.mlp_hidden_layer(graph1_embedding - graph2_embedding))
else:
raise ValueError('unknown feature mode')
output = self.mlp_output_layer(hidden)
return output
def calculate_embedding(self, graph):
feature = graph.ndata['feature']
h = one_hot(feature, num_classes=self.tfeature_len)
h = torch.sum(h, dim=1, dtype=torch.float)
for layer in self.gnn_layers:
h = layer(graph, h)
if self.gnn == 'gat':
h = torch.reshape(h, [h.size()[0], -1])
if self.factor is None:
self.factor = math.sqrt(self.dim) / float(torch.mean(torch.linalg.norm(h, dim=1)))
h *= self.factor
graph_embedding = self.pooling_layer(graph, h)
return graph_embedding
def train(data,model,optimizer):
train_loader, val_loader, test_loader, tfeature_len = data
loss_fn = torch.nn.CrossEntropyLoss()
epoch = 23
model = model.to(device)
print('start training\n')
#evaluate(model, 'train', train_loader)
evaluate(model, 'val', val_loader)
evaluate(model, 'test', test_loader)
epoch_losses = []
for i in range(epoch):
print('epoch %d:' % i)
epoch_loss = 0
model.train()
for graph1, graph2, target in train_loader:
pred = torch.squeeze(model(graph1, graph2))
loss = loss_fn(pred, target)
optimizer.zero_grad()
loss.backward()
optimizer.step()
epoch_loss += loss.detach().item()
epoch_loss /= (iter + 1)
print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
epoch_losses.append(epoch_loss)
#evaluate(model, 'train', train_loader)
evaluate(model, 'val', val_loader)
evaluate(model, 'test', test_loader)
print()
def evaluate(model, mode, data):
pred_list = []
target_list = []
model.eval()
with torch.no_grad():
for graph1, graph2, target in data:
outputs = torch.softmax(model(graph1, graph2), 1)
_, predicted = torch.max(outputs.data, 1)
pred_list.append(predicted)
target_list.append(target)
#torch.sum(preds == targets).detach().cpu().numpy().
pred_list = torch.concat(pred_list)
target_list = torch.concat(target_list)
#print('%s Acc: %.4f' % (mode (sklearn.metrics.accuracy_score(target_list, pred_list, normalize=False)) / len(pred_list) * 10
The accuracy is commented out at the moment because that too gave me an error. My first error however was the following:
start training
epoch 0:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-84-9de6c6dbd2ee> in <module>
----> 1 train(data,model,optimizer)
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3012 if size_average is not None or reduce is not None:
3013 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3014 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
3015
3016
RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'
My targets are as follows:
targets = training_data['Class'].tolist()
targets = torch.Tensor(targets)
targets = targets.to(device)
And is just a list of 1s and 0s.
I call my model as follows:
model = GNN('sage', 3, tfeature_len, 2048, 100, 'subtract')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
How I can fix this?
I have made a chatbot using pytorch and would like to display accuracy on every epoch. I am not quite understanding how to do that. I can display loss but cant figure out how to display my accuracy
Here is my code :-
from nltk_utils import tokenize, stem, bag_of_words
import json
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from model import NeuralNet
from torch.autograd import Variable
all_words=[]
tags=[]
xy=[]
questionsP1=[]
questionsP2=[]
questionsP3=[]
questionsP4=[]
questionTag={}
with open('new.json', encoding="utf8") as file:
data = json.load(file)
for intent in data["intents"]:
for proficiency in intent["proficiency"]:
for questions in proficiency["questions"]:
for responses in questions["responses"]:
wrds = tokenize(responses)
all_words.extend(wrds)
xy.append((wrds, questions["tag"]))
if questions["tag"] in tags:
print(questions["tag"])
if questions["tag"] not in tags:
tags.append(questions["tag"])
if proficiency["level"] == "P1":
questionsP1.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P2":
questionsP2.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P3":
questionsP3.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
if proficiency["level"] == "P4":
questionsP4.append(questions["question"])
questionTag[questions["question"]]=questions["tag"]
ignore_words = ['?', '!', '.', ',']
all_words = [stem(x) for x in all_words if x not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))
X_train = []
y_train = []
for tokenized_response, tag in xy:
bag = bag_of_words(tokenized_response, all_words)
print(bag)
X_train.append( bag )
label = tags.index( tag )
y_train.append( label )
print(y_train)
X_train = np.array( X_train )
y_train = np.array( y_train )
class ChatDataset(Dataset):
def __init__(self):
self.n_samples = len(X_train)
self.x_data = X_train
self.y_data = y_train
def __getitem__(self, index):
return self.x_data[index], self.y_data[index]
def __len__(self):
return self.n_samples
#HyperParameters
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(X_train[0])
learning_rate = 0.001
num_epochs = 994
dataset = ChatDataset()
train_loader = DataLoader(dataset = dataset, batch_size=batch_size, shuffle = True, num_workers = 2)
device = 'cpu'
model = NeuralNet(input_size, hidden_size, output_size).to(device)
#loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
for epoch in range( num_epochs ):
for (words, labels) in train_loader:
words = words.to(device)
labels = labels.to(device)
#Forward
outputs = model(words)
loss = criterion(outputs, labels)
#backward and optimizer step
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'epoch {epoch + 1}/ {num_epochs}, loss={loss.item(): .4f}')
print(f'final loss, loss={loss.item(): .4f}')
data = {
"model_state": model.state_dict(),
"input_size": input_size,
"output_size": output_size,
"hidden_size": hidden_size,
"all_words": all_words,
"tags": tags,
}
FILE = "data.pth"
torch.save(data, FILE)
with open('new.json', 'r') as f:
intents = json.load(f)
bot_name = "Sam"
while True:
sentence = input("You: ")
if sentence == 'quit':
break
sentence = tokenize(sentence)
X = bag_of_words(sentence, all_words)
X = X.reshape( 1, X.shape[0])
X = torch.from_numpy( X )
output = model( X )
_, predicted = torch.max(output, dim=1)
tag = tags[predicted.item()]
print(tag)
probs = torch.softmax(output, dim=1)
probs = probs[0][predicted.item()]
print( probs.item() )
if probs.item() > 0.75:
for intent in intents["intents"]:
for proficiency in intent["proficiency"]:
for questions in proficiency["questions"]:
if questions["tag"] == tag:
print(f'{bot_name}: {questions["question"]}')
else:
print(f'{bot_name}: Probability Too Low')
print(f'Training Complete. File saved to {FILE}')
My chatbot is working inverselt... i am trying to map the answer to the right question.
Any help would be appreciated.
According to your code labels contains the indices that should have the highest values in outputs in order for the samples to be counted as correct predictions.
So to calculate the validation accuracy:
correct = 0
total = 0
model.eval()
with torch.no_grad():
for (words, labels) in validation_loader:
words = words.to(device)
labels = labels.to(device)
total += labels.shape[0]
outputs = model(words)
correct += torch.sum(labels == outputs.argmax(dim=-1))
accuracy = correct / total
https://github.com/cmgreen210/TensorFlowDeepAutoencoder
i'm trying to save and restore model after the fine tuning step i tried to restore the model then get the variable from the model and it gave me this error, ValueError: Variable autoencoder_variables/weights1 does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope? If i make the reuse into false it create a new variables for the weights and biases
here's my code to restore the model.
def do_eval(sess, eval_correct,images_placeholder,labels_placeholder,
data_set):
true_count = 0 # Counts the number of correct predictions.
steps_per_epoch = data_set.num_examples // FLAGS.batch_size
num_examples = steps_per_epoch * FLAGS.batch_size
for step in range(steps_per_epoch):
feed_dict = fill_feed_dict(data_set,
images_placeholder,
labels_placeholder)
true_count += sess.run(eval_correct, feed_dict=feed_dict)
precision = true_count / num_examples
print(' Num examples: %d Num correct: %d Precision # 1: %0.04f' %
(num_examples, true_count, precision))
def evaluation(logits, labels):
return tf.reduce_sum(tf.cast(correct, tf.int32))
def test_nets(self):
data = read_data_sets(FLAGS.data_dir)
ckpt = tf.train.get_checkpoint_state("model_sps_2017-08-
29_11:45:25")
sess = tf.InteractiveSession()
saver = tf.train.import_meta_graph('model_sps_2017-08-29_11:45:25/model.meta')
saver.restore(sess, ckpt.model_checkpoint_path)
with sess.as_default():
ae_shape = [784, 2000, 2000, 2000, 10]
ae = AutoEncoder(ae_shape, sess)
input_pl = tf.placeholder(tf.float32, shape=(FLAGS.batch_size,
FLAGS.image_pixels),name='input_pl')
sup_net = ae.supervised_net(input_pl)
data = read_data_sets(FLAGS.data_dir)
labels_placeholder = tf.placeholder(tf.int32,
shape=FLAGS.batch_size,
name='target_pl')
eval_correct = evaluation(sup_net, labels_placeholder)
do_eval(sess,
eval_correct,
input_pl,
labels_placeholder,
data.test)
this is the code how i train and save the supervise model
def main_supervised(ae):
with ae.session.graph.as_default():
saver = tf.train.Saver()
sess = ae.session
input_pl = tf.placeholder(tf.float32, shape=(FLAGS.batch_size,
FLAGS.image_pixels),
name='input_pl')
logits = ae.supervised_net(input_pl)
data = read_data_sets(FLAGS.data_dir)
num_train = data.train.num_examples
labels_placeholder = tf.placeholder(tf.int32,
shape=FLAGS.batch_size,
name='target_pl')
loss = loss_supervised(logits, labels_placeholder)
train_op, global_step = training(loss, FLAGS.supervised_learning_rate)
eval_correct = evaluation(logits, labels_placeholder)
hist_summaries = [ae['biases{0}'.format(i + 1)]
for i in range(ae.num_hidden_layers + 1)]
hist_summaries.extend([ae['weights{0}'.format(i + 1)]
for i in range(ae.num_hidden_layers + 1)])
hist_summaries = [tf.summary.histogram(v.op.name + "_fine_tuning", v)
for v in hist_summaries]
summary_op = tf.summary.merge(hist_summaries)
summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph_def)
# tf.train.SummaryWriter(pjoin(FLAGS.summary_dir,
# 'fine_tuning'),
# graph_def=sess.graph_def,
# flush_secs=FLAGS.flush_secs)
vars_to_init = ae.get_variables_to_init(ae.num_hidden_layers + 1)
vars_to_init.append(global_step)
#sess.run(tf.initialize_variables(vars_to_init))
init = tf.initialize_all_variables()
sess.run(init)
steps = (num_train//FLAGS.batch_size)
for k in range(1):
for step in range(1):
start_time = time.time()
feed_dict = fill_feed_dict(data.train,
input_pl,
labels_placeholder)
_, loss_value = sess.run([train_op, loss],
feed_dict=feed_dict)
duration = time.time() - start_time
# Write the summaries and print an overview fairly often.
if step % 1 == 0:
# Print status to stdout.
print('Step %d/%d: loss = %.2f (%.3f sec)' % (step, steps,loss_value, duration))
# Update the events file.
summary_str = sess.run(summary_op, feed_dict=feed_dict)
summary_writer.add_summary(summary_str, step)
# summary_img_str = sess.run(
# tf.summary.image("training_images",
# tf.reshape(input_pl,
# (FLAGS.batch_size,
# FLAGS.image_size,
# FLAGS.image_size, 1)),
# max_outputs=10),
# feed_dict=feed_dict
# )
# summary_writer.add_summary(summary_img_str)
if (step + 1) % 1000 == 0 or (step + 1) == steps:
train_sum = do_eval_summary("training_error",
sess,
eval_correct,
input_pl,
labels_placeholder,
data.train)
val_sum = do_eval_summary("validation_error",
sess,
eval_correct,
input_pl,
labels_placeholder,
data.validation)
test_sum = do_eval_summary("test_error",
sess,
eval_correct,
input_pl,
labels_placeholder,
data.test)
summary_writer.add_summary(train_sum, step)
summary_writer.add_summary(val_sum, step)
summary_writer.add_summary(test_sum, step)
folder = "model_sps_"+str(strftime("%Y-%m-%d_%H:%M:%S", gmtime()))
os.mkdir(folder)
folder += "/model"
saver.save(sess, folder)
do_eval(sess,
eval_correct,
input_pl,
labels_placeholder,
data.test)
this how i setup the variable and restore it one by one on the scope of autoencoder_variables
def _restore_variables(self):
#print(tf.get_collection(tf.GraphKeys.VARIABLES, scope='autoencoder_variables'))
# v=tf.get_variable("autoencoder_variables/weights1", shape=(784, 2000))
# print(v)
with tf.variable_scope("autoencoder_variables",reuse=True ) as scope1:
#print(tf.get_collection(tf.GraphKeys.VARIABLES, scope="autoencoder_variables"))
#print(scope)
#tf.Variable 'autoencoder_variables/weights1:0' shape=(784, 2000)
for i in range(self.__num_hidden_layers + 1):
# Train weights
name_w = self._weights_str.format(i + 1)
w_shape = (self.__shape[i], self.__shape[i + 1])
self[name_w] = tf.get_variable(name_w,w_shape,trainable=False)
# Train biases
name_b = self._biases_str.format(i + 1)
b_shape = (self.__shape[i + 1],)
self[name_b] = tf.get_variable(name_b,b_shape)
if i < self.__num_hidden_layers:
# Hidden layer fixed weights (after pretraining before fine tuning)
self[name_w + "_fixed"] = tf.get_variable(name_w+ "_fixed",w_shape)
# Hidden layer fixed biases
self[name_b + "_fixed"] = tf.get_variable(name_b+ "_fixed",b_shape)
# Pretraining output training biases
name_b_out = self._biases_str.format(i + 1) + "_out"
b_shape = (self.__shape[i],)
self[name_b_out] = tf.get_variable(name_b_out,b_shape)
traceback error :
File "run.py", line 47, in <module>
main()
File "run.py", line 40, in main
test.test_nets_1()
File "C:\Users\simjs\Downloads\TensorFlowDeepAutoencoder-master\TensorFlowDeepAutoencoder-master\code\ae\autoencoder_test.py", line 55, in test_nets_1
ae = AutoEncoder(ae_shape, sess)
File "C:\Users\simjs\Downloads\TensorFlowDeepAutoencoder-master\TensorFlowDeepAutoencoder-master\code\ae\autoencoder.py", line 41, in __init__
self._restore_variables()
File "C:\Users\simjs\Downloads\TensorFlowDeepAutoencoder-master\TensorFlowDeepAutoencoder-master\code\ae\autoencoder.py", line 98, in _restore_variables
self[name_w] = tf.get_variable(name_w,w_shape)
File "C:\Users\simjs\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "C:\Users\simjs\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "C:\Users\simjs\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 367, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "C:\Users\simjs\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "C:\Users\simjs\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 682, in _get_single_variable
"VarScope?" % name)
ValueError: Variable autoencoder_variables/weights1 does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=None in VarScope?
Check which variables are not initialized by writting print(sess.run(tf.report_uninitialized_variables())) after the restoring line.
Try also sess.run(tf.global_variables_initializer()) before restoring and analyse your results.
TensorFlow - How to predict with trained model on a different test dataset? I was working on Image segmentation. The prediction output have different dimension and give me hard time. Any help ..
def main(argv=None):
keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")
pred_annotation, logits = inference(image, keep_probability)
tf.summary.image("input_image", image, max_outputs=2)
tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
labels=tf.squeeze(annotation, squeeze_dims=[3]),
name="entropy")))
tf.summary.scalar("entropy", loss)
trainable_var = tf.trainable_variables()
if FLAGS.debug:
for var in trainable_var:
utils.add_to_regularization_and_summary(var)
train_op = train(loss, trainable_var)
print("Setting up summary op...")
summary_op = tf.summary.merge_all()
print("Setting up image reader...")
train_records, valid_records, test_records= scene_parsing.read_dataset(FLAGS.data_dir)
print(len(train_records))
print(len(valid_records))
print(len(test_records))
print("Setting up dataset reader")
image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
if FLAGS.mode == 'train':
train_dataset_reader = dataset.BatchDatset(train_records, image_options)
validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
test_dataset_reader = dataset.BatchDatset(test_records, image_options)
sess = tf.Session()
print("Setting up Saver...")
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)
sess.run(tf.global_variables_initializer())
ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print("Model restored...")
if FLAGS.mode == "train":
for itr in xrange(MAX_ITERATION):
train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}
sess.run(train_op, feed_dict=feed_dict)
if itr % 10 == 0:
train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
print("Step: %d, Train_loss:%g" % (itr, train_loss))
summary_writer.add_summary(summary_str, itr)
if itr % 100 == 0:
valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
valid_loss = sess.run(loss, feed_dict={image: valid_images, annotation: valid_annotations,
keep_probability: 1.0})
print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
elif FLAGS.mode == "predict":
predict_dataset_reader = dataset.BatchDatset(train_records, image_options)
test_images = predict_dataset_reader.get_random_batch(FLAGS.batch_size)
pred = sess.run(pred_annotation, feed_dict={image: test_images,
keep_probability: 1.0})
#test_annotations = np.squeeze(test_annotations, axis=3)
pred = np.squeeze(pred, axis=3)
for itr in range(FLAGS.batch_size):
utils.save_image(test_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(20+itr))
utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + str(20+itr))
print("Saved image: %d" % itr)
if __name__ == "__main__":
tf.app.run()
The error is on "predict" for "train" works perfectly.
I am using a machine which has 2 GPUs Titan Black to train my deep learning model which has 3 layers (3x3, 3x3 and 5x5).
The training runs pretty well but when I watch nvidia-smi (watch every 1 sec), I realized that my program uses only one GPU for computation, the second one always 0% even when the first one reach 100%.
I am trying to use tf.device to assign specific tasks for each of them but then they run one-by-one, not in parallel, and the total time was even increased, not reduced (I guess because 2 GPUs had to exchange values with each other)
Below is my program. It is quite messy, maybe you just need to pay attention at the graph where I use tf.device is enough...
Thank you so much!
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
from os import listdir, sys
from os.path import isfile, join
from time import gmtime, strftime
import time
def validatePath(path):
path = path.replace("\\","/")
if (path[len(path)-1] != "/"):
path = path + "/"
return path
hidden_size_default = np.array([16, 32, 64, 32])
cnn1_default = 3
cnn2_default = 3
cnn3_default = 5
SIZE_BATCH_VALID = 200
input_path = 'ARCHIVES-sub-dataset'
output_path = 'ARCHIVES-model'
log_address = "trainlog.txt"
tf.app.flags.DEFINE_integer('h0', hidden_size_default[0], 'Size of hidden layer 0th')
tf.app.flags.DEFINE_integer('h1', hidden_size_default[1], 'Size of hidden layer 1st')
tf.app.flags.DEFINE_integer('h2', hidden_size_default[2], 'Size of hidden layer 2nd')
tf.app.flags.DEFINE_integer('h3', hidden_size_default[3], 'Size of hidden layer 3rd')
tf.app.flags.DEFINE_integer('k1', cnn1_default , 'Size of kernel 1st')
tf.app.flags.DEFINE_integer('k2', cnn2_default , 'Size of kernel 2nd')
tf.app.flags.DEFINE_integer('k3', cnn3_default , 'Size of kernel 3rd')
tf.app.flags.DEFINE_string('input_path', input_path, 'The parent directory which contains 2 directories: dataset and label')
tf.app.flags.DEFINE_string('output_path', output_path, 'The directory which will store models (you have to create)')
tf.app.flags.DEFINE_string('log_address', log_address, 'The file name which will store the log')
FLAGS = tf.app.flags.FLAGS
load_path = FLAGS.input_path
save_model_path = FLAGS.output_path
log_addr = FLAGS.log_address
load_path = validatePath(load_path)
save_model_path = validatePath(save_model_path)
cnn1 = FLAGS.k1
cnn2 = FLAGS.k2
cnn3 = FLAGS.k3
hidden_size = np.array([FLAGS.h0, FLAGS.h1, FLAGS.h2, FLAGS.h3])
# Shuffle the dataset and its label
def randomize(dataset, labels):
permutation = np.random.permutation(labels.shape[0])
shuffled_dataset = dataset[permutation,:]
shuffled_labels = labels[permutation]
return shuffled_dataset, shuffled_labels
def writemyfile(mystring):
with open(log_addr, "a") as myfile:
myfile.write(str(mystring + "\n"))
num_labels = 5
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/ predictions.shape[0])
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def DivideSets(input_set):
length_set = input_set.shape[0]
index_70 = int(length_set*0.7)
index_90 = int(length_set*0.9)
set_train = input_set[0:index_70]
set_valid = input_set[index_70:index_90]
set_test = input_set[index_90:length_set]
return np.float32(set_train), np.float32(set_valid), np.float32(set_test)
# from 1-value labels to 5 values of (0 and 1)
def LabelReconstruct(label_set):
label_set = label_set.astype(int)
new_label_set = np.zeros(shape=(len(label_set),num_labels))
for i in range(len(label_set)):
new_label_set[i][label_set[i]] = 1
return new_label_set.astype(int)
def LoadDataSet(load_path):
list_data = [f for f in listdir(load_path + "dataset/") if isfile(join(load_path + "dataset/", f))]
list_label = [f for f in listdir(load_path + "label/") if isfile(join(load_path + "dataset/", f))]
if list_data.sort() == list_label.sort():
return list_data
else:
print("data and labels are not suitable")
return 0
# load, randomize, normalize images and reconstruct labels
def PrepareData(*arg):
filename = arg[0]
loaded_dataset = pickle.load( open( load_path + "dataset/" + filename, "rb" ))
loaded_labels = pickle.load( open( load_path + "label/" + filename, "rb" ))
if len(arg) == 1:
datasize = len(loaded_labels)
elif len(arg) == 2:
datasize = int(arg[1])
else:
print("not more than 2 arguments please!")
dataset_full,labels_full = randomize(loaded_dataset[0:datasize], loaded_labels[0:datasize])
return NormalizeData(dataset_full), LabelReconstruct(labels_full)
def NormalizeData(dataset):
dataset = dataset - (dataset.mean())
dataset = dataset / (dataset.std())
return dataset
### LOAD DATA
listfiles = LoadDataSet(load_path)
# divide
listfiles_train = listfiles[0:15]
listfiles_valid = listfiles[15:25]
listfiles_test = listfiles[25:len(listfiles)]
graphCNN = tf.Graph()
with graphCNN.as_default():
with tf.device('/gpu:0'):
x = tf.placeholder(tf.float32, shape=(None, 224,224,3)) # X
y_ = tf.placeholder(tf.float32, shape=(None, num_labels)) # Y_
dropout = tf.placeholder(tf.float32)
if dropout == 1.0:
keep_prob = tf.constant([0.2, 0.3, 0.5], dtype=tf.float32)
else:
keep_prob = tf.constant([1.0, 1.0, 1.0], dtype=tf.float32)
weights_1 = weight_variable([cnn1,cnn1,3, hidden_size[0]])
biases_1 = bias_variable([hidden_size[0]])
weights_2 = weight_variable([cnn2,cnn2,hidden_size[0], hidden_size[1]])
biases_2 = bias_variable([hidden_size[1]])
weights_3 = weight_variable([cnn3,cnn3,hidden_size[1], hidden_size[2]])
biases_3 = bias_variable([hidden_size[2]])
weights_4 = weight_variable([56 * 56 * hidden_size[2], hidden_size[3]])
biases_4 = bias_variable([hidden_size[3]])
weights_5 = weight_variable([hidden_size[3], num_labels])
biases_5 = bias_variable([num_labels])
def model(data):
with tf.device('/gpu:1'):
train_hidden_1 = tf.nn.relu(conv2d(data, weights_1) + biases_1)
train_hidden_2 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_1, weights_2) + biases_2))
train_hidden_2_drop = tf.nn.dropout(train_hidden_2, keep_prob[0])
train_hidden_3 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_2_drop, weights_3) + biases_3))
train_hidden_3_drop = tf.nn.dropout(train_hidden_3, keep_prob[1])
train_hidden_3_drop = tf.reshape(train_hidden_3_drop,[-1, 56 * 56 * hidden_size[2]])
train_hidden_4 = tf.nn.relu(tf.matmul(train_hidden_3_drop, weights_4) + biases_4)
train_hidden_4_drop = tf.nn.dropout(train_hidden_4, keep_prob[2])
logits = tf.matmul(train_hidden_4_drop, weights_5) + biases_5
return logits
t_train_labels = tf.argmax(y_, 1) # From one-hot (one and zeros) vectors to values
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model(x), labels=t_train_labels))
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
y = tf.nn.softmax(model(x))
### RUNNING
print("log address: %s" % (log_addr))
#num_steps = 10001
times_repeat = 20 # number of epochs
batch_size = 100
with tf.Session(graph=graphCNN,config=tf.ConfigProto(log_device_placement=True)) as session:
tf.initialize_all_variables().run()
saver = tf.train.Saver(max_to_keep=0)
writemyfile("---ARCHIVES_M1----")
mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
writemyfile(str("\nTime: %s \nLayers: %d,%d,%d \epochs: %d" % (mytime,cnn1,cnn2,cnn3,times_repeat)))
writemyfile("Train files:" + str(listfiles_train))
writemyfile("Valid files:" + str(listfiles_valid))
writemyfile("Test files:" + str(listfiles_test))
print("Model will be saved in file: %s" % save_model_path)
writemyfile(str("Model will be saved in file: %s" % save_model_path))
### TRAINING & VALIDATION
valid_accuracies_epochs = np.array([])
for time_repeat in range(times_repeat):
print("- time_repeat:",time_repeat)
writemyfile("- time_repeat:"+str(time_repeat))
for file_train in listfiles_train:
file_train_id = int(file_train[0:len(file_train)-4])
time_start_this_file = time.time()
#LOAD DATA
print("- - file:",file_train_id, end=' ')
writemyfile("- - file:" + str(file_train_id))
Data_train, Label_train= PrepareData(file_train)
for step in range(0,len(Data_train)-batch_size,batch_size):
batch_data = Data_train[step:step+batch_size]
batch_labels = Label_train[step:step+batch_size]
feed_dict = {x : batch_data, y_ : batch_labels, dropout: 1.0}
opti, l, predictions = session.run([optimizer, loss, y], feed_dict=feed_dict)
train_accuracies = np.array([])
for index_tr_accu in range(0,len(Data_train)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
current_predictions = y.eval(feed_dict={x: Data_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID],dropout: 0.0})
current_accuracy = accuracy(current_predictions, Label_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID])
train_accuracies = np.r_[train_accuracies,current_accuracy]
train_accuracy = train_accuracies.mean()
print("batch accu: %.2f%%" %(train_accuracy),end=" | ")
writemyfile("batch accu: %.2f%%" %(train_accuracy))
time_done_this_file = time.time() - time_start_this_file
print("time: %.2fs" % (time_done_this_file))
writemyfile("time: %.2fs" % (time_done_this_file))
# save model
model_addr = save_model_path + "model335" + "-epoch-" + str(time_repeat) + ".ckpt"
save_path = saver.save(session, model_addr,) # max_to_keep default was 5
mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print("epoch finished at %s \n model address: %s" % (mytime,model_addr))
writemyfile("epoch finished at %s \n model address: %s" % (mytime,model_addr))
# validation
valid_accuracies = np.array([])
for file_valid in listfiles_valid:
file_valid_id = int(file_valid[0:len(file_valid)-4])
Data_valid, Label_valid = PrepareData(file_valid)
for index_vl_accu in range(0,len(Data_valid)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
current_predictions = y.eval(feed_dict={x: Data_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID],dropout: 0.0})
current_accuracy = accuracy(current_predictions, Label_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID])
valid_accuracies = np.r_[valid_accuracies,current_accuracy]
valid_accuracy = valid_accuracies.mean()
print("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
writemyfile("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
valid_accuracies_epochs = np.hstack([valid_accuracies_epochs,valid_accuracy])
print('Done!!')
writemyfile(str('Done!!'))
session.close()
Update: I found cifar10_multi_gpu_train.py seems to be a good example for training with multi GPUs, but honestly I don't know how to apply on my case.
I think you need to change
def model(data):
with tf.device('/gpu:1'):
to:
def model(data):
for d in ['/gpu:0', '/gpu:1']:
with tf.device(d):
and ditch the line with tf.device('/gpu:0'):
Since at the first with tf.device... you are only doing initiation
of variables and then you are resetting your devices with the next with tf.device.
Let me know if this works since I can't test it.