Chainer: how to save and load a DQN model - machine-learning

I'm learning the deep reinforcement learning framework Chainer.
I've followed a tutorial and ended up with the following code:
import copy
import time

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L


def train_dddqn(env):

    class Q_Network(chainer.Chain):

        def __init__(self, input_size, hidden_size, output_size):
            super(Q_Network, self).__init__(
                fc1=L.Linear(input_size, hidden_size),
                fc2=L.Linear(hidden_size, hidden_size),
                fc3=L.Linear(hidden_size, hidden_size // 2),
                fc4=L.Linear(hidden_size, hidden_size // 2),
                state_value=L.Linear(hidden_size // 2, 1),
                advantage_value=L.Linear(hidden_size // 2, output_size)
            )
            self.input_size = input_size
            self.hidden_size = hidden_size
            self.output_size = output_size

        def __call__(self, x):
            h = F.relu(self.fc1(x))
            h = F.relu(self.fc2(h))
            hs = F.relu(self.fc3(h))
            ha = F.relu(self.fc4(h))
            state_value = self.state_value(hs)
            advantage_value = self.advantage_value(ha)
            advantage_mean = (F.sum(advantage_value, axis=1) / float(self.output_size)).reshape(-1, 1)
            q_value = F.concat([state_value for _ in range(self.output_size)], axis=1) + (
                advantage_value - F.concat([advantage_mean for _ in range(self.output_size)], axis=1))
            return q_value

        def reset(self):
            self.cleargrads()

    Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
    Q_ast = copy.deepcopy(Q)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(Q)

    epoch_num = 50
    step_max = len(env.data) - 1
    memory_size = 200
    batch_size = 50
    epsilon = 1.0
    epsilon_decrease = 1e-3
    epsilon_min = 0.1
    start_reduce_epsilon = 200
    train_freq = 10
    update_q_freq = 20
    gamma = 0.97
    show_log_freq = 5

    memory = []
    total_step = 0
    total_rewards = []
    total_losses = []

    start = time.time()
    for epoch in range(epoch_num):
        pobs = env.reset()
        step = 0
        done = False
        total_reward = 0
        total_loss = 0

        while not done and step < step_max:
            # select act
            pact = np.random.randint(3)
            if np.random.rand() > epsilon:
                pact = Q(np.array(pobs, dtype=np.float32).reshape(1, -1))
                pact = np.argmax(pact.data)

            # act
            obs, reward, done = env.step(pact)

            # add memory
            memory.append((pobs, pact, reward, obs, done))
            if len(memory) > memory_size:
                memory.pop(0)

            # train or update q
            if len(memory) == memory_size:
                if total_step % train_freq == 0:
                    shuffled_memory = np.random.permutation(memory)
                    memory_idx = range(len(shuffled_memory))
                    for i in memory_idx[::batch_size]:
                        batch = np.array(shuffled_memory[i:i + batch_size])
                        b_pobs = np.array(batch[:, 0].tolist(), dtype=np.float32).reshape(batch_size, -1)
                        b_pact = np.array(batch[:, 1].tolist(), dtype=np.int32)
                        b_reward = np.array(batch[:, 2].tolist(), dtype=np.int32)
                        b_obs = np.array(batch[:, 3].tolist(), dtype=np.float32).reshape(batch_size, -1)
                        b_done = np.array(batch[:, 4].tolist(), dtype=np.bool)

                        q = Q(b_pobs)
                        # Double DQN: pick actions with Q, evaluate them with the target network Q_ast
                        indices = np.argmax(q.data, axis=1)
                        maxqs = Q_ast(b_obs).data
                        target = copy.deepcopy(q.data)
                        for j in range(batch_size):
                            target[j, b_pact[j]] = b_reward[j] + gamma * maxqs[j, indices[j]] * (not b_done[j])
                        Q.reset()
                        loss = F.mean_squared_error(q, target)
                        total_loss += loss.data
                        loss.backward()
                        optimizer.update()

                if total_step % update_q_freq == 0:
                    Q_ast = copy.deepcopy(Q)

            # epsilon
            if epsilon > epsilon_min and total_step > start_reduce_epsilon:
                epsilon -= epsilon_decrease

            # next step
            total_reward += reward
            pobs = obs
            step += 1
            total_step += 1

        total_rewards.append(total_reward)
        total_losses.append(total_loss)

        if (epoch + 1) % show_log_freq == 0:
            log_reward = sum(total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
            log_loss = sum(total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
            elapsed_time = time.time() - start
            print('\t'.join(map(str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
            start = time.time()

    return Q, total_losses, total_rewards


Q, total_losses, total_rewards = train_dddqn(Environment1(train))
My question is: how can I save and load this model once it has been trained? I know Keras has functions like model.save and load_model.
What is the specific code I need for this Chainer code?

You can use the serializers module to save/load a Chainer model's parameters (the Chain class).
from chainer import serializers
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
# --- train Q here... ---
# copy Q's parameters into Q_ast by saving them to disk and loading them back
serializers.save_npz('my.model', Q)
serializers.load_npz('my.model', Q_ast)
See the official documentation for details:
http://docs.chainer.org/en/stable/guides/serializers.html
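If you also want to resume training later rather than only run inference, it can help to persist the optimizer state alongside the model. A minimal sketch (the file names my.model and my.state are arbitrary):
from chainer import serializers

# save the network parameters and the optimizer state
serializers.save_npz('my.model', Q)
serializers.save_npz('my.state', optimizer)

# later: rebuild the objects, then load the saved state back in
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
optimizer = chainer.optimizers.Adam()
optimizer.setup(Q)
serializers.load_npz('my.model', Q)
serializers.load_npz('my.state', optimizer)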
Also, you may refer to ChainerRL, which is a Chainer library for reinforcement learning.
https://github.com/chainer/chainerrl
ChainerRL has a utility function copy_param to copy parameters from a network source_link to a target_link.
https://github.com/chainer/chainerrl/blob/master/chainerrl/misc/copy_param.py#L12-L30
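As a rough sketch of how that helper might be used (assuming the signature copy_param(target_link, source_link) shown in the linked file; check your chainerrl version):
from chainerrl.misc.copy_param import copy_param

# copy all of Q's parameters into Q_ast without going through disk
copy_param(target_link=Q_ast, source_link=Q)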

Related

Error trying to convert pytorch regression model to a classification model

I have a GNN that works for regression; however, I have changed the nature of the task from regression to classification. I thought it would be as simple as changing the loss function and the output size, but I am getting multiple errors. This is my code:
class GNN(torch.nn.Module):
    def __init__(self, gnn, n_layer, tfeature_len, dim, mlp_hidden_unit, feature_mode):
        super(GNN, self).__init__()
        self.gnn = gnn
        self.n_layer = n_layer
        self.tfeature_len = tfeature_len
        self.dim = dim
        self.gnn_layers = ModuleList([])
        if gnn in ['gcn', 'gat', 'sage', 'tag']:
            for i in range(n_layer):
                if gnn == 'gcn':
                    self.gnn_layers.append(GraphConv(in_feats=tfeature_len if i == 0 else dim,
                                                     out_feats=dim,
                                                     activation=None if i == n_layer - 1 else torch.relu))
                elif gnn == 'gat':
                    num_heads = 16  # make sure that dim is dividable by num_heads
                    self.gnn_layers.append(GATConv(in_feats=tfeature_len if i == 0 else dim,
                                                   out_feats=dim // num_heads,
                                                   activation=None if i == n_layer - 1 else torch.relu,
                                                   num_heads=num_heads))
                elif gnn == 'sage':
                    agg = 'pool'
                    self.gnn_layers.append(SAGEConv(in_feats=tfeature_len if i == 0 else dim,
                                                    out_feats=dim,
                                                    activation=None if i == n_layer - 1 else torch.relu,
                                                    aggregator_type=agg))
                elif gnn == 'tag':
                    hops = 2
                    self.gnn_layers.append(TAGConv(in_feats=tfeature_len if i == 0 else dim,
                                                   out_feats=dim,
                                                   activation=None if i == n_layer - 1 else torch.relu,
                                                   k=hops))
        elif gnn == 'sgc':
            self.gnn_layers.append(SGConv(in_feats=tfeature_len, out_feats=dim, k=n_layer))
        else:
            raise ValueError('unknown GNN model')
        self.factor = None
        self.pooling_layer = SumPooling()
        self.mlp_hidden_unit = mlp_hidden_unit
        self.feature_mode = feature_mode
        if self.feature_mode == 'concat':
            self.mlp_hidden_layer = torch.nn.Linear(2 * self.dim, self.mlp_hidden_unit)
        elif self.feature_mode == 'subtract':
            self.mlp_hidden_layer = torch.nn.Linear(self.dim, self.mlp_hidden_unit)
        else:
            raise ValueError('unknown feature mode')
        self.mlp_output_layer = torch.nn.Linear(self.mlp_hidden_unit, 2)

    def forward(self, graph1, graph2):
        graph1_embedding = self.calculate_embedding(graph1)
        graph2_embedding = self.calculate_embedding(graph2)
        if self.feature_mode == 'concat':
            hidden = relu(self.mlp_hidden_layer(torch.concat([graph1_embedding, graph2_embedding], dim=-1)))
        elif self.feature_mode == 'subtract':
            hidden = relu(self.mlp_hidden_layer(graph1_embedding - graph2_embedding))
        else:
            raise ValueError('unknown feature mode')
        output = self.mlp_output_layer(hidden)
        return output

    def calculate_embedding(self, graph):
        feature = graph.ndata['feature']
        h = one_hot(feature, num_classes=self.tfeature_len)
        h = torch.sum(h, dim=1, dtype=torch.float)
        for layer in self.gnn_layers:
            h = layer(graph, h)
            if self.gnn == 'gat':
                h = torch.reshape(h, [h.size()[0], -1])
        if self.factor is None:
            self.factor = math.sqrt(self.dim) / float(torch.mean(torch.linalg.norm(h, dim=1)))
        h *= self.factor
        graph_embedding = self.pooling_layer(graph, h)
        return graph_embedding


def train(data, model, optimizer):
    train_loader, val_loader, test_loader, tfeature_len = data
    loss_fn = torch.nn.CrossEntropyLoss()
    epoch = 23
    model = model.to(device)
    print('start training\n')
    #evaluate(model, 'train', train_loader)
    evaluate(model, 'val', val_loader)
    evaluate(model, 'test', test_loader)
    epoch_losses = []
    for i in range(epoch):
        print('epoch %d:' % i)
        epoch_loss = 0
        model.train()
        for graph1, graph2, target in train_loader:
            pred = torch.squeeze(model(graph1, graph2))
            loss = loss_fn(pred, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().item()
        epoch_loss /= (iter + 1)
        print('Epoch {}, loss {:.4f}'.format(epoch, epoch_loss))
        epoch_losses.append(epoch_loss)
        #evaluate(model, 'train', train_loader)
        evaluate(model, 'val', val_loader)
        evaluate(model, 'test', test_loader)
        print()


def evaluate(model, mode, data):
    pred_list = []
    target_list = []
    model.eval()
    with torch.no_grad():
        for graph1, graph2, target in data:
            outputs = torch.softmax(model(graph1, graph2), 1)
            _, predicted = torch.max(outputs.data, 1)
            pred_list.append(predicted)
            target_list.append(target)
            #torch.sum(preds == targets).detach().cpu().numpy().
    pred_list = torch.concat(pred_list)
    target_list = torch.concat(target_list)
    #print('%s Acc: %.4f' % (mode (sklearn.metrics.accuracy_score(target_list, pred_list, normalize=False)) / len(pred_list) * 10
The accuracy is commented out at the moment because that too gave me an error. My first error, however, was the following:
start training
epoch 0:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-84-9de6c6dbd2ee> in <module>
----> 1 train(data,model,optimizer)
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3012 if size_average is not None or reduce is not None:
3013 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3014 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
3015
3016
RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'
My targets are as follows:
targets = training_data['Class'].tolist()
targets = torch.Tensor(targets)
targets = targets.to(device)
It is just a list of 1s and 0s.
I call my model as follows:
model = GNN('sage', 3, tfeature_len, 2048, 100, 'subtract')
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
How can I fix this?
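The RuntimeError above says nll_loss is not implemented for 'Float': torch.nn.CrossEntropyLoss expects the targets to be class indices with an integer (long) dtype, not a float tensor. A minimal sketch of preparing the targets accordingly (reusing the names from the snippet above):
targets = training_data['Class'].tolist()
# CrossEntropyLoss wants class indices as torch.long (int64), not float32
targets = torch.tensor(targets, dtype=torch.long)
targets = targets.to(device)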

Dimension error in neural network model for classification

Below is the code for Hierarchical Attention Networks, taken from https://github.com/arunarn2/HierarchicalAttentionNetworks. The only difference between the code at the link and mine is that I have 3 classes for classification, whereas they use 2.
maxlen = 100
max_sentences = 15
max_words = 20000
embedding_dim = 100
validation_split = 0.2


#class defining the custom attention layer
class HierarchicalAttentionNetwork(Layer):
    def __init__(self, attention_dim):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(HierarchicalAttentionNetwork, self).__init__()

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim,)))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weightss = [self.W, self.b, self.u]
        super(HierarchicalAttentionNetwork, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.exp(K.squeeze(K.dot(uit, self.u), -1))
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        weighted_input = x * K.expand_dims(ait)
        output = K.sum(weighted_input, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]


# building Hierachical Attention network
embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
                            input_length=maxlen, trainable=True, mask_zero=True)

sentence_input = Input(shape=(maxlen,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
lstm_word = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
attn_word = HierarchicalAttentionNetwork(100)(lstm_word)
sentenceEncoder = Model(sentence_input, attn_word)

review_input = Input(shape=(max_sentences, maxlen), dtype='int32')
review_encoder = TimeDistributed(sentenceEncoder)(review_input)
lstm_sentence = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
attn_sentence = HierarchicalAttentionNetwork(100)(lstm_sentence)
preds = Dense(3, activation='softmax')(attn_sentence)
model = Model(review_input, preds)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print("model fitting - Hierachical attention network")
Following is the error I get. Please help me understand what the error means and how I can possibly resolve it.

How to use multiple GPUs effectively when training deep networks?

I am using a machine which has 2 Titan Black GPUs to train my deep learning model, which has 3 layers (3x3, 3x3 and 5x5).
The training runs pretty well, but when I watch nvidia-smi (refreshing every second) I see that my program uses only one GPU for computation; the second one stays at 0% even when the first one reaches 100%.
I tried using tf.device to assign specific tasks to each of them, but then they run one after another, not in parallel, and the total time even increased rather than decreased (I guess because the 2 GPUs had to exchange values with each other).
Below is my program. It is quite messy; maybe you just need to pay attention to the graph where I use tf.device.
Thank you so much!
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
from os import listdir, sys
from os.path import isfile, join
from time import gmtime, strftime
import time


def validatePath(path):
    path = path.replace("\\", "/")
    if (path[len(path) - 1] != "/"):
        path = path + "/"
    return path


hidden_size_default = np.array([16, 32, 64, 32])
cnn1_default = 3
cnn2_default = 3
cnn3_default = 5
SIZE_BATCH_VALID = 200

input_path = 'ARCHIVES-sub-dataset'
output_path = 'ARCHIVES-model'
log_address = "trainlog.txt"

tf.app.flags.DEFINE_integer('h0', hidden_size_default[0], 'Size of hidden layer 0th')
tf.app.flags.DEFINE_integer('h1', hidden_size_default[1], 'Size of hidden layer 1st')
tf.app.flags.DEFINE_integer('h2', hidden_size_default[2], 'Size of hidden layer 2nd')
tf.app.flags.DEFINE_integer('h3', hidden_size_default[3], 'Size of hidden layer 3rd')
tf.app.flags.DEFINE_integer('k1', cnn1_default, 'Size of kernel 1st')
tf.app.flags.DEFINE_integer('k2', cnn2_default, 'Size of kernel 2nd')
tf.app.flags.DEFINE_integer('k3', cnn3_default, 'Size of kernel 3rd')
tf.app.flags.DEFINE_string('input_path', input_path, 'The parent directory which contains 2 directories: dataset and label')
tf.app.flags.DEFINE_string('output_path', output_path, 'The directory which will store models (you have to create)')
tf.app.flags.DEFINE_string('log_address', log_address, 'The file name which will store the log')

FLAGS = tf.app.flags.FLAGS
load_path = FLAGS.input_path
save_model_path = FLAGS.output_path
log_addr = FLAGS.log_address

load_path = validatePath(load_path)
save_model_path = validatePath(save_model_path)

cnn1 = FLAGS.k1
cnn2 = FLAGS.k2
cnn3 = FLAGS.k3
hidden_size = np.array([FLAGS.h0, FLAGS.h1, FLAGS.h2, FLAGS.h3])


# Shuffle the dataset and its label
def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation, :]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels


def writemyfile(mystring):
    with open(log_addr, "a") as myfile:
        myfile.write(str(mystring + "\n"))


num_labels = 5


def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def DivideSets(input_set):
    length_set = input_set.shape[0]
    index_70 = int(length_set * 0.7)
    index_90 = int(length_set * 0.9)
    set_train = input_set[0:index_70]
    set_valid = input_set[index_70:index_90]
    set_test = input_set[index_90:length_set]
    return np.float32(set_train), np.float32(set_valid), np.float32(set_test)


# from 1-value labels to 5 values of (0 and 1)
def LabelReconstruct(label_set):
    label_set = label_set.astype(int)
    new_label_set = np.zeros(shape=(len(label_set), num_labels))
    for i in range(len(label_set)):
        new_label_set[i][label_set[i]] = 1
    return new_label_set.astype(int)


def LoadDataSet(load_path):
    list_data = [f for f in listdir(load_path + "dataset/") if isfile(join(load_path + "dataset/", f))]
    list_label = [f for f in listdir(load_path + "label/") if isfile(join(load_path + "dataset/", f))]
    if list_data.sort() == list_label.sort():
        return list_data
    else:
        print("data and labels are not suitable")
        return 0


# load, randomize, normalize images and reconstruct labels
def PrepareData(*arg):
    filename = arg[0]
    loaded_dataset = pickle.load(open(load_path + "dataset/" + filename, "rb"))
    loaded_labels = pickle.load(open(load_path + "label/" + filename, "rb"))
    if len(arg) == 1:
        datasize = len(loaded_labels)
    elif len(arg) == 2:
        datasize = int(arg[1])
    else:
        print("not more than 2 arguments please!")
    dataset_full, labels_full = randomize(loaded_dataset[0:datasize], loaded_labels[0:datasize])
    return NormalizeData(dataset_full), LabelReconstruct(labels_full)


def NormalizeData(dataset):
    dataset = dataset - (dataset.mean())
    dataset = dataset / (dataset.std())
    return dataset


### LOAD DATA
listfiles = LoadDataSet(load_path)
# divide
listfiles_train = listfiles[0:15]
listfiles_valid = listfiles[15:25]
listfiles_test = listfiles[25:len(listfiles)]

graphCNN = tf.Graph()
with graphCNN.as_default():
    with tf.device('/gpu:0'):
        x = tf.placeholder(tf.float32, shape=(None, 224, 224, 3))  # X
        y_ = tf.placeholder(tf.float32, shape=(None, num_labels))  # Y_
        dropout = tf.placeholder(tf.float32)
        if dropout == 1.0:
            keep_prob = tf.constant([0.2, 0.3, 0.5], dtype=tf.float32)
        else:
            keep_prob = tf.constant([1.0, 1.0, 1.0], dtype=tf.float32)

        weights_1 = weight_variable([cnn1, cnn1, 3, hidden_size[0]])
        biases_1 = bias_variable([hidden_size[0]])
        weights_2 = weight_variable([cnn2, cnn2, hidden_size[0], hidden_size[1]])
        biases_2 = bias_variable([hidden_size[1]])
        weights_3 = weight_variable([cnn3, cnn3, hidden_size[1], hidden_size[2]])
        biases_3 = bias_variable([hidden_size[2]])
        weights_4 = weight_variable([56 * 56 * hidden_size[2], hidden_size[3]])
        biases_4 = bias_variable([hidden_size[3]])
        weights_5 = weight_variable([hidden_size[3], num_labels])
        biases_5 = bias_variable([num_labels])

        def model(data):
            with tf.device('/gpu:1'):
                train_hidden_1 = tf.nn.relu(conv2d(data, weights_1) + biases_1)
                train_hidden_2 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_1, weights_2) + biases_2))
                train_hidden_2_drop = tf.nn.dropout(train_hidden_2, keep_prob[0])
                train_hidden_3 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_2_drop, weights_3) + biases_3))
                train_hidden_3_drop = tf.nn.dropout(train_hidden_3, keep_prob[1])
                train_hidden_3_drop = tf.reshape(train_hidden_3_drop, [-1, 56 * 56 * hidden_size[2]])
                train_hidden_4 = tf.nn.relu(tf.matmul(train_hidden_3_drop, weights_4) + biases_4)
                train_hidden_4_drop = tf.nn.dropout(train_hidden_4, keep_prob[2])
                logits = tf.matmul(train_hidden_4_drop, weights_5) + biases_5
                return logits

        t_train_labels = tf.argmax(y_, 1)  # From one-hot (one and zeros) vectors to values
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model(x), labels=t_train_labels))
        optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
        y = tf.nn.softmax(model(x))


### RUNNING
print("log address: %s" % (log_addr))
#num_steps = 10001
times_repeat = 20  # number of epochs
batch_size = 100

with tf.Session(graph=graphCNN, config=tf.ConfigProto(log_device_placement=True)) as session:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(max_to_keep=0)
    writemyfile("---ARCHIVES_M1----")
    mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    writemyfile(str("\nTime: %s \nLayers: %d,%d,%d \epochs: %d" % (mytime, cnn1, cnn2, cnn3, times_repeat)))
    writemyfile("Train files:" + str(listfiles_train))
    writemyfile("Valid files:" + str(listfiles_valid))
    writemyfile("Test files:" + str(listfiles_test))
    print("Model will be saved in file: %s" % save_model_path)
    writemyfile(str("Model will be saved in file: %s" % save_model_path))

    ### TRAINING & VALIDATION
    valid_accuracies_epochs = np.array([])
    for time_repeat in range(times_repeat):
        print("- time_repeat:", time_repeat)
        writemyfile("- time_repeat:" + str(time_repeat))
        for file_train in listfiles_train:
            file_train_id = int(file_train[0:len(file_train) - 4])
            time_start_this_file = time.time()
            # LOAD DATA
            print("- - file:", file_train_id, end=' ')
            writemyfile("- - file:" + str(file_train_id))
            Data_train, Label_train = PrepareData(file_train)
            for step in range(0, len(Data_train) - batch_size, batch_size):
                batch_data = Data_train[step:step + batch_size]
                batch_labels = Label_train[step:step + batch_size]
                feed_dict = {x: batch_data, y_: batch_labels, dropout: 1.0}
                opti, l, predictions = session.run([optimizer, loss, y], feed_dict=feed_dict)
            train_accuracies = np.array([])
            for index_tr_accu in range(0, len(Data_train) - SIZE_BATCH_VALID, SIZE_BATCH_VALID):
                current_predictions = y.eval(feed_dict={x: Data_train[index_tr_accu:index_tr_accu + SIZE_BATCH_VALID], dropout: 0.0})
                current_accuracy = accuracy(current_predictions, Label_train[index_tr_accu:index_tr_accu + SIZE_BATCH_VALID])
                train_accuracies = np.r_[train_accuracies, current_accuracy]
            train_accuracy = train_accuracies.mean()
            print("batch accu: %.2f%%" % (train_accuracy), end=" | ")
            writemyfile("batch accu: %.2f%%" % (train_accuracy))
            time_done_this_file = time.time() - time_start_this_file
            print("time: %.2fs" % (time_done_this_file))
            writemyfile("time: %.2fs" % (time_done_this_file))
        # save model
        model_addr = save_model_path + "model335" + "-epoch-" + str(time_repeat) + ".ckpt"
        save_path = saver.save(session, model_addr)  # max_to_keep default was 5
        mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print("epoch finished at %s \n model address: %s" % (mytime, model_addr))
        writemyfile("epoch finished at %s \n model address: %s" % (mytime, model_addr))
        # validation
        valid_accuracies = np.array([])
        for file_valid in listfiles_valid:
            file_valid_id = int(file_valid[0:len(file_valid) - 4])
            Data_valid, Label_valid = PrepareData(file_valid)
            for index_vl_accu in range(0, len(Data_valid) - SIZE_BATCH_VALID, SIZE_BATCH_VALID):
                current_predictions = y.eval(feed_dict={x: Data_valid[index_vl_accu:index_vl_accu + SIZE_BATCH_VALID], dropout: 0.0})
                current_accuracy = accuracy(current_predictions, Label_valid[index_vl_accu:index_vl_accu + SIZE_BATCH_VALID])
                valid_accuracies = np.r_[valid_accuracies, current_accuracy]
        valid_accuracy = valid_accuracies.mean()
        print("epoch %d - valid accu: %.2f%%" % (time_repeat, valid_accuracy))
        writemyfile("epoch %d - valid accu: %.2f%%" % (time_repeat, valid_accuracy))
        valid_accuracies_epochs = np.hstack([valid_accuracies_epochs, valid_accuracy])
    print('Done!!')
    writemyfile(str('Done!!'))

session.close()
Update: I found cifar10_multi_gpu_train.py, which seems to be a good example for training with multiple GPUs, but honestly I don't know how to apply it to my case.
I think you need to change

def model(data):
    with tf.device('/gpu:1'):

to:

def model(data):
    for d in ['/gpu:0', '/gpu:1']:
        with tf.device(d):

and ditch the line with tf.device('/gpu:0'):.
At the first with tf.device... you are only doing initialization of the variables, and then you are resetting your devices with the next with tf.device.
Let me know if this works since I can't test it.
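For reference, the data-parallel pattern that cifar10_multi_gpu_train.py follows builds one copy of the model per GPU on a slice of the batch and averages the resulting gradients. A rough, untested sketch, assuming model, x and t_train_labels are defined as in the question and that the tf.device('/gpu:1') line inside model() is removed:

optimizer = tf.train.AdamOptimizer(0.01)
split_x = tf.split(x, 2, axis=0)  # assumes the batch dimension is divisible by 2
split_y = tf.split(t_train_labels, 2, axis=0)
tower_grads = []
for i, d in enumerate(['/gpu:0', '/gpu:1']):
    with tf.device(d):
        # each tower reuses the same weight variables defined above
        tower_logits = model(split_x[i])
        tower_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tower_logits, labels=split_y[i]))
        tower_grads.append(optimizer.compute_gradients(tower_loss))

with tf.device('/cpu:0'):
    # average the per-GPU gradients and apply them once
    averaged = []
    for grad_and_vars in zip(*tower_grads):
        grads = tf.stack([g for g, _ in grad_and_vars], axis=0)
        averaged.append((tf.reduce_mean(grads, axis=0), grad_and_vars[0][1]))
    train_op = optimizer.apply_gradients(averaged)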

Tensorflow optimizer: error doesn't change

I'm a beginner in TensorFlow and I'm working on a model which colorizes greyscale images, but when I run the optimizer it gives the same error (MSE) every epoch and I can't figure out what the problem is. So what is wrong with my code, what am I missing?
The logic: I get the low-level, global and mid-level features from the image, pass the global features to a multilayer function, fuse its output with the global part in one fusion layer, and send the fused feature vector to the colorization network. I also have a Get_Images_Chrominance function which gets the a,b values from the label images and stores them so the labels can be fed with them.
The Code
Ab_values = None
Batch_size = 3
Batch_indx = 1
Batch_GreyImages = []
Batch_ColorImages = []
EpochsNum = 11
ExamplesNum = 9
Imgsize = 224, 224
Channels = 1

Input_images = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 1])
Ab_Labels_tensor = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 2])


def ReadNextBatch():
    global Batch_GreyImages
    global Batch_ColorImages
    global Batch_indx
    global Batch_size
    global Ab_values
    Batch_GreyImages = []
    Batch_ColorImages = []
    for ind in range(Batch_size):
        Colored_img = Image.open(r'Path' + str(Batch_indx) + '.jpg')
        Batch_ColorImages.append(Colored_img)
        Grey_img = Image.open(r'Path' + str(Batch_indx) + '.jpg')
        Grey_img = np.asanyarray(Grey_img)
        img_shape = Grey_img.shape
        img_reshaped = Grey_img.reshape(img_shape[0], img_shape[1], Channels)  # [224,224,1]
        Batch_GreyImages.append(img_reshaped)  # [#imgs,224,224,1]
        Batch_indx = Batch_indx + 1
    Get_Images_Chrominance()
    return Batch_GreyImages


# -------------------------------------------------------------------------------
def Get_Images_Chrominance():
    global Ab_values
    global Batch_ColorImages
    Ab_values = np.empty((Batch_size, 224, 224, 2), "float32")
    for indx in range(Batch_size):
        lab = color.rgb2lab(Batch_ColorImages[indx])
        for i in range(len(lab[:, 1, 1])):
            for j in range(len(lab[1, :, 1])):
                Ab_values[indx][i][j][0] = lab[i, j, 1]
                Ab_values[indx][i][j][1] = lab[i, j, 2]
        min_value = np.amin(Ab_values[indx])
        max_value = np.amax(Ab_values[indx])
        for i in range(len(lab[:, 1, 1])):
            for j in range(len(lab[1, :, 1])):
                Ab_values[indx][i][j][0] = Normalize(lab[i, j, 1], min_value, max_value)
                Ab_values[indx][i][j][1] = Normalize(lab[i, j, 1], min_value, max_value)


# -------------------------------------------------------------------------------
def Normalize(value, min_value, max_value):
    min_norm_value = 0
    max_norm_value = 1
    value = min_norm_value + (((max_norm_value - min_norm_value) * (value - min_value)) / (max_value - min_value))
    return value


def Frobenius_Norm(M):
    return tf.reduce_sum(M ** 2) ** 0.5


def Model(Input_images):
    low_layer1 = ConstructLayer(Input_images, 1, 3, 3, 64, 2, 'Relu')
    low_layer2 = ConstructLayer(low_layer1, 64, 3, 3, 128, 1, 'Relu')
    low_layer3 = ConstructLayer(low_layer2, 128, 3, 3, 128, 2, 'Relu')
    low_layer4 = ConstructLayer(low_layer3, 128, 3, 3, 256, 1, 'Relu')
    low_layer5 = ConstructLayer(low_layer4, 256, 3, 3, 256, 2, 'Relu')
    low_layer6 = ConstructLayer(low_layer5, 256, 3, 3, 512, 1, 'Relu')
    mid_layer1 = ConstructLayer(low_layer6, 512, 3, 3, 512, 1, 'Relu')
    mid_layer2 = ConstructLayer(mid_layer1, 512, 3, 3, 256, 1, 'Relu')
    global_layer1 = ConstructLayer(low_layer6, 512, 3, 3, 512, 2, 'Relu')
    global_layer2 = ConstructLayer(global_layer1, 512, 3, 3, 512, 1, 'Relu')
    global_layer3 = ConstructLayer(global_layer2, 512, 3, 3, 512, 2, 'Relu')
    global_layer4 = ConstructLayer(global_layer3, 512, 3, 3, 512, 1, 'Relu')
    ML_Net = ConstructML(global_layer4, 3, [1024, 512, 256])
    Fuse = Fusion_layer(mid_layer2, ML_OUTPUT)
    Color_layer1 = ConstructLayer(Fuse, 256, 3, 3, 128, 1, 'Relu')
    Color_layer1 = UpSampling(56, 56, Color_layer1)
    Color_layer2 = ConstructLayer(Color_layer1, 128, 3, 3, 64, 1, 'Relu')
    Color_layer3 = ConstructLayer(Color_layer2, 64, 3, 3, 64, 1, 'Relu')
    Color_layer3 = UpSampling(112, 112, Color_layer3)
    Color_layer4 = ConstructLayer(Color_layer3, 64, 3, 3, 32, 1, 'Relu')
    Output = ConstructLayer(Color_layer4, 32, 3, 3, 2, 1, 'Sigmoid')
    Output = UpSampling(224, 224, Output)
    return Output


# ----------------------------------------------------Training-------------------
def RunModel(Input_images):
    global Ab_values
    global Batch_indx
    Prediction = Model(Input_images)
    Colorization_MSE = tf.reduce_mean((Frobenius_Norm(tf.sub(Prediction, Ab_Labels_tensor))))
    Optmizer = tf.train.AdadeltaOptimizer().minimize(Colorization_MSE)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    for epoch in range(EpochsNum):
        epoch_loss = 0
        Batch_indx = 1
        for i in range(int(ExamplesNum / Batch_size)):  # over batches
            print("Batch Num ", i + 1)
            ReadNextBatch()
            _, c = sess.run([Optmizer, Colorization_MSE], feed_dict={Input_images: Batch_GreyImages, Ab_Labels_tensor: Ab_values})
            epoch_loss += c
        print("epoch: ", epoch + 1, ",Los: ", epoch_loss)


# -------------------------------------------------------------------------------
RunModel(Input_images)
EDIT: this is the full code if anyone wants to help me with it.
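One observation on the loss above, offered only as a hedged pointer rather than a confirmed cause: Frobenius_Norm already reduces its argument to a single scalar, so the surrounding tf.reduce_mean is a no-op, and the printed value is the norm over the whole batch rather than a per-pixel mean (tf.sub is also the pre-1.0 name of tf.subtract). A plain per-pixel MSE would look roughly like this sketch, keeping the same optimizer:
# hypothetical per-pixel mean squared error over the predicted a,b channels
Colorization_MSE = tf.reduce_mean(tf.square(Prediction - Ab_Labels_tensor))
Optmizer = tf.train.AdadeltaOptimizer().minimize(Colorization_MSE)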

Language Modelling with RNN and LSTM Cell in Tensorflow

My RNN for language modelling is predicting only "the", "and" and "unknown". What's wrong with my code?
Here I define the hyper parameters:
num_epochs = 300
total_series_length = len(uniqueSentence) - 4
truncated_backprop_length = 30
state_size = 100
num_classes = NUM_MEANINGFUL + 1
echo_step = 1
batch_size = 32
vocab_length = len(decoder)
num_batches = total_series_length//batch_size//truncated_backprop_length
learning_rate = 0.01
old_perplexity = 0
Here I generate the data (my inputs are word embeddings of length 100 computed with Word2Vec):
def generateData():
    uniqueSent = uniqueSentence[0: len(uniqueSentence) - 4]
    x_tr = np.array([model_ted[word] for words in uniqueSent])
    # Roll array elements along a given axis.
    # Elements that roll beyond the last position are re-introduced at the first.
    x_tr = x_tr.reshape((100, batch_size, -1))  # The first index changing slowest, subseries as rows
    x = x_tr.transpose((1, 2, 0))
    print("hi")
    new_y = indexList[1: len(indexList) - 4]
    new_y.append(indexList[len(indexList) - 3])
    y = np.array(new_y)
    print(len(y))
    y = y.reshape((batch_size, -1))
    return (x, y)
Define the placeholders:
batchX_placeholder = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, 100])
batchY_placeholder = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length])
W = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
W2 = tf.Variable(np.random.rand(state_size, num_classes),dtype=tf.float32)
b2 = tf.Variable(np.zeros((batch_size, num_classes)), dtype=tf.float32)
Inputs and desired outputs:
labels_series = tf.transpose(batchY_placeholder)
labels_series = tf.unstack(batchY_placeholder, axis=1)
inputs_series = batchX_placeholder
Forward pass:
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
print(tf.__version__)
#cell = tf.contrib.rnn.BasicRNNCell(state_size)
cell = tf.contrib.rnn.BasicLSTMCell(state_size, state_is_tuple = False)
print(cell.state_size)
init_state = tf.zeros([batch_size, cell.state_size])
outputs, current_state = tf.nn.dynamic_rnn(cell, inputs_series, initial_state = init_state)
iterable_outputs = tf.unstack(outputs, axis = 1)
logits_series = [tf.matmul(state, W2) + b2 for state in iterable_outputs] #Broadcasted addition
predictions_series = [tf.nn.softmax(logits) for logits in logits_series]
losses = [tf.losses.sparse_softmax_cross_entropy(labels, logits)
for logits, labels in zip(logits_series, labels_series)]
total_loss = tf.add_n(losses)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
x,y = generateData()
del(model_ted)
Training:
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    sess.run(tf.initialize_all_variables())
    loss_list = []
    print("start")
    _current_state = np.zeros((batch_size, 2 * state_size))
    # I used to have generateData outside and _current_state inside
    for epoch_idx in range(num_epochs):
        print("New data, epoch", epoch_idx)
        for batch_idx in range(num_batches):
            start_idx = batch_idx * truncated_backprop_length
            end_idx = start_idx + truncated_backprop_length
            batchX = x[:, start_idx:end_idx, :]
            batchY = y[:, start_idx:end_idx]
            _total_loss, _train_step, _current_state, _predictions_series = sess.run(
                [total_loss, train_step, current_state, predictions_series],
                feed_dict={
                    batchX_placeholder: batchX,
                    batchY_placeholder: batchY,
                    init_state: _current_state
                })
            loss_list.append(_total_loss)
            del(batchX)
            del(batchY)
        perplexity = 2 ** (_total_loss / truncated_backprop_length)
        print(perplexity)
        del(perplexity)
        _predictions_series = np.array(_predictions_series)
        pr = _predictions_series.transpose([1, 0, 2])
        pr_ind = []
        for line in pr[0]:
            pr_ind.append(np.argmax(line))
        for index in pr_ind:
            print(decoder[index], end=" ")
        del(pr_ind)
        print("\n learning rate: ", end=" ")
        print(learning_rate)
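One hedged observation on the variable definitions above (not necessarily the cause of the repeated predictions): biases are conventionally defined per class rather than per batch row, so b and b2 would normally have shape [num_classes] and broadcast over the batch, e.g.:
# conventional shapes: weights [state_size, num_classes], biases [num_classes]
W2 = tf.Variable(np.random.rand(state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.zeros((num_classes,)), dtype=tf.float32)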
