I'm beginner in tensorflow and i'm working on a model which Colorize Greyscale images but when i run the optmizer it give the same Error (MSE) every epoch and i can't figure out what is the error, so what is wrong with my code , what am i missing?
The logic: I get the low level and global and mid level features from the image and pass the global Features to multilayer function and fuse it's output with the global part in a one fusion layer and send the fused features vector to the colorization network ,, and i have Get_images_chrominance function which get the a,b values from the labels images and store them to feed the lables with them.
The Code
Ab_values = None
Batch_size = 3
Batch_indx = 1
Batch_GreyImages = []
Batch_ColorImages = []
EpochsNum = 11
ExamplesNum = 9
Imgsize = 224, 224
Channels = 1
Input_images = tf.placeholder(dtype=tf.float32,shape=[None,224,224,1])
Ab_Labels_tensor = tf.placeholder(dtype=tf.float32,shape=[None,224,224,2])
def ReadNextBatch():
global Batch_GreyImages
global Batch_ColorImages
global Batch_indx
global Batch_size
global Ab_values
Batch_GreyImages = []
Batch_ColorImages = []
for ind in range(Batch_size):
Colored_img = Image.open(r'Path' + str(Batch_indx) + '.jpg')
Batch_ColorImages.append(Colored_img)
Grey_img = Image.open(r'Path' + str(Batch_indx) + '.jpg')
Grey_img = np.asanyarray(Grey_img)
img_shape = Grey_img.shape
img_reshaped = Grey_img.reshape(img_shape[0],img_shape[1], Channels)#[224,224,1]
Batch_GreyImages.append(img_reshaped)#[#imgs,224,224,1]
Batch_indx = Batch_indx + 1
Get_Images_Chrominance()
return Batch_GreyImages
#-------------------------------------------------------------------------------
def Get_Images_Chrominance():
global Ab_values
global Batch_ColorImages
Ab_values = np.empty((Batch_size,224,224,2),"float32")
for indx in range(Batch_size):
lab = color.rgb2lab(Batch_ColorImages[indx])
for i in range(len(lab[:,1,1])):
for j in range(len(lab[1,:,1])):
Ab_values[indx][i][j][0] = lab[i,j,1]
Ab_values[indx][i][j][1] = lab[i,j,2]
min_value = np.amin(Ab_values[indx])
max_value = np.amax(Ab_values[indx])
for i in range(len(lab[:,1,1])):
for j in range(len(lab[1,:,1])):
Ab_values[indx][i][j][0] = Normalize(lab[i,j,1],min_value,max_value)
Ab_values[indx][i][j][1] = Normalize(lab[i,j,1],min_value,max_value)
#-------------------------------------------------------------------------------
def Normalize(value,min_value,max_value):
min_norm_value = 0
max_norm_value = 1
value = min_norm_value + (((max_norm_value - min_norm_value) * (value - min_value)) / (max_value - min_value))
return value
def Frobenius_Norm(M):
return tf.reduce_sum(M ** 2) ** 0.5
def Model(Input_images):
low_layer1 = ConstructLayer(Input_images,1,3,3,64,2,'Relu')
low_layer2 = ConstructLayer(low_layer1,64,3,3,128,1,'Relu')
low_layer3 = ConstructLayer(low_layer2,128,3,3,128,2,'Relu')
low_layer4 = ConstructLayer(low_layer3,128,3,3,256,1,'Relu')
low_layer5 = ConstructLayer(low_layer4,256,3,3,256,2,'Relu')
low_layer6 = ConstructLayer(low_layer5,256,3,3,512,1,'Relu')
mid_layer1 = ConstructLayer(low_layer6,512,3,3,512,1,'Relu')
mid_layer2 = ConstructLayer(mid_layer1,512,3,3,256,1,'Relu')
global_layer1 = ConstructLayer(low_layer6,512,3,3,512,2,'Relu')
global_layer2 = ConstructLayer(global_layer1,512,3,3,512,1,'Relu')
global_layer3 = ConstructLayer(global_layer2,512,3,3,512,2,'Relu')
global_layer4 = ConstructLayer(global_layer3,512,3,3,512,1,'Relu')
ML_Net = ConstructML(global_layer4,3,[1024,512,256])
Fuse = Fusion_layer(mid_layer2, ML_OUTPUT)
Color_layer1 = ConstructLayer(Fuse,256,3,3,128,1,'Relu')
Color_layer1 = UpSampling(56,56,Color_layer1)
Color_layer2 = ConstructLayer(Color_layer1,128,3,3,64,1,'Relu')
Color_layer3 = ConstructLayer(Color_layer2,64,3,3,64,1,'Relu')
Color_layer3 = UpSampling(112,112,Color_layer3)
Color_layer4 = ConstructLayer(Color_layer3,64,3,3,32,1,'Relu')
Output = ConstructLayer(Color_layer4,32,3,3,2,1,'Sigmoid')
Output = UpSampling(224,224,Output)
return Output
#----------------------------------------------------Training-------------------
def RunModel(Input_images):
global Ab_values
global Batch_indx
Prediction = Model(Input_images)
Colorization_MSE = tf.reduce_mean((Frobenius_Norm(tf.sub(Prediction,Ab_Labels_tensor))))
Optmizer = tf.train.AdadeltaOptimizer().minimize(Colorization_MSE)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for epoch in range(EpochsNum):
epoch_loss = 0
Batch_indx = 1
for i in range(int(ExamplesNum / Batch_size)):#over batches
print("Batch Num ",i+1)
ReadNextBatch()
_, c = sess.run([Optmizer,Colorization_MSE],feed_dict={Input_images:Batch_GreyImages,Ab_Labels_tensor:Ab_values})
epoch_loss += c
print("epoch: ",epoch+1, ",Los: ",epoch_loss)
#---- ---------------------------------------------------------------------------
RunModel(Input_images)
EDIT: this is the full code if any anyone want to help me in
Related
I am trying to train a special type of GAN called a Model-Assisted GAN (https://arxiv.org/pdf/1812.00879) using Keras, which takes as an input a vector of 13 input parameters + Gaussian noise, and generate a vector of 6 outputs. The mapping between the vectors of inputs and outputs is non-trivial, as it is related to a high energy physics simulation (in particular the simulation has some inherent randomness). The biggest dependence on the final outputs are encoded in the first five inputs. The biggest difference for this model to a traditional GAN is the use of a Siamese network as the discriminator, and this takes two inputs at a time, so for the same input parameters we provide two sets of possible outputs per training (possible due to the randomness of the simulation), so there are sort of 12 output distributions, but only 6 are unique, which is what we aim to generate. We used a 1D convolutional neural network for both the discriminator and generator.
The current model we have trained seems to reproduce the output distributions for an independent testing sample to reasonably good accuracy (see below plot of histograms overlayed), but there are still some clear differences between the distributions, and the eventual goal is for the model to be able to produce indistinguishable data to the simulation. I have so far tried varying the learning rate and added varying amounts of learning rate decay, tweaking the network architectures, changing some of the hyperparameters of the optimiser, adding some more noise to the discriminator training by implementing some label smoothing and swapping the order of inputs, adding some label smoothing to the generator training, increasing the batch size and also increasing the amount of noise inputs, and I still cannot get the model to perfectly reproduce the output distributions. I am struggling to come up with ideas of what to do next, and I was wondering if anyone else has had a similar problem, whereby the output is not quite perfect, and if so how they might have gone about solving this problem? Any thoughts or tips would be greatly appreciated!
I have included the full code for the training, as well as some plots of the input and output distributions (before applying the Quantile Transformer), the loss plots for the adversarial network and the discriminator (A for Adversarial, S for Siamese (Discriminator)) and then the overlay of the histograms for the generated and true output distributions for the independent testing sample (which is where you can see the small differences that arise).
Thanks in advance.
TRAINING CODE
"""
Training implementation
"""
net_range = [-1,1]
gauss_range = [-5.5,5.5]
mapping = interp1d(gauss_range, net_range)
class ModelAssistedGANPID(object):
def __init__(self, params=64, observables=6):
self.params = params
self.observables = observables
self.Networks = Networks(params=params, observables=observables)
self.siamese = self.Networks.siamese_model()
self.adversarial1 = self.Networks.adversarial1_model()
def train(self, pretrain_steps=4500, train_steps=100000, batch_size=32, train_no=1):
print('Pretraining for ', pretrain_steps,' steps before training for ', train_steps, ' steps')
print('Batch size = ', batch_size)
print('Training number = ', train_no)
'''
Pre-training stage
'''
# Number of tracks for the training + validation sample
n_events = 1728000 + 100000
n_train = n_events - 100000
# Parameters for Gaussian noise
lower = -1
upper = 1
mu = 0
sigma = 1
# import simulation data
print('Loading data...')
kaon_data = pd.read_hdf('PATH')
kaon_data = kaon_data.sample(n=n_events)
kaon_data = kaon_data.reset_index(drop=True)
kaon_data_train = kaon_data[:n_train]
kaon_data_test = kaon_data[n_train:n_events]
print("Producing training data...")
# add all inputs
P_kaon_data_train = kaon_data_train['TrackP']
Pt_kaon_data_train = kaon_data_train['TrackPt']
nTracks_kaon_data_train = kaon_data_train['NumLongTracks']
numRich1_kaon_data_train = kaon_data_train['NumRich1Hits']
numRich2_kaon_data_train = kaon_data_train['NumRich2Hits']
rich1EntryX_kaon_data_train = kaon_data_train['TrackRich1EntryX']
rich1EntryY_kaon_data_train = kaon_data_train['TrackRich1EntryY']
rich1ExitX_kaon_data_train = kaon_data_train['TrackRich1ExitX']
rich1ExitY_kaon_data_train = kaon_data_train['TrackRich1ExitY']
rich2EntryX_kaon_data_train = kaon_data_train['TrackRich2EntryX']
rich2EntryY_kaon_data_train = kaon_data_train['TrackRich2EntryY']
rich2ExitX_kaon_data_train = kaon_data_train['TrackRich2ExitX']
rich2ExitY_kaon_data_train = kaon_data_train['TrackRich2ExitY']
# add different DLL outputs
Dlle_kaon_data_train = kaon_data_train['RichDLLe']
Dlle2_kaon_data_train = kaon_data_train['RichDLLe2']
Dllmu_kaon_data_train = kaon_data_train['RichDLLmu']
Dllmu2_kaon_data_train = kaon_data_train['RichDLLmu2']
Dllk_kaon_data_train = kaon_data_train['RichDLLk']
Dllk2_kaon_data_train = kaon_data_train['RichDLLk2']
Dllp_kaon_data_train = kaon_data_train['RichDLLp']
Dllp2_kaon_data_train = kaon_data_train['RichDLLp2']
Dlld_kaon_data_train = kaon_data_train['RichDLLd']
Dlld2_kaon_data_train = kaon_data_train['RichDLLd2']
Dllbt_kaon_data_train = kaon_data_train['RichDLLbt']
Dllbt2_kaon_data_train = kaon_data_train['RichDLLbt2']
# convert to numpy array
P_kaon_data_train = P_kaon_data_train.to_numpy()
Pt_kaon_data_train = Pt_kaon_data_train.to_numpy()
nTracks_kaon_data_train = nTracks_kaon_data_train.to_numpy()
numRich1_kaon_data_train = numRich1_kaon_data_train.to_numpy()
numRich2_kaon_data_train = numRich2_kaon_data_train.to_numpy()
rich1EntryX_kaon_data_train = rich1EntryX_kaon_data_train.to_numpy()
rich1EntryY_kaon_data_train = rich1EntryY_kaon_data_train.to_numpy()
rich1ExitX_kaon_data_train = rich1ExitX_kaon_data_train.to_numpy()
rich1ExitY_kaon_data_train = rich1ExitY_kaon_data_train.to_numpy()
rich2EntryX_kaon_data_train = rich2EntryX_kaon_data_train.to_numpy()
rich2EntryY_kaon_data_train = rich2EntryY_kaon_data_train.to_numpy()
rich2ExitX_kaon_data_train = rich2ExitX_kaon_data_train.to_numpy()
rich2ExitY_kaon_data_train = rich2ExitY_kaon_data_train.to_numpy()
Dlle_kaon_data_train = Dlle_kaon_data_train.to_numpy()
Dlle2_kaon_data_train = Dlle2_kaon_data_train.to_numpy()
Dllmu_kaon_data_train = Dllmu_kaon_data_train.to_numpy()
Dllmu2_kaon_data_train = Dllmu2_kaon_data_train.to_numpy()
Dllk_kaon_data_train = Dllk_kaon_data_train.to_numpy()
Dllk2_kaon_data_train = Dllk2_kaon_data_train.to_numpy()
Dllp_kaon_data_train = Dllp_kaon_data_train.to_numpy()
Dllp2_kaon_data_train = Dllp2_kaon_data_train.to_numpy()
Dlld_kaon_data_train = Dlld_kaon_data_train.to_numpy()
Dlld2_kaon_data_train = Dlld2_kaon_data_train.to_numpy()
Dllbt_kaon_data_train = Dllbt_kaon_data_train.to_numpy()
Dllbt2_kaon_data_train = Dllbt2_kaon_data_train.to_numpy()
# Reshape arrays
P_kaon_data_train = np.array(P_kaon_data_train).reshape(-1, 1)
Pt_kaon_data_train = np.array(Pt_kaon_data_train).reshape(-1, 1)
nTracks_kaon_data_train = np.array(nTracks_kaon_data_train).reshape(-1, 1)
numRich1_kaon_data_train = np.array(numRich1_kaon_data_train).reshape(-1, 1)
numRich2_kaon_data_train = np.array(numRich2_kaon_data_train).reshape(-1, 1)
rich1EntryX_kaon_data_train = np.array(rich1EntryX_kaon_data_train).reshape(-1, 1)
rich1EntryY_kaon_data_train = np.array(rich1EntryY_kaon_data_train).reshape(-1, 1)
rich1ExitX_kaon_data_train = np.array(rich1ExitX_kaon_data_train).reshape(-1, 1)
rich1ExitY_kaon_data_train = np.array(rich1ExitY_kaon_data_train).reshape(-1, 1)
rich2EntryX_kaon_data_train = np.array(rich2EntryX_kaon_data_train).reshape(-1, 1)
rich2EntryY_kaon_data_train = np.array(rich2EntryY_kaon_data_train).reshape(-1, 1)
rich2ExitX_kaon_data_train = np.array(rich2ExitX_kaon_data_train).reshape(-1, 1)
rich2ExitY_kaon_data_train = np.array(rich2ExitY_kaon_data_train).reshape(-1, 1)
Dlle_kaon_data_train = np.array(Dlle_kaon_data_train).reshape(-1, 1)
Dlle2_kaon_data_train = np.array(Dlle2_kaon_data_train).reshape(-1, 1)
Dllmu_kaon_data_train = np.array(Dllmu_kaon_data_train).reshape(-1, 1)
Dllmu2_kaon_data_train = np.array(Dllmu2_kaon_data_train).reshape(-1, 1)
Dllk_kaon_data_train = np.array(Dllk_kaon_data_train).reshape(-1, 1)
Dllk2_kaon_data_train = np.array(Dllk2_kaon_data_train).reshape(-1, 1)
Dllp_kaon_data_train = np.array(Dllp_kaon_data_train).reshape(-1, 1)
Dllp2_kaon_data_train = np.array(Dllp2_kaon_data_train).reshape(-1, 1)
Dlld_kaon_data_train = np.array(Dlld_kaon_data_train).reshape(-1, 1)
Dlld2_kaon_data_train = np.array(Dlld2_kaon_data_train).reshape(-1, 1)
Dllbt_kaon_data_train = np.array(Dllbt_kaon_data_train).reshape(-1, 1)
Dllbt2_kaon_data_train = np.array(Dllbt2_kaon_data_train).reshape(-1, 1)
inputs_kaon_data_train = np.concatenate((P_kaon_data_train, Pt_kaon_data_train, nTracks_kaon_data_train, numRich1_kaon_data_train, numRich2_kaon_data_train, rich1EntryX_kaon_data_train,
rich1EntryY_kaon_data_train, rich1ExitX_kaon_data_train, rich1ExitY_kaon_data_train, rich2EntryX_kaon_data_train, rich2EntryY_kaon_data_train, rich2ExitX_kaon_data_train, rich2ExitY_kaon_data_train), axis=1)
Dll_kaon_data_train = np.concatenate((Dlle_kaon_data_train, Dllmu_kaon_data_train, Dllk_kaon_data_train, Dllp_kaon_data_train, Dlld_kaon_data_train, Dllbt_kaon_data_train), axis=1)
Dll2_kaon_data_train = np.concatenate((Dlle2_kaon_data_train, Dllmu2_kaon_data_train, Dllk2_kaon_data_train, Dllp2_kaon_data_train, Dlld2_kaon_data_train, Dllbt2_kaon_data_train), axis=1)
print('Transforming inputs and outputs using Quantile Transformer...')
scaler_inputs = QuantileTransformer(output_distribution='normal', n_quantiles=int(1e5), subsample=int(1e10)).fit(inputs_kaon_data_train)
scaler_Dll = QuantileTransformer(output_distribution='normal', n_quantiles=int(1e5), subsample=int(1e10)).fit(Dll_kaon_data_train)
scaler_Dll2 = QuantileTransformer(output_distribution='normal', n_quantiles=int(1e5), subsample=int(1e10)).fit(Dll2_kaon_data_train)
inputs_kaon_data_train = scaler_inputs.transform(inputs_kaon_data_train)
Dll_kaon_data_train = scaler_Dll.transform(Dll_kaon_data_train)
Dll2_kaon_data_train = scaler_Dll2.transform(Dll2_kaon_data_train)
inputs_kaon_data_train = mapping(inputs_kaon_data_train)
Dll_kaon_data_train = mapping(Dll_kaon_data_train)
Dll2_kaon_data_train = mapping(Dll2_kaon_data_train)
# REPEATING FOR TESTING DATA
print("Producing testing data...")
# add all inputs
P_kaon_data_test = kaon_data_test['TrackP']
Pt_kaon_data_test = kaon_data_test['TrackPt']
nTracks_kaon_data_test = kaon_data_test['NumLongTracks']
numRich1_kaon_data_test = kaon_data_test['NumRich1Hits']
numRich2_kaon_data_test = kaon_data_test['NumRich2Hits']
rich1EntryX_kaon_data_test = kaon_data_test['TrackRich1EntryX']
rich1EntryY_kaon_data_test = kaon_data_test['TrackRich1EntryY']
rich1ExitX_kaon_data_test = kaon_data_test['TrackRich1ExitX']
rich1ExitY_kaon_data_test = kaon_data_test['TrackRich1ExitY']
rich2EntryX_kaon_data_test = kaon_data_test['TrackRich2EntryX']
rich2EntryY_kaon_data_test = kaon_data_test['TrackRich2EntryY']
rich2ExitX_kaon_data_test = kaon_data_test['TrackRich2ExitX']
rich2ExitY_kaon_data_test = kaon_data_test['TrackRich2ExitY']
# add different DLL outputs
Dlle_kaon_data_test = kaon_data_test['RichDLLe']
Dlle2_kaon_data_test = kaon_data_test['RichDLLe2']
Dllmu_kaon_data_test = kaon_data_test['RichDLLmu']
Dllmu2_kaon_data_test = kaon_data_test['RichDLLmu2']
Dllk_kaon_data_test = kaon_data_test['RichDLLk']
Dllk2_kaon_data_test = kaon_data_test['RichDLLk2']
Dllp_kaon_data_test = kaon_data_test['RichDLLp']
Dllp2_kaon_data_test = kaon_data_test['RichDLLp2']
Dlld_kaon_data_test = kaon_data_test['RichDLLd']
Dlld2_kaon_data_test = kaon_data_test['RichDLLd2']
Dllbt_kaon_data_test = kaon_data_test['RichDLLbt']
Dllbt2_kaon_data_test = kaon_data_test['RichDLLbt2']
# convert to numpy array
P_kaon_data_test = P_kaon_data_test.to_numpy()
Pt_kaon_data_test = Pt_kaon_data_test.to_numpy()
nTracks_kaon_data_test = nTracks_kaon_data_test.to_numpy()
numRich1_kaon_data_test = numRich1_kaon_data_test.to_numpy()
numRich2_kaon_data_test = numRich2_kaon_data_test.to_numpy()
rich1EntryX_kaon_data_test = rich1EntryX_kaon_data_test.to_numpy()
rich1EntryY_kaon_data_test = rich1EntryY_kaon_data_test.to_numpy()
rich1ExitX_kaon_data_test = rich1ExitX_kaon_data_test.to_numpy()
rich1ExitY_kaon_data_test = rich1ExitY_kaon_data_test.to_numpy()
rich2EntryX_kaon_data_test = rich2EntryX_kaon_data_test.to_numpy()
rich2EntryY_kaon_data_test = rich2EntryY_kaon_data_test.to_numpy()
rich2ExitX_kaon_data_test = rich2ExitX_kaon_data_test.to_numpy()
rich2ExitY_kaon_data_test = rich2ExitY_kaon_data_test.to_numpy()
Dlle_kaon_data_test = Dlle_kaon_data_test.to_numpy()
Dlle2_kaon_data_test = Dlle2_kaon_data_test.to_numpy()
Dllmu_kaon_data_test = Dllmu_kaon_data_test.to_numpy()
Dllmu2_kaon_data_test = Dllmu2_kaon_data_test.to_numpy()
Dllk_kaon_data_test = Dllk_kaon_data_test.to_numpy()
Dllk2_kaon_data_test = Dllk2_kaon_data_test.to_numpy()
Dllp_kaon_data_test = Dllp_kaon_data_test.to_numpy()
Dllp2_kaon_data_test = Dllp2_kaon_data_test.to_numpy()
Dlld_kaon_data_test = Dlld_kaon_data_test.to_numpy()
Dlld2_kaon_data_test = Dlld2_kaon_data_test.to_numpy()
Dllbt_kaon_data_test = Dllbt_kaon_data_test.to_numpy()
Dllbt2_kaon_data_test = Dllbt2_kaon_data_test.to_numpy()
P_kaon_data_test = np.array(P_kaon_data_test).reshape(-1, 1)
Pt_kaon_data_test = np.array(Pt_kaon_data_test).reshape(-1, 1)
nTracks_kaon_data_test = np.array(nTracks_kaon_data_test).reshape(-1, 1)
numRich1_kaon_data_test = np.array(numRich1_kaon_data_test).reshape(-1, 1)
numRich2_kaon_data_test = np.array(numRich2_kaon_data_test).reshape(-1, 1)
rich1EntryX_kaon_data_test = np.array(rich1EntryX_kaon_data_test).reshape(-1, 1)
rich1EntryY_kaon_data_test = np.array(rich1EntryY_kaon_data_test).reshape(-1, 1)
rich1ExitX_kaon_data_test = np.array(rich1ExitX_kaon_data_test).reshape(-1, 1)
rich1ExitY_kaon_data_test = np.array(rich1ExitY_kaon_data_test).reshape(-1, 1)
rich2EntryX_kaon_data_test = np.array(rich2EntryX_kaon_data_test).reshape(-1, 1)
rich2EntryY_kaon_data_test = np.array(rich2EntryY_kaon_data_test).reshape(-1, 1)
rich2ExitX_kaon_data_test = np.array(rich2ExitX_kaon_data_test).reshape(-1, 1)
rich2ExitY_kaon_data_test = np.array(rich2ExitY_kaon_data_test).reshape(-1, 1)
Dlle_kaon_data_test = np.array(Dlle_kaon_data_test).reshape(-1, 1)
Dlle2_kaon_data_test = np.array(Dlle2_kaon_data_test).reshape(-1, 1)
Dllmu_kaon_data_test = np.array(Dllmu_kaon_data_test).reshape(-1, 1)
Dllmu2_kaon_data_test = np.array(Dllmu2_kaon_data_test).reshape(-1, 1)
Dllk_kaon_data_test = np.array(Dllk_kaon_data_test).reshape(-1, 1)
Dllk2_kaon_data_test = np.array(Dllk2_kaon_data_test).reshape(-1, 1)
Dllp_kaon_data_test = np.array(Dllp_kaon_data_test).reshape(-1, 1)
Dllp2_kaon_data_test = np.array(Dllp2_kaon_data_test).reshape(-1, 1)
Dlld_kaon_data_test = np.array(Dlld_kaon_data_test).reshape(-1, 1)
Dlld2_kaon_data_test = np.array(Dlld2_kaon_data_test).reshape(-1, 1)
Dllbt_kaon_data_test = np.array(Dllbt_kaon_data_test).reshape(-1, 1)
Dllbt2_kaon_data_test = np.array(Dllbt2_kaon_data_test).reshape(-1, 1)
inputs_kaon_data_test = np.concatenate((P_kaon_data_test, Pt_kaon_data_test, nTracks_kaon_data_test, numRich1_kaon_data_test, numRich2_kaon_data_test, rich1EntryX_kaon_data_test, rich1EntryY_kaon_data_test, rich1ExitX_kaon_data_test, rich1ExitY_kaon_data_test, rich2EntryX_kaon_data_test, rich2EntryY_kaon_data_test, rich2ExitX_kaon_data_test, rich2ExitY_kaon_data_test), axis=1)
Dll_kaon_data_test = np.concatenate((Dlle_kaon_data_test, Dllmu_kaon_data_test, Dllk_kaon_data_test, Dllp_kaon_data_test, Dlld_kaon_data_test, Dllbt_kaon_data_test), axis=1)
Dll2_kaon_data_test = np.concatenate((Dlle2_kaon_data_test, Dllmu2_kaon_data_test, Dllk2_kaon_data_test, Dllp2_kaon_data_test, Dlld2_kaon_data_test, Dllbt2_kaon_data_test), axis=1)
print('Transforming inputs and outputs using Quantile Transformer...')
inputs_kaon_data_test = scaler_inputs.transform(inputs_kaon_data_test)
Dll_kaon_data_test = scaler_Dll.transform(Dll_kaon_data_test)
Dll2_kaon_data_test = scaler_Dll.transform(Dll2_kaon_data_test)
inputs_kaon_data_test = mapping(inputs_kaon_data_test)
Dll_kaon_data_test = mapping(Dll_kaon_data_test)
Dll2_kaon_data_test = mapping(Dll2_kaon_data_test)
# Producing testing data
params_list_test = np.random.normal(loc=mu, scale=sigma, size=[len(kaon_data_test), self.params])
for e in range(len(kaon_data_test)):
params_list_test[e][0] = inputs_kaon_data_test[e][0]
params_list_test[e][1] = inputs_kaon_data_test[e][1]
params_list_test[e][2] = inputs_kaon_data_test[e][2]
params_list_test[e][3] = inputs_kaon_data_test[e][3]
params_list_test[e][4] = inputs_kaon_data_test[e][4]
params_list_test[e][5] = inputs_kaon_data_test[e][5]
params_list_test[e][6] = inputs_kaon_data_test[e][6]
params_list_test[e][7] = inputs_kaon_data_test[e][7]
params_list_test[e][8] = inputs_kaon_data_test[e][8]
params_list_test[e][9] = inputs_kaon_data_test[e][9]
params_list_test[e][10] = inputs_kaon_data_test[e][10]
params_list_test[e][11] = inputs_kaon_data_test[e][11]
params_list_test[e][12] = inputs_kaon_data_test[e][12]
obs_simu_1_test = np.zeros((len(kaon_data_test), self.observables, 1))
obs_simu_1_test.fill(-1)
for e in range(len(kaon_data_test)):
obs_simu_1_test[e][0][0] = Dll_kaon_data_test[e][0]
obs_simu_1_test[e][1][0] = Dll_kaon_data_test[e][1]
obs_simu_1_test[e][2][0] = Dll_kaon_data_test[e][2]
obs_simu_1_test[e][3][0] = Dll_kaon_data_test[e][3]
obs_simu_1_test[e][4][0] = Dll_kaon_data_test[e][4]
obs_simu_1_test[e][5][0] = Dll_kaon_data_test[e][5]
obs_simu_2_test = np.zeros((len(kaon_data_test), self.observables, 1))
obs_simu_2_test.fill(-1)
for e in range(len(kaon_data_test)):
obs_simu_2_test[e][0][0] = Dll2_kaon_data_test[e][0]
obs_simu_2_test[e][1][0] = Dll2_kaon_data_test[e][1]
obs_simu_2_test[e][2][0] = Dll2_kaon_data_test[e][2]
obs_simu_2_test[e][3][0] = Dll2_kaon_data_test[e][3]
obs_simu_2_test[e][4][0] = Dll2_kaon_data_test[e][4]
obs_simu_2_test[e][5][0] = Dll2_kaon_data_test[e][5]
event_no_par = 0
event_no_obs_1 = 0
event_no_obs_2 = 0
d1_hist, d2_hist, d_hist, g_hist, a1_hist, a2_hist = list(), list(), list(), list(), list(), list()
print('Beginning pre-training...')
'''
#Pre-training stage
'''
for train_step in range(pretrain_steps):
log_mesg = '%d' % train_step
noise_value = 0.3
params_list = np.random.normal(loc=mu,scale=sigma, size=[batch_size, self.params])
y_ones = np.ones([batch_size, 1])
y_zeros = np.zeros([batch_size, 1])
# add physics parameters + noise to params_list
for b in range(batch_size):
params_list[b][0] = inputs_kaon_data_train[event_no_par][0]
params_list[b][1] = inputs_kaon_data_train[event_no_par][1]
params_list[b][2] = inputs_kaon_data_train[event_no_par][2]
params_list[b][3] = inputs_kaon_data_train[event_no_par][3]
params_list[b][4] = inputs_kaon_data_train[event_no_par][4]
params_list[b][5] = inputs_kaon_data_train[event_no_par][5]
params_list[b][6] = inputs_kaon_data_train[event_no_par][6]
params_list[b][7] = inputs_kaon_data_train[event_no_par][7]
params_list[b][8] = inputs_kaon_data_train[event_no_par][8]
params_list[b][9] = inputs_kaon_data_train[event_no_par][9]
params_list[b][10] = inputs_kaon_data_train[event_no_par][10]
params_list[b][11] = inputs_kaon_data_train[event_no_par][11]
params_list[b][12] = inputs_kaon_data_train[event_no_par][12]
event_no_par += 1
# Step 1
# simulated observables (number 1)
obs_simu_1 = np.zeros((batch_size, self.observables, 1))
obs_simu_1.fill(-1)
for b in range(batch_size):
obs_simu_1[b][0][0] = Dll_kaon_data_train[event_no_obs_1][0]
obs_simu_1[b][1][0] = Dll_kaon_data_train[event_no_obs_1][1]
obs_simu_1[b][2][0] = Dll_kaon_data_train[event_no_obs_1][2]
obs_simu_1[b][3][0] = Dll_kaon_data_train[event_no_obs_1][3]
obs_simu_1[b][4][0] = Dll_kaon_data_train[event_no_obs_1][4]
obs_simu_1[b][5][0] = Dll_kaon_data_train[event_no_obs_1][5]
event_no_obs_1 += 1
obs_simu_1_copy = np.copy(obs_simu_1)
# simulated observables (Gaussian smeared - number 2)
obs_simu_2 = np.zeros((batch_size, self.observables, 1))
obs_simu_2.fill(-1)
for b in range(batch_size):
obs_simu_2[b][0][0] = Dll2_kaon_data_train[event_no_obs_2][0]
obs_simu_2[b][1][0] = Dll2_kaon_data_train[event_no_obs_2][1]
obs_simu_2[b][2][0] = Dll2_kaon_data_train[event_no_obs_2][2]
obs_simu_2[b][3][0] = Dll2_kaon_data_train[event_no_obs_2][3]
obs_simu_2[b][4][0] = Dll2_kaon_data_train[event_no_obs_2][4]
obs_simu_2[b][5][0] = Dll2_kaon_data_train[event_no_obs_2][5]
event_no_obs_2 += 1
obs_simu_2_copy = np.copy(obs_simu_2)
# emulated DLL values
obs_emul = self.emulator.predict(params_list)
obs_emul_copy = np.copy(obs_emul)
# decay the learn rate
if(train_step % 1000 == 0 and train_step>0):
siamese_lr = K.eval(self.siamese.optimizer.lr)
K.set_value(self.siamese.optimizer.lr, siamese_lr*0.7)
print('lr for Siamese network updated from %f to %f' % (siamese_lr, siamese_lr*0.7))
adversarial1_lr = K.eval(self.adversarial1.optimizer.lr)
K.set_value(self.adversarial1.optimizer.lr, adversarial1_lr*0.7)
print('lr for Adversarial1 network updated from %f to %f' % (adversarial1_lr, adversarial1_lr*0.7))
loss_simu_list = [obs_simu_1_copy, obs_simu_2_copy]
loss_fake_list = [obs_simu_1_copy, obs_emul_copy]
input_val = 0
# swap which inputs to give to Siamese network
if(np.random.random() < 0.5):
loss_simu_list[0], loss_simu_list[1] = loss_simu_list[1], loss_simu_list[0]
if(np.random.random() < 0.5):
loss_fake_list[0] = obs_simu_2_copy
input_val = 1
# noise
y_ones = np.array([np.random.uniform(0.97, 1.00) for x in range(batch_size)]).reshape([batch_size, 1])
y_zeros = np.array([np.random.uniform(0.00, 0.03) for x in range(batch_size)]).reshape([batch_size, 1])
if(input_val == 0):
if np.random.random() < noise_value:
for b in range(batch_size):
if np.random.random() < noise_value:
obs_simu_1_copy[b], obs_simu_2_copy[b] = obs_simu_2[b], obs_simu_1[b]
obs_simu_1_copy[b], obs_emul_copy[b] = obs_emul[b], obs_simu_1[b]
if(input_val == 1):
if np.random.random() < noise_value:
for b in range(batch_size):
if np.random.random() < noise_value:
obs_simu_1_copy[b], obs_simu_2_copy[b] = obs_simu_2[b], obs_simu_1[b]
obs_simu_2_copy[b], obs_emul_copy[b] = obs_emul[b], obs_simu_2[b]
# train siamese
d_loss_simu = self.siamese.train_on_batch(loss_simu_list, y_ones)
d_loss_fake = self.siamese.train_on_batch(loss_fake_list, y_zeros)
d_loss = 0.5 * np.add(d_loss_simu, d_loss_fake)
log_mesg = '%s [S loss: %f]' % (log_mesg, d_loss[0])
#print(log_mesg)
#print('--------------------')
#noise_value*=0.999
#Step 2
# train emulator
a_loss_list = [obs_simu_1, params_list]
a_loss = self.adversarial1.train_on_batch(a_loss_list, y_ones)
log_mesg = '%s [E loss: %f]' % (log_mesg, a_loss[0])
print(log_mesg)
print('--------------------')
noise_value*=0.999
if __name__ == '__main__':
params_physics = 13
params_noise = 51 #51 looks ok, 61 is probably best, 100 also works
params = params_physics + params_noise
observables= 6
train_no = 1
magan = ModelAssistedGANPID(params=params, observables=observables)
magan.train(pretrain_steps=11001, train_steps=10000, batch_size=32, train_no=train_no)
NETWORKS
class Networks(object):
def __init__(self, noise_size=100, params=64, observables=5):
self.noise_size = noise_size
self.params = params
self.observables = observables
self.E = None # emulator
self.S = None # siamese
self.SM = None # siamese model
self.AM1 = None # adversarial model 1
'''
Emulator: generate identical observable parameters to those of the simulator S when both E and S are fed with the same input parameters
'''
def emulator(self):
if self.E:
return self.E
# input params
# the model takes as input an array of shape (*, self.params = 6)
input_params_shape = (self.params,)
input_params_layer = Input(shape=input_params_shape, name='input_params')
# architecture
self.E = Dense(1024)(input_params_layer)
self.E = LeakyReLU(0.2)(self.E)
self.E = Dense(self.observables*128, kernel_initializer=initializers.RandomNormal(stddev=0.02))(self.E)
self.E = LeakyReLU(0.2)(self.E)
self.E = Reshape((self.observables, 128))(self.E)
self.E = UpSampling1D(size=2)(self.E)
self.E = Conv1D(64, kernel_size=7, padding='valid')(self.E)
self.E = LeakyReLU(0.2)(self.E)
self.E = UpSampling1D(size=2)(self.E)
self.E = Conv1D(1, kernel_size=7, padding='valid', activation='tanh')(self.E)
# model
self.E = Model(inputs=input_params_layer, outputs=self.E, name='emulator')
# print
print("Emulator")
self.E.summary()
return self.E
'''
Siamese: determine the similarity between output values produced by the simulator and emulator
'''
def siamese(self):
if self.S:
return self.S
# input DLL images
input_shape = (self.observables, 1)
input_layer_anchor = Input(shape=input_shape, name='input_layer_anchor')
input_layer_candid = Input(shape=input_shape, name='input_layer_candidate')
input_layer = Input(shape=input_shape, name='input_layer')
# siamese
cnn = Conv1D(64, kernel_size=8, strides=2, padding='same',
kernel_initializer=initializers.RandomNormal(stddev=0.02))(input_layer)
cnn = LeakyReLU(0.2)(cnn)
cnn = Conv1D(128, kernel_size=5, strides=2, padding='same')(cnn)
cnn = LeakyReLU(0.2)(cnn)
cnn = Flatten()(cnn)
cnn = Dense(128, activation='sigmoid')(cnn)
cnn = Model(inputs=input_layer, outputs=cnn, name='cnn')
# left and right encodings
encoded_l = cnn(input_layer_anchor)
encoded_r = cnn(input_layer_candid)
# merge two encoded inputs with the L1 or L2 distance between them
L1_distance = lambda x: K.abs(x[0]-x[1])
L2_distance = lambda x: (x[0]-x[1]+K.epsilon())**2/(x[0]+x[1]+K.epsilon())
both = Lambda(L2_distance)([encoded_l, encoded_r])
prediction = Dense(1, activation='sigmoid')(both)
# model
self.S = Model([input_layer_anchor, input_layer_candid], outputs=prediction, name='siamese')
# print
print("Siamese:")
self.S.summary()
print("Siamese CNN:")
cnn.summary()
return self.S
'''
Siamese model
'''
def siamese_model(self):
if self.SM:
return self.SM
# optimizer
optimizer = Adam(lr=0.004, beta_1=0.5, beta_2=0.9)
# input DLL values
input_shape = (self.observables, 1)
input_layer_anchor = Input(shape=input_shape, name='input_layer_anchor')
input_layer_candid = Input(shape=input_shape, name='input_layer_candidate')
input_layer = [input_layer_anchor, input_layer_candid]
# discriminator
siamese_ref = self.siamese()
siamese_ref.trainable = True
self.SM = siamese_ref(input_layer)
# model
self.SM = Model(inputs=input_layer, outputs=self.SM, name='siamese_model')
self.SM.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[metrics.binary_accuracy])
print("Siamese model")
self.SM.summary()
return self.SM
'''
Adversarial 1 model (adversarial pre-training phase) - this is where the emulator and siamese network are trained to enable the emulator to generate DLL values for a set of given physics inputs
'''
def adversarial1_model(self):
if self.AM1:
return self.AM1
optimizer = Adam(lr=0.0004, beta_1=0.5, beta_2=0.9)
# input 1: simulated DLL values
input_obs_shape = (self.observables, 1)
input_obs_layer = Input(shape=input_obs_shape, name='input_obs')
# input 2: params
input_params_shape = (self.params, )
input_params_layer = Input(shape=input_params_shape, name='input_params')
# emulator
emulator_ref = self.emulator()
emulator_ref.trainable = True
self.AM1 = emulator_ref(input_params_layer)
# siamese
siamese_ref = self.siamese()
siamese_ref.trainable = False
self.AM1 = siamese_ref([input_obs_layer, self.AM1])
# model
input_layer = [input_obs_layer, input_params_layer]
self.AM1 = Model(inputs=input_layer, outputs=self.AM1, name='adversarial_1_model')
self.AM1.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[metrics.binary_accuracy])
# print
print("Adversarial 1 model:")
self.AM1.summary()
return self.AM1
INPUTS PLOT
OUTPUTS PLOT
LOSS PLOT
GENERATED OUTPUT (ORANGE) vs TRUE OUTPUT (BLUE)
I need to train a MASK-RCNN. But when I start the training, I got the following error message:
ERROR:root:Error processing image {'id': 'ISIC_0010064.jpg', 'source': 'lesion', 'path': '/home/mine/Desktop/ISIC2018/ISIC2018_inputs/ISIC_0010064.jpg'}
Traceback (most recent call last):
File "/home/mine/.virtualenvs/cv/lib/python3.6/site-packages/mask_rcnn-2.1-py3.6.egg/mrcnn/model.py", line 1709, in data_generator
use_mini_mask=config.USE_MINI_MASK)
File "/home/mine/.virtualenvs/cv/lib/python3.6/site-packages/mask_rcnn-2.1-py3.6.egg/mrcnn/model.py", line 1265, in load_image_gt
class_ids = class_ids[_idx]
IndexError: invalid index to scalar variable.
I've already changed the number of classes, change more parameters in config, but the error persists.
There is my code:
DATASET_PATH = "/home/enacom/Desktop/ISIC2018"
IMAGES_PATH = os.path.join(DATASET_PATH, "ISIC2018_inputs")
MASKS_PATH = os.path.join(DATASET_PATH, "ISIC2018_ground_truth")
IMAGES_PATH = sorted(list(paths.list_images(IMAGES_PATH)))
idxs = list(range(0, len(IMAGES_PATH)))
random.seed(42)
random.shuffle(idxs)
i = int(len(idxs) * 0.8)
trainIdxs = idxs[:i]
valIdxs = idxs[i:]
CLASS_NAMES = {1: "lesion"}
COCO_PATH = "mask_rcnn_coco.h5"
LOGS_AND_MODEL_DIR = "lesion_logs"
class LesionBoundaryConfig(Config):
NAME = "lesion"
GPU_COUNT = 1
IMAGES_PER_GPU = 1
STEPS_PER_EPOCH = len(trainIdxs)
VALIDATION_STEPS = len(valIdxs) # doesnt suport low values
NUM_CLASSES = len(CLASS_NAMES) + 1
DETECTION_MIN_CONFIDENCE = 0.75
IMAGE_MIN_DIM = 128
IMAGE_MAX_DIM = 1024
class LesionBoundaryDataset(Dataset):
def __init__(self, imagePaths, classNames, width = 1024):
super().__init__(self)
self.imagePaths = imagePaths
self.classNames = classNames
self.width = width
def load_lesions(self, idxs):
for (classID, label) in self.classNames.items():
self.add_class("lesion", classID, label)
for i in idxs:
imagePath = self.imagePaths[i]
filename = imagePath.split(os.path.sep)[-1]
self.add_image("lesion", image_id=filename, path = imagePath)
def load_image(self, image_ID):
p = self.image_info[image_ID]["path"]
image = cv2.imread(p)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = imutils.resize(image, width=self.width, height=self.width)
return image
def load_mask(self, image_id):
info = self.image_info[image_id]
filename = info["id"].split(".")[0]
annot_path = os.path.sep.join([MASKS_PATH, "{}_segmentation.png".format(filename)])
annot_mask = cv2.imread(annot_path)
annot_mask = cv2.split(annot_mask)[0]
annot_mask = imutils.resize(annot_mask, width=self.width, inter = cv2.INTER_NEAREST)
annot_mask[annot_mask > 0] = 1
# function to take unique ids
class_ids = np.unique(annot_mask)
# remove the id 0 because we should ignore the background
class_ids = np.delete(class_ids, [0])
masks = np.zeros((annot_mask.shape[0], annot_mask.shape[1], 1),
dtype="uint8")
for (i, class_ids) in enumerate(class_ids):
class_mask = np.zeros(annot_mask.shape, dtype="uint8")
class_mask[annot_mask == class_ids] = 1
masks[:, :, i] = class_mask
return (masks.astype("bool"), class_ids.astype("int32"))
mode = "training"
train_dataset = LesionBoundaryDataset(IMAGES_PATH, CLASS_NAMES)
train_dataset.load_lesions(trainIdxs)
train_dataset.prepare()
val_dataset = LesionBoundaryDataset(IMAGES_PATH, CLASS_NAMES)
val_dataset.load_lesions(valIdxs)
val_dataset.prepare()
config = LesionBoundaryConfig()
config.display()
aug = iaa.SomeOf((0, 2), [
iaa.Fliplr(0.5),
iaa.Fliplr(0.5),
iaa.Affine(rotate=(-10, 10))
])
model = MaskRCNN(mode, config = config, model_dir=LOGS_AND_MODEL_DIR)
model.load_weights(COCO_PATH, by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc","mrcnn_bbox", "mrcnn_mask"])
model.train(train_dataset, val_dataset, epochs=20,
layers="heads", learning_rate=config.LEARNING_RATE /10, augmentation=aug)
I just want a resolution to make my training works. I've searched before post here, but I couldn't found any solution.
I'm learning the Deep Reinforcement learning
framework Chainer.
I've followed a tutorial and gotten the following code:
def train_dddqn(env):
class Q_Network(chainer.Chain):
def __init__(self, input_size, hidden_size, output_size):
super(Q_Network, self).__init__(
fc1=L.Linear(input_size, hidden_size),
fc2=L.Linear(hidden_size, hidden_size),
fc3=L.Linear(hidden_size, hidden_size // 2),
fc4=L.Linear(hidden_size, hidden_size // 2),
state_value=L.Linear(hidden_size // 2, 1),
advantage_value=L.Linear(hidden_size // 2, output_size)
)
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
def __call__(self, x):
h = F.relu(self.fc1(x))
h = F.relu(self.fc2(h))
hs = F.relu(self.fc3(h))
ha = F.relu(self.fc4(h))
state_value = self.state_value(hs)
advantage_value = self.advantage_value(ha)
advantage_mean = (F.sum(advantage_value, axis=1) / float(self.output_size)).reshape(-1, 1)
q_value = F.concat([state_value for _ in range(self.output_size)], axis=1) + (
advantage_value - F.concat([advantage_mean for _ in range(self.output_size)], axis=1))
return q_value
def reset(self):
self.cleargrads()
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = copy.deepcopy(Q)
optimizer = chainer.optimizers.Adam()
optimizer.setup(Q)
epoch_num = 50
step_max = len(env.data) - 1
memory_size = 200
batch_size = 50
epsilon = 1.0
epsilon_decrease = 1e-3
epsilon_min = 0.1
start_reduce_epsilon = 200
train_freq = 10
update_q_freq = 20
gamma = 0.97
show_log_freq = 5
memory = []
total_step = 0
total_rewards = []
total_losses = []
start = time.time()
for epoch in range(epoch_num):
pobs = env.reset()
step = 0
done = False
total_reward = 0
total_loss = 0
while not done and step < step_max:
# select act
pact = np.random.randint(3)
if np.random.rand() > epsilon:
pact = Q(np.array(pobs, dtype=np.float32).reshape(1, -1))
pact = np.argmax(pact.data)
# act
obs, reward, done = env.step(pact)
# add memory
memory.append((pobs, pact, reward, obs, done))
if len(memory) > memory_size:
memory.pop(0)
# train or update q
if len(memory) == memory_size:
if total_step % train_freq == 0:
shuffled_memory = np.random.permutation(memory)
memory_idx = range(len(shuffled_memory))
for i in memory_idx[::batch_size]:
batch = np.array(shuffled_memory[i:i + batch_size])
b_pobs = np.array(batch[:, 0].tolist(), dtype=np.float32).reshape(batch_size, -1)
b_pact = np.array(batch[:, 1].tolist(), dtype=np.int32)
b_reward = np.array(batch[:, 2].tolist(), dtype=np.int32)
b_obs = np.array(batch[:, 3].tolist(), dtype=np.float32).reshape(batch_size, -1)
b_done = np.array(batch[:, 4].tolist(), dtype=np.bool)
q = Q(b_pobs)
indices = np.argmax(q.data, axis=1)
maxqs = Q_ast(b_obs).data
target = copy.deepcopy(q.data)
for j in range(batch_size):
Q.reset()
loss = F.mean_squared_error(q, target)
total_loss += loss.data
loss.backward()
optimizer.update()
if total_step % update_q_freq == 0:
Q_ast = copy.deepcopy(Q)
# epsilon
if epsilon > epsilon_min and total_step > start_reduce_epsilon:
epsilon -= epsilon_decrease
# next step
total_reward += reward
pobs = obs
step += 1
total_step += 1
total_rewards.append(total_reward)
total_losses.append(total_loss)
if (epoch + 1) % show_log_freq == 0:
log_reward = sum(total_rewards[((epoch + 1) - show_log_freq):]) / show_log_freq
log_loss = sum(total_losses[((epoch + 1) - show_log_freq):]) / show_log_freq
elapsed_time = time.time() - start
print('\t'.join(map(str, [epoch + 1, epsilon, total_step, log_reward, log_loss, elapsed_time])))
start = time.time()
return Q, total_losses, total_rewards
Q, total_losses, total_rewards = train_dddqn(Environment1(train))
My question is how can I save and load this Model which has been train very well?I know Kreas has some function like: model.save and load_model.
So what's the specify code I need for this Chainer code?
You can use serializer module to save/load chainer's model's parameter (Chain class).
from chainer import serializers
Q = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
Q_ast = Q_Network(input_size=env.history_t + 1, hidden_size=100, output_size=3)
# --- train Q here... ---
# copy Q parameter into Q_ast by saving Q's parameter and load to Q_ast
serializers.save_npz('my.model', Q)
serializers.load_npz('my.model', Q_ast)
See official document for details:
http://docs.chainer.org/en/stable/guides/serializers.html
Also, you may refer chainerrl, which is a chainer library for reinforcement learning.
https://github.com/chainer/chainerrl
chainerrl have a util function copy_param to copy parameter from network source_link to target_link.
https://github.com/chainer/chainerrl/blob/master/chainerrl/misc/copy_param.py#L12-L30
Trying to recreate the Tensorflow NMT example
https://github.com/tensorflow/nmt#bidirectional-rnns
with a baseline of just copying the original sentence instead of translating, but getting this weird bug for my inference part where the only outputs are the same words multiple times.
Using the TF attention wrapper and using Greedy Embedding Helper for inference.
Using TF 1.3 and python 3.6 if that helps
Screenshot of the bugged prediction
The weird thing is during training, the predictions are normal and the loss decreased to around 0.1
I have already checked the embeddings and they do change from each time step and I suspect that this has something to do with the decoding stage since it's the only part the really changes from training to inference.
tf.reset_default_graph()
sess = tf.InteractiveSession()
PAD = 0
EOS = 1
max_gradient_norm = 1
learning_rate = 0.02
num_layers = 1
total_epoch = 2
sentence_length = 19
vocab_size = 26236
input_embedding_size = 128
if mode == "training":
batch_size = 100
isReused = None
else:
batch_size = 1
isReused = True
with tf.name_scope("encoder"):
encoder_embeddings = tf.get_variable('encoder_embeddings', [vocab_size, input_embedding_size], tf.float32,
tf.random_uniform_initializer(-1.0, 1.0))
encoder_hidden_units = 128
encoder_inputs = tf.placeholder(shape=(batch_size, None), dtype=tf.int32, name='encoder_inputs')
encoder_lengths = tf.placeholder(shape=batch_size, dtype=tf.int32, name='encoder_lengths')
encoder_cell = tf.contrib.rnn.BasicLSTMCell(encoder_hidden_units, state_is_tuple=True)
encoder_inputs_embedded = tf.nn.embedding_lookup(encoder_embeddings, encoder_inputs)
encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, encoder_inputs_embedded, dtype=tf.float32,
sequence_length=encoder_lengths, time_major=False)
with tf.variable_scope("decoder", isReused):
decoder_hidden_units = encoder_hidden_units
decoder_inputs = tf.placeholder(shape=(batch_size, None), dtype=tf.int32, name='decoder_inputs')
decoder_targets = tf.placeholder(shape=(batch_size, None), dtype=tf.int32, name='decoder_targets')
decoder_lengths = tf.placeholder(shape=batch_size, dtype=tf.int32, name="decoder_lengths")
decoder_embeddings = tf.get_variable('decoder_embeddings', [vocab_size, input_embedding_size], tf.float32,
tf.random_uniform_initializer(-1.0, 1.0))
decoder_inputs_embedded = tf.nn.embedding_lookup(decoder_embeddings, decoder_inputs)
decoder_cell = tf.contrib.rnn.BasicLSTMCell(decoder_hidden_units, state_is_tuple=True)
projection_layer = layers_core.Dense(vocab_size, use_bias=False)
attention_mechanism = tf.contrib.seq2seq.LuongAttention(encoder_hidden_units, encoder_outputs,
memory_sequence_length=encoder_lengths)
attn_decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
attention_layer_size=encoder_hidden_units)
if mode == "training":
helper = tf.contrib.seq2seq.TrainingHelper(decoder_inputs_embedded, decoder_lengths, time_major=False)
maximum_iterations = None
else:
helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(decoder_embeddings, tf.fill([batch_size], EOS), EOS)
maximum_iterations = tf.round(tf.reduce_max(encoder_lengths) * 2)
# Decoder
init_state = attn_decoder_cell.zero_state(batch_size, tf.float32).clone(cell_state=encoder_state)
decoder = tf.contrib.seq2seq.BasicDecoder(attn_decoder_cell, helper, init_state, output_layer=projection_layer)
# Dynamic decoding
decoder_outputs, decoder_final_state, _ = tf.contrib.seq2seq.dynamic_decode(decoder, output_time_major=False,
swap_memory=True,
maximum_iterations=maximum_iterations)
decoder_logits = decoder_outputs.rnn_output
decoder_prediction = decoder_outputs.sample_id
if mode == "training":
with tf.name_scope("cross_entropy"):
labels = tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32)
decoder_crossent = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=decoder_logits)
with tf.name_scope("loss"):
target_weights = tf.sequence_mask(decoder_lengths, maxlen=20, dtype=decoder_logits.dtype)
train_loss = tf.reduce_sum(decoder_crossent * target_weights) / (batch_size * 20)
tf.summary.scalar('loss', train_loss)
with tf.name_scope("clip_gradients"):
params = tf.trainable_variables()
gradients = tf.gradients(train_loss, params)
clipped_gradients, _ = tf.clip_by_global_norm(gradients, max_gradient_norm)
with tf.name_scope("Optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate)
update_step = optimizer.apply_gradients(zip(clipped_gradients, params))
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(os.getcwd() + '/train', sess.graph)
test_writer = tf.summary.FileWriter(os.getcwd() + '/test', )
tf.global_variables_initializer().run()
sess.run(tf.global_variables_initializer())
I am using a machine which has 2 GPUs Titan Black to train my deep learning model which has 3 layers (3x3, 3x3 and 5x5).
The training runs pretty well but when I watch nvidia-smi (watch every 1 sec), I realized that my program uses only one GPU for computation, the second one always 0% even when the first one reach 100%.
I am trying to use tf.device to assign specific tasks for each of them but then they run one-by-one, not in parallel, and the total time was even increased, not reduced (I guess because 2 GPUs had to exchange values with each other)
Below is my program. It is quite messy, maybe you just need to pay attention at the graph where I use tf.device is enough...
Thank you so much!
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
from os import listdir, sys
from os.path import isfile, join
from time import gmtime, strftime
import time
def validatePath(path):
path = path.replace("\\","/")
if (path[len(path)-1] != "/"):
path = path + "/"
return path
hidden_size_default = np.array([16, 32, 64, 32])
cnn1_default = 3
cnn2_default = 3
cnn3_default = 5
SIZE_BATCH_VALID = 200
input_path = 'ARCHIVES-sub-dataset'
output_path = 'ARCHIVES-model'
log_address = "trainlog.txt"
tf.app.flags.DEFINE_integer('h0', hidden_size_default[0], 'Size of hidden layer 0th')
tf.app.flags.DEFINE_integer('h1', hidden_size_default[1], 'Size of hidden layer 1st')
tf.app.flags.DEFINE_integer('h2', hidden_size_default[2], 'Size of hidden layer 2nd')
tf.app.flags.DEFINE_integer('h3', hidden_size_default[3], 'Size of hidden layer 3rd')
tf.app.flags.DEFINE_integer('k1', cnn1_default , 'Size of kernel 1st')
tf.app.flags.DEFINE_integer('k2', cnn2_default , 'Size of kernel 2nd')
tf.app.flags.DEFINE_integer('k3', cnn3_default , 'Size of kernel 3rd')
tf.app.flags.DEFINE_string('input_path', input_path, 'The parent directory which contains 2 directories: dataset and label')
tf.app.flags.DEFINE_string('output_path', output_path, 'The directory which will store models (you have to create)')
tf.app.flags.DEFINE_string('log_address', log_address, 'The file name which will store the log')
FLAGS = tf.app.flags.FLAGS
load_path = FLAGS.input_path
save_model_path = FLAGS.output_path
log_addr = FLAGS.log_address
load_path = validatePath(load_path)
save_model_path = validatePath(save_model_path)
cnn1 = FLAGS.k1
cnn2 = FLAGS.k2
cnn3 = FLAGS.k3
hidden_size = np.array([FLAGS.h0, FLAGS.h1, FLAGS.h2, FLAGS.h3])
# Shuffle the dataset and its label
def randomize(dataset, labels):
permutation = np.random.permutation(labels.shape[0])
shuffled_dataset = dataset[permutation,:]
shuffled_labels = labels[permutation]
return shuffled_dataset, shuffled_labels
def writemyfile(mystring):
with open(log_addr, "a") as myfile:
myfile.write(str(mystring + "\n"))
num_labels = 5
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/ predictions.shape[0])
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def max_pool_2x2(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def DivideSets(input_set):
length_set = input_set.shape[0]
index_70 = int(length_set*0.7)
index_90 = int(length_set*0.9)
set_train = input_set[0:index_70]
set_valid = input_set[index_70:index_90]
set_test = input_set[index_90:length_set]
return np.float32(set_train), np.float32(set_valid), np.float32(set_test)
# from 1-value labels to 5 values of (0 and 1)
def LabelReconstruct(label_set):
label_set = label_set.astype(int)
new_label_set = np.zeros(shape=(len(label_set),num_labels))
for i in range(len(label_set)):
new_label_set[i][label_set[i]] = 1
return new_label_set.astype(int)
def LoadDataSet(load_path):
list_data = [f for f in listdir(load_path + "dataset/") if isfile(join(load_path + "dataset/", f))]
list_label = [f for f in listdir(load_path + "label/") if isfile(join(load_path + "dataset/", f))]
if list_data.sort() == list_label.sort():
return list_data
else:
print("data and labels are not suitable")
return 0
# load, randomize, normalize images and reconstruct labels
def PrepareData(*arg):
filename = arg[0]
loaded_dataset = pickle.load( open( load_path + "dataset/" + filename, "rb" ))
loaded_labels = pickle.load( open( load_path + "label/" + filename, "rb" ))
if len(arg) == 1:
datasize = len(loaded_labels)
elif len(arg) == 2:
datasize = int(arg[1])
else:
print("not more than 2 arguments please!")
dataset_full,labels_full = randomize(loaded_dataset[0:datasize], loaded_labels[0:datasize])
return NormalizeData(dataset_full), LabelReconstruct(labels_full)
def NormalizeData(dataset):
dataset = dataset - (dataset.mean())
dataset = dataset / (dataset.std())
return dataset
### LOAD DATA
listfiles = LoadDataSet(load_path)
# divide
listfiles_train = listfiles[0:15]
listfiles_valid = listfiles[15:25]
listfiles_test = listfiles[25:len(listfiles)]
graphCNN = tf.Graph()
with graphCNN.as_default():
with tf.device('/gpu:0'):
x = tf.placeholder(tf.float32, shape=(None, 224,224,3)) # X
y_ = tf.placeholder(tf.float32, shape=(None, num_labels)) # Y_
dropout = tf.placeholder(tf.float32)
if dropout == 1.0:
keep_prob = tf.constant([0.2, 0.3, 0.5], dtype=tf.float32)
else:
keep_prob = tf.constant([1.0, 1.0, 1.0], dtype=tf.float32)
weights_1 = weight_variable([cnn1,cnn1,3, hidden_size[0]])
biases_1 = bias_variable([hidden_size[0]])
weights_2 = weight_variable([cnn2,cnn2,hidden_size[0], hidden_size[1]])
biases_2 = bias_variable([hidden_size[1]])
weights_3 = weight_variable([cnn3,cnn3,hidden_size[1], hidden_size[2]])
biases_3 = bias_variable([hidden_size[2]])
weights_4 = weight_variable([56 * 56 * hidden_size[2], hidden_size[3]])
biases_4 = bias_variable([hidden_size[3]])
weights_5 = weight_variable([hidden_size[3], num_labels])
biases_5 = bias_variable([num_labels])
def model(data):
with tf.device('/gpu:1'):
train_hidden_1 = tf.nn.relu(conv2d(data, weights_1) + biases_1)
train_hidden_2 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_1, weights_2) + biases_2))
train_hidden_2_drop = tf.nn.dropout(train_hidden_2, keep_prob[0])
train_hidden_3 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_2_drop, weights_3) + biases_3))
train_hidden_3_drop = tf.nn.dropout(train_hidden_3, keep_prob[1])
train_hidden_3_drop = tf.reshape(train_hidden_3_drop,[-1, 56 * 56 * hidden_size[2]])
train_hidden_4 = tf.nn.relu(tf.matmul(train_hidden_3_drop, weights_4) + biases_4)
train_hidden_4_drop = tf.nn.dropout(train_hidden_4, keep_prob[2])
logits = tf.matmul(train_hidden_4_drop, weights_5) + biases_5
return logits
t_train_labels = tf.argmax(y_, 1) # From one-hot (one and zeros) vectors to values
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model(x), labels=t_train_labels))
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
y = tf.nn.softmax(model(x))
### RUNNING
print("log address: %s" % (log_addr))
#num_steps = 10001
times_repeat = 20 # number of epochs
batch_size = 100
with tf.Session(graph=graphCNN,config=tf.ConfigProto(log_device_placement=True)) as session:
tf.initialize_all_variables().run()
saver = tf.train.Saver(max_to_keep=0)
writemyfile("---ARCHIVES_M1----")
mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
writemyfile(str("\nTime: %s \nLayers: %d,%d,%d \epochs: %d" % (mytime,cnn1,cnn2,cnn3,times_repeat)))
writemyfile("Train files:" + str(listfiles_train))
writemyfile("Valid files:" + str(listfiles_valid))
writemyfile("Test files:" + str(listfiles_test))
print("Model will be saved in file: %s" % save_model_path)
writemyfile(str("Model will be saved in file: %s" % save_model_path))
### TRAINING & VALIDATION
valid_accuracies_epochs = np.array([])
for time_repeat in range(times_repeat):
print("- time_repeat:",time_repeat)
writemyfile("- time_repeat:"+str(time_repeat))
for file_train in listfiles_train:
file_train_id = int(file_train[0:len(file_train)-4])
time_start_this_file = time.time()
#LOAD DATA
print("- - file:",file_train_id, end=' ')
writemyfile("- - file:" + str(file_train_id))
Data_train, Label_train= PrepareData(file_train)
for step in range(0,len(Data_train)-batch_size,batch_size):
batch_data = Data_train[step:step+batch_size]
batch_labels = Label_train[step:step+batch_size]
feed_dict = {x : batch_data, y_ : batch_labels, dropout: 1.0}
opti, l, predictions = session.run([optimizer, loss, y], feed_dict=feed_dict)
train_accuracies = np.array([])
for index_tr_accu in range(0,len(Data_train)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
current_predictions = y.eval(feed_dict={x: Data_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID],dropout: 0.0})
current_accuracy = accuracy(current_predictions, Label_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID])
train_accuracies = np.r_[train_accuracies,current_accuracy]
train_accuracy = train_accuracies.mean()
print("batch accu: %.2f%%" %(train_accuracy),end=" | ")
writemyfile("batch accu: %.2f%%" %(train_accuracy))
time_done_this_file = time.time() - time_start_this_file
print("time: %.2fs" % (time_done_this_file))
writemyfile("time: %.2fs" % (time_done_this_file))
# save model
model_addr = save_model_path + "model335" + "-epoch-" + str(time_repeat) + ".ckpt"
save_path = saver.save(session, model_addr,) # max_to_keep default was 5
mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print("epoch finished at %s \n model address: %s" % (mytime,model_addr))
writemyfile("epoch finished at %s \n model address: %s" % (mytime,model_addr))
# validation
valid_accuracies = np.array([])
for file_valid in listfiles_valid:
file_valid_id = int(file_valid[0:len(file_valid)-4])
Data_valid, Label_valid = PrepareData(file_valid)
for index_vl_accu in range(0,len(Data_valid)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
current_predictions = y.eval(feed_dict={x: Data_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID],dropout: 0.0})
current_accuracy = accuracy(current_predictions, Label_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID])
valid_accuracies = np.r_[valid_accuracies,current_accuracy]
valid_accuracy = valid_accuracies.mean()
print("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
writemyfile("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
valid_accuracies_epochs = np.hstack([valid_accuracies_epochs,valid_accuracy])
print('Done!!')
writemyfile(str('Done!!'))
session.close()
Update: I found cifar10_multi_gpu_train.py seems to be a good example for training with multi GPUs, but honestly I don't know how to apply on my case.
I think you need to change
def model(data):
with tf.device('/gpu:1'):
to:
def model(data):
for d in ['/gpu:0', '/gpu:1']:
with tf.device(d):
and ditch the line with tf.device('/gpu:0'):
Since at the first with tf.device... you are only doing initiation
of variables and then you are resetting your devices with the next with tf.device.
Let me know if this works since I can't test it.