Convert LSTM-LSTM model to CNN-LSTM model - machine-learning

from keras import regularizers,constraints,initializers
model = Sequential()
FIRST_LSTM = 256
SECOND_LSTM = 256
EMBEDD_VECTOR_LENGTH = 128
outputs=[]
inputs_=[]
EMB = Embedding(DICTIONARY_LENGTH, EMBEDD_VECTOR_LENGTH, input_length=30)
BDR = Bidirectional(CuDNNLSTM(FIRST_LSTM,return_sequences=True),merge_mode='concat')
TDB = TimeDistributed(Dense(DICTIONARY_LENGTH+3))
for i in range(SENTENCES_IN_NOVEL): # SENTENCES_IN_NOVEL = 100
if(i%10==0):
print(i,"/",SENTENCES_IN_NOVEL)
inputlayer = Input(shape=[MAX_LENGTH]) # MAX_LENGTH = 30 (30 words per sentence)
inputs_.append(inputlayer)
layer = EMB(inputlayer) # 50 is embedd vector size for each POS Tag
layer = BDR(layer) # output 100 because of bidirectonal concat
layer = Lambda( lambda x: K.sum(x, axis=1), input_shape=(30,2*FIRST_LSTM))(layer)
outputs.append(layer)
merge_ = concatenate(outputs)
merge_ = Reshape((SENTENCES_IN_NOVEL, 2*FIRST_LSTM), input_shape=(SENTENCES_IN_NOVEL*(2*FIRST_LSTM),))(merge_)
merge_ = Bidirectional(CuDNNLSTM(SECOND_LSTM,return_sequences=True),merge_mode='concat')(merge_)
attention_mul = Attention(100)(merge_)#step_dimen =100
output = Dense(14, activation='softmax')(attention_mul)
model = Model(inputs=inputs_,outputs=output)
model.summary()
I wrote this model to predict author of Novels LSTM-LSTM Model.
Wants to convert CNN-LSTM.
I am unable to figure out this problem.

Related

I have been training a decoder based transformer for word generation. But it keeps generating the same words over and over again

I have been trying to create a decoder based transformer for text generation and the text its generating is the same no matter the input sequence
The following is my code some of , the code for preprocessing was remove
def process_batch(ds):
ds = tokenizer(ds)
## padd short senteces to max len using the [PAD] id
## add special tokens [START] and [END]
ds_start_end_packer = StartEndPacker(
sequence_length=MAX_SEQUENCE_LENGTH + 1,
start_value = tokenizer.token_to_id("[START]"),
end_value = tokenizer.token_to_id("[END]"),
pad_value = tokenizer.token_to_id("[PAD]")
)
ds = ds_start_end_packer(ds)
return ({"decoder_inputs":ds[:, :-1]}, ds[:, 1:])
def make_ds(seq):
dataset = tf.data.Dataset.from_tensor_slices(seq)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.map(process_batch, num_parallel_calls=tf.data.AUTOTUNE)
return dataset.shuffle(128).prefetch(32).cache()
train_ds = make_ds(train_seq)
val_ds = make_ds(val_seq)
This is the decoder section i was using keras_nlp
It have 2 decoders layers
decoder_inputs = Input(shape=(None,), dtype="int64",
name="decoder_inputs")
x = TokenAndPositionEmbedding(
vocabulary_size= VOCAB_SIZE,
sequence_length = MAX_SEQUENCE_LENGTH,
embedding_dim = EMBED_DIM,
mask_zero =True
)(decoder_inputs)
x = TransformerDecoder(
intermediate_dim = INTERMEDIATE_DIM, num_heads= NUM_HEADS
)(x)
x = TransformerDecoder(
intermediate_dim = INTERMEDIATE_DIM, num_heads= NUM_HEADS
)(x)
x = Dropout(0.5)(x)
decoder_ouput = Dense(VOCAB_SIZE, activation="softmax")(x)
decoder = Model([decoder_inputs],decoder_ouput)
decoder_outputs = decoder([decoder_inputs])
transformer = Model(inputs=decoder_inputs, outputs=decoder_outputs, name="transformer")
#transformer.load_weights("/content/my-drive/MyDrive/projects/Olsen/weights-improvement-07-0.41.hdf5")
transformer.compile("adam",loss="sparse_categorical_crossentropy", metrics=['accuracy'])

When training a multi class CNN with PyTorch displays extraordinarily large loss

I am currently trying train a CNN using PyTorch to predict a subject's age. The age group ranges from 0 to 116. I used the same model to train it on gender classification with two options: male or female.
I ported the same code for the age classification, I was getting errors. The error was due to our last fully connected layer not return a large enough output (in terms of matrix size, it was initially returning a 50 x 2 matrix due to our gender classifier but I switched it to 50 x 117 for the age classification based on the total age options.)
My issue now is that the training loop prints epochs with a huge loss (~3.5 while before, when training the gender classification, it was sub zero.)
Below is my code:
DataLoader class:
class MyDataset(Dataset):
def __init__(self, root_directory, csv_file, image_path, transform = None):
annotated_path = os.path.relpath(csv_file) # Path to UTKFace Dataset and Annotations
self.read_in_csv = pd.read_csv(annotated_path, index_col=False)
self.image_path = os.path.join(root_directory, image_path)
self.transform = transform
self.labels = np.asarray(self.read_in_csv.loc[:,'age'])
def __getitem__(self, index):
attr = self.labels[index]
image_name = str(self.read_in_csv.loc[index, 'file'])
image = Image.open(image_name)
if self.transform:
image = self.transform(image)
dict = {'image':image, 'label':attr}
return dict
def __len__(self):
return len(self.read_in_csv.index)
CNN Architecture:
class ConvolutionalNN(nn.Module):
def __init__(self):
super(ConvolutionalNN,self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3,96,kernel_size=7,stride=4),
nn.BatchNorm2d(96), # Number of Features
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2))
self.layer2 = nn.Sequential(
nn.Conv2d(96,256,kernel_size=5,padding=2),
nn.BatchNorm2d(256),
nn.ReLU(), # Default = False
nn.MaxPool2d(kernel_size=3,stride=2))
self.layer3 = nn.Sequential(
nn.Conv2d(256,384,kernel_size=3,padding=1),
nn.BatchNorm2d(384),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2))
self.fc1 = nn.Linear(384*6*6,512)
self.fc2 = nn.Linear(512,512)
self.fc3 = nn.Linear(512,117)
def forward(self,x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = out.view(out.size(0),-1)
#print out.size()
out = F.dropout(F.relu(self.fc1(out)))
out = F.dropout(F.relu(self.fc2(out)))
out = self.fc3(out)
return out
Training Loop:
def training_loop(checkpoint = None, best=False):
current_epoch = 1
num_epochs = 50
train_acc_history = []
val_acc_history = []
epoch_history = []
learning_rate = 0.001
best_val_acc = 0.0
is_best = False
criterion = nn.CrossEntropyLoss()
## Predict the Age and Gender of the Human in the Image
optimizer = torch.optim.SGD(cnn.parameters(),lr=0.001,momentum=0.9)
if checkpoint is not None:
is_best = best
current_epoch = checkpoint['epoch']
train_acc_history = checkpoint['train_acc_history']
val_acc_history = checkpoint['val_acc_history']
best_val_acc = checkpoint['best_val_acc']
optimizer.load_state_dict(checkpoint['optimizer'])
epoch_history = checkpoint['epoch_history']
print('Uploading our images now...')
for epoch in range(current_epoch, num_epochs + current_epoch):
print('Starting epoch %d / %d' % (epoch + 1, num_epochs + current_epoch))
print('Learning Rate for this epoch: {}'.format(learning_rate))
for i, batch in enumerate(train_loader):
images, labels = batch['image'], batch['label']
images = images.clone().detach()
labels = labels.clone().detach()
if use_gpu:
images = images.cuda()
labels = labels.cuda()
optimizer.zero_grad()
pred_labels = cnn(images)
loss = criterion(pred_labels,labels)
loss.backward()
optimizer.step()
So this is my code. It does not seem to be training well.
Please let me know on what could be done to fix this.

About pytorch reduction mean

I want use L1loss and BCELoss with reduction='mean' in vae reconstruction loss
but it produce same result for all different input i.e. result for landmark
so i use reduction='sum' it produce correct result that different output for different input.
how can i use mean reduction??
L1Loss = nn.L1Loss(reduction='mean').to(device)
BCELoss = nn.BCELoss(reduction='mean').to(device)
kld_criterion = KLDLoss(reduction='mean').to(device)
in training
rec_m, (rec_f, mean_f, logvar_f), (rec_l, mean_l, logvar_l) = model(origin)
lm_loss = CELoss(rec_l, lm)
f_loss = L1Loss(rec_f, f)
m_loss = CELoss(rec_m, m)
lm_kld_loss = kld_criterion(mean_l, logvar_l)
f_kld_loss = kld_criterion(mean_f, logvar_f)
loss = 4000*(f_loss + m_loss) + 30 * (lm_kld_loss + f_kld_loss) + 2000 * lm_loss
and model code
class VAE_NET(nn.Module):
def __init__(self, nc=3, ndf=32, nef=32, nz=128, isize=128, device=torch.device("cuda:0"), is_train=True):
super(VAE_NET, self).__init__()
self.nz = nz
# Encoder
self.l_encoder = Encoder(nc=nc, nef=nef, nz=nz, isize=isize, device=device)
self.f_encoder = Encoder(nc=nc, nef=nef, nz=nz, isize=isize, device=device)
# Decoder
self.l_decoder = Decoder(nc=nc, ndf=ndf, nz=nz, isize=isize)
self.m_decoder = Decoder(nc = nc, ndf = ndf, nz = nz * 2, isize = isize)
self.f_decoder = Decoder(nc = nc, ndf = ndf, nz = nz * 2, isize = isize)
if is_train == False:
for param in self.encoder.parameters():
param.requires_grad = False
for param in self.decoder.parameters():
param.requires_grad = False
def forward(self, x):
latent_l, mean_l, logvar_l = self.l_encoder(x)
latent_f, mean_f, logvar_f = self.f_encoder(x)
concat_latent = torch.cat((latent_l, latent_f), 1)
rec_l = self.l_decoder(latent_l)
rec_m = self.m_decoder(concat_latent)
rec_f = self.f_decoder(concat_latent)
return rec_m, (rec_f, mean_f, latent_f), (rec_l, mean_l, latent_l)
l is for face landmark
m is for face mask
f is for face part
reduction='sum' and reduction='mean' differs only by a scalar multiple. There is nothing wrong with your implementation from what I see. If your model only produces correct results with reduction='sum', it is likely that your learning rate is too low (and sum makes up for that difference by amplifying the gradient).

GAN generator producing distinguishable output

I am trying to train a special type of GAN called a Model-Assisted GAN (https://arxiv.org/pdf/1812.00879) using Keras, which takes as an input a vector of 13 input parameters + Gaussian noise, and generate a vector of 6 outputs. The mapping between the vectors of inputs and outputs is non-trivial, as it is related to a high energy physics simulation (in particular the simulation has some inherent randomness). The biggest dependence on the final outputs are encoded in the first five inputs. The biggest difference for this model to a traditional GAN is the use of a Siamese network as the discriminator, and this takes two inputs at a time, so for the same input parameters we provide two sets of possible outputs per training (possible due to the randomness of the simulation), so there are sort of 12 output distributions, but only 6 are unique, which is what we aim to generate. We used a 1D convolutional neural network for both the discriminator and generator.
The current model we have trained seems to reproduce the output distributions for an independent testing sample to reasonably good accuracy (see below plot of histograms overlayed), but there are still some clear differences between the distributions, and the eventual goal is for the model to be able to produce indistinguishable data to the simulation. I have so far tried varying the learning rate and added varying amounts of learning rate decay, tweaking the network architectures, changing some of the hyperparameters of the optimiser, adding some more noise to the discriminator training by implementing some label smoothing and swapping the order of inputs, adding some label smoothing to the generator training, increasing the batch size and also increasing the amount of noise inputs, and I still cannot get the model to perfectly reproduce the output distributions. I am struggling to come up with ideas of what to do next, and I was wondering if anyone else has had a similar problem, whereby the output is not quite perfect, and if so how they might have gone about solving this problem? Any thoughts or tips would be greatly appreciated!
I have included the full code for the training, as well as some plots of the input and output distributions (before applying the Quantile Transformer), the loss plots for the adversarial network and the discriminator (A for Adversarial, S for Siamese (Discriminator)) and then the overlay of the histograms for the generated and true output distributions for the independent testing sample (which is where you can see the small differences that arise).
Thanks in advance.
TRAINING CODE
"""
Training implementation
"""
net_range = [-1,1]
gauss_range = [-5.5,5.5]
mapping = interp1d(gauss_range, net_range)
class ModelAssistedGANPID(object):
def __init__(self, params=64, observables=6):
self.params = params
self.observables = observables
self.Networks = Networks(params=params, observables=observables)
self.siamese = self.Networks.siamese_model()
self.adversarial1 = self.Networks.adversarial1_model()
def train(self, pretrain_steps=4500, train_steps=100000, batch_size=32, train_no=1):
print('Pretraining for ', pretrain_steps,' steps before training for ', train_steps, ' steps')
print('Batch size = ', batch_size)
print('Training number = ', train_no)
'''
Pre-training stage
'''
# Number of tracks for the training + validation sample
n_events = 1728000 + 100000
n_train = n_events - 100000
# Parameters for Gaussian noise
lower = -1
upper = 1
mu = 0
sigma = 1
# import simulation data
print('Loading data...')
kaon_data = pd.read_hdf('PATH')
kaon_data = kaon_data.sample(n=n_events)
kaon_data = kaon_data.reset_index(drop=True)
kaon_data_train = kaon_data[:n_train]
kaon_data_test = kaon_data[n_train:n_events]
print("Producing training data...")
# add all inputs
P_kaon_data_train = kaon_data_train['TrackP']
Pt_kaon_data_train = kaon_data_train['TrackPt']
nTracks_kaon_data_train = kaon_data_train['NumLongTracks']
numRich1_kaon_data_train = kaon_data_train['NumRich1Hits']
numRich2_kaon_data_train = kaon_data_train['NumRich2Hits']
rich1EntryX_kaon_data_train = kaon_data_train['TrackRich1EntryX']
rich1EntryY_kaon_data_train = kaon_data_train['TrackRich1EntryY']
rich1ExitX_kaon_data_train = kaon_data_train['TrackRich1ExitX']
rich1ExitY_kaon_data_train = kaon_data_train['TrackRich1ExitY']
rich2EntryX_kaon_data_train = kaon_data_train['TrackRich2EntryX']
rich2EntryY_kaon_data_train = kaon_data_train['TrackRich2EntryY']
rich2ExitX_kaon_data_train = kaon_data_train['TrackRich2ExitX']
rich2ExitY_kaon_data_train = kaon_data_train['TrackRich2ExitY']
# add different DLL outputs
Dlle_kaon_data_train = kaon_data_train['RichDLLe']
Dlle2_kaon_data_train = kaon_data_train['RichDLLe2']
Dllmu_kaon_data_train = kaon_data_train['RichDLLmu']
Dllmu2_kaon_data_train = kaon_data_train['RichDLLmu2']
Dllk_kaon_data_train = kaon_data_train['RichDLLk']
Dllk2_kaon_data_train = kaon_data_train['RichDLLk2']
Dllp_kaon_data_train = kaon_data_train['RichDLLp']
Dllp2_kaon_data_train = kaon_data_train['RichDLLp2']
Dlld_kaon_data_train = kaon_data_train['RichDLLd']
Dlld2_kaon_data_train = kaon_data_train['RichDLLd2']
Dllbt_kaon_data_train = kaon_data_train['RichDLLbt']
Dllbt2_kaon_data_train = kaon_data_train['RichDLLbt2']
# convert to numpy array
P_kaon_data_train = P_kaon_data_train.to_numpy()
Pt_kaon_data_train = Pt_kaon_data_train.to_numpy()
nTracks_kaon_data_train = nTracks_kaon_data_train.to_numpy()
numRich1_kaon_data_train = numRich1_kaon_data_train.to_numpy()
numRich2_kaon_data_train = numRich2_kaon_data_train.to_numpy()
rich1EntryX_kaon_data_train = rich1EntryX_kaon_data_train.to_numpy()
rich1EntryY_kaon_data_train = rich1EntryY_kaon_data_train.to_numpy()
rich1ExitX_kaon_data_train = rich1ExitX_kaon_data_train.to_numpy()
rich1ExitY_kaon_data_train = rich1ExitY_kaon_data_train.to_numpy()
rich2EntryX_kaon_data_train = rich2EntryX_kaon_data_train.to_numpy()
rich2EntryY_kaon_data_train = rich2EntryY_kaon_data_train.to_numpy()
rich2ExitX_kaon_data_train = rich2ExitX_kaon_data_train.to_numpy()
rich2ExitY_kaon_data_train = rich2ExitY_kaon_data_train.to_numpy()
Dlle_kaon_data_train = Dlle_kaon_data_train.to_numpy()
Dlle2_kaon_data_train = Dlle2_kaon_data_train.to_numpy()
Dllmu_kaon_data_train = Dllmu_kaon_data_train.to_numpy()
Dllmu2_kaon_data_train = Dllmu2_kaon_data_train.to_numpy()
Dllk_kaon_data_train = Dllk_kaon_data_train.to_numpy()
Dllk2_kaon_data_train = Dllk2_kaon_data_train.to_numpy()
Dllp_kaon_data_train = Dllp_kaon_data_train.to_numpy()
Dllp2_kaon_data_train = Dllp2_kaon_data_train.to_numpy()
Dlld_kaon_data_train = Dlld_kaon_data_train.to_numpy()
Dlld2_kaon_data_train = Dlld2_kaon_data_train.to_numpy()
Dllbt_kaon_data_train = Dllbt_kaon_data_train.to_numpy()
Dllbt2_kaon_data_train = Dllbt2_kaon_data_train.to_numpy()
# Reshape arrays
P_kaon_data_train = np.array(P_kaon_data_train).reshape(-1, 1)
Pt_kaon_data_train = np.array(Pt_kaon_data_train).reshape(-1, 1)
nTracks_kaon_data_train = np.array(nTracks_kaon_data_train).reshape(-1, 1)
numRich1_kaon_data_train = np.array(numRich1_kaon_data_train).reshape(-1, 1)
numRich2_kaon_data_train = np.array(numRich2_kaon_data_train).reshape(-1, 1)
rich1EntryX_kaon_data_train = np.array(rich1EntryX_kaon_data_train).reshape(-1, 1)
rich1EntryY_kaon_data_train = np.array(rich1EntryY_kaon_data_train).reshape(-1, 1)
rich1ExitX_kaon_data_train = np.array(rich1ExitX_kaon_data_train).reshape(-1, 1)
rich1ExitY_kaon_data_train = np.array(rich1ExitY_kaon_data_train).reshape(-1, 1)
rich2EntryX_kaon_data_train = np.array(rich2EntryX_kaon_data_train).reshape(-1, 1)
rich2EntryY_kaon_data_train = np.array(rich2EntryY_kaon_data_train).reshape(-1, 1)
rich2ExitX_kaon_data_train = np.array(rich2ExitX_kaon_data_train).reshape(-1, 1)
rich2ExitY_kaon_data_train = np.array(rich2ExitY_kaon_data_train).reshape(-1, 1)
Dlle_kaon_data_train = np.array(Dlle_kaon_data_train).reshape(-1, 1)
Dlle2_kaon_data_train = np.array(Dlle2_kaon_data_train).reshape(-1, 1)
Dllmu_kaon_data_train = np.array(Dllmu_kaon_data_train).reshape(-1, 1)
Dllmu2_kaon_data_train = np.array(Dllmu2_kaon_data_train).reshape(-1, 1)
Dllk_kaon_data_train = np.array(Dllk_kaon_data_train).reshape(-1, 1)
Dllk2_kaon_data_train = np.array(Dllk2_kaon_data_train).reshape(-1, 1)
Dllp_kaon_data_train = np.array(Dllp_kaon_data_train).reshape(-1, 1)
Dllp2_kaon_data_train = np.array(Dllp2_kaon_data_train).reshape(-1, 1)
Dlld_kaon_data_train = np.array(Dlld_kaon_data_train).reshape(-1, 1)
Dlld2_kaon_data_train = np.array(Dlld2_kaon_data_train).reshape(-1, 1)
Dllbt_kaon_data_train = np.array(Dllbt_kaon_data_train).reshape(-1, 1)
Dllbt2_kaon_data_train = np.array(Dllbt2_kaon_data_train).reshape(-1, 1)
inputs_kaon_data_train = np.concatenate((P_kaon_data_train, Pt_kaon_data_train, nTracks_kaon_data_train, numRich1_kaon_data_train, numRich2_kaon_data_train, rich1EntryX_kaon_data_train,
rich1EntryY_kaon_data_train, rich1ExitX_kaon_data_train, rich1ExitY_kaon_data_train, rich2EntryX_kaon_data_train, rich2EntryY_kaon_data_train, rich2ExitX_kaon_data_train, rich2ExitY_kaon_data_train), axis=1)
Dll_kaon_data_train = np.concatenate((Dlle_kaon_data_train, Dllmu_kaon_data_train, Dllk_kaon_data_train, Dllp_kaon_data_train, Dlld_kaon_data_train, Dllbt_kaon_data_train), axis=1)
Dll2_kaon_data_train = np.concatenate((Dlle2_kaon_data_train, Dllmu2_kaon_data_train, Dllk2_kaon_data_train, Dllp2_kaon_data_train, Dlld2_kaon_data_train, Dllbt2_kaon_data_train), axis=1)
print('Transforming inputs and outputs using Quantile Transformer...')
scaler_inputs = QuantileTransformer(output_distribution='normal', n_quantiles=int(1e5), subsample=int(1e10)).fit(inputs_kaon_data_train)
scaler_Dll = QuantileTransformer(output_distribution='normal', n_quantiles=int(1e5), subsample=int(1e10)).fit(Dll_kaon_data_train)
scaler_Dll2 = QuantileTransformer(output_distribution='normal', n_quantiles=int(1e5), subsample=int(1e10)).fit(Dll2_kaon_data_train)
inputs_kaon_data_train = scaler_inputs.transform(inputs_kaon_data_train)
Dll_kaon_data_train = scaler_Dll.transform(Dll_kaon_data_train)
Dll2_kaon_data_train = scaler_Dll2.transform(Dll2_kaon_data_train)
inputs_kaon_data_train = mapping(inputs_kaon_data_train)
Dll_kaon_data_train = mapping(Dll_kaon_data_train)
Dll2_kaon_data_train = mapping(Dll2_kaon_data_train)
# REPEATING FOR TESTING DATA
print("Producing testing data...")
# add all inputs
P_kaon_data_test = kaon_data_test['TrackP']
Pt_kaon_data_test = kaon_data_test['TrackPt']
nTracks_kaon_data_test = kaon_data_test['NumLongTracks']
numRich1_kaon_data_test = kaon_data_test['NumRich1Hits']
numRich2_kaon_data_test = kaon_data_test['NumRich2Hits']
rich1EntryX_kaon_data_test = kaon_data_test['TrackRich1EntryX']
rich1EntryY_kaon_data_test = kaon_data_test['TrackRich1EntryY']
rich1ExitX_kaon_data_test = kaon_data_test['TrackRich1ExitX']
rich1ExitY_kaon_data_test = kaon_data_test['TrackRich1ExitY']
rich2EntryX_kaon_data_test = kaon_data_test['TrackRich2EntryX']
rich2EntryY_kaon_data_test = kaon_data_test['TrackRich2EntryY']
rich2ExitX_kaon_data_test = kaon_data_test['TrackRich2ExitX']
rich2ExitY_kaon_data_test = kaon_data_test['TrackRich2ExitY']
# add different DLL outputs
Dlle_kaon_data_test = kaon_data_test['RichDLLe']
Dlle2_kaon_data_test = kaon_data_test['RichDLLe2']
Dllmu_kaon_data_test = kaon_data_test['RichDLLmu']
Dllmu2_kaon_data_test = kaon_data_test['RichDLLmu2']
Dllk_kaon_data_test = kaon_data_test['RichDLLk']
Dllk2_kaon_data_test = kaon_data_test['RichDLLk2']
Dllp_kaon_data_test = kaon_data_test['RichDLLp']
Dllp2_kaon_data_test = kaon_data_test['RichDLLp2']
Dlld_kaon_data_test = kaon_data_test['RichDLLd']
Dlld2_kaon_data_test = kaon_data_test['RichDLLd2']
Dllbt_kaon_data_test = kaon_data_test['RichDLLbt']
Dllbt2_kaon_data_test = kaon_data_test['RichDLLbt2']
# convert to numpy array
P_kaon_data_test = P_kaon_data_test.to_numpy()
Pt_kaon_data_test = Pt_kaon_data_test.to_numpy()
nTracks_kaon_data_test = nTracks_kaon_data_test.to_numpy()
numRich1_kaon_data_test = numRich1_kaon_data_test.to_numpy()
numRich2_kaon_data_test = numRich2_kaon_data_test.to_numpy()
rich1EntryX_kaon_data_test = rich1EntryX_kaon_data_test.to_numpy()
rich1EntryY_kaon_data_test = rich1EntryY_kaon_data_test.to_numpy()
rich1ExitX_kaon_data_test = rich1ExitX_kaon_data_test.to_numpy()
rich1ExitY_kaon_data_test = rich1ExitY_kaon_data_test.to_numpy()
rich2EntryX_kaon_data_test = rich2EntryX_kaon_data_test.to_numpy()
rich2EntryY_kaon_data_test = rich2EntryY_kaon_data_test.to_numpy()
rich2ExitX_kaon_data_test = rich2ExitX_kaon_data_test.to_numpy()
rich2ExitY_kaon_data_test = rich2ExitY_kaon_data_test.to_numpy()
Dlle_kaon_data_test = Dlle_kaon_data_test.to_numpy()
Dlle2_kaon_data_test = Dlle2_kaon_data_test.to_numpy()
Dllmu_kaon_data_test = Dllmu_kaon_data_test.to_numpy()
Dllmu2_kaon_data_test = Dllmu2_kaon_data_test.to_numpy()
Dllk_kaon_data_test = Dllk_kaon_data_test.to_numpy()
Dllk2_kaon_data_test = Dllk2_kaon_data_test.to_numpy()
Dllp_kaon_data_test = Dllp_kaon_data_test.to_numpy()
Dllp2_kaon_data_test = Dllp2_kaon_data_test.to_numpy()
Dlld_kaon_data_test = Dlld_kaon_data_test.to_numpy()
Dlld2_kaon_data_test = Dlld2_kaon_data_test.to_numpy()
Dllbt_kaon_data_test = Dllbt_kaon_data_test.to_numpy()
Dllbt2_kaon_data_test = Dllbt2_kaon_data_test.to_numpy()
P_kaon_data_test = np.array(P_kaon_data_test).reshape(-1, 1)
Pt_kaon_data_test = np.array(Pt_kaon_data_test).reshape(-1, 1)
nTracks_kaon_data_test = np.array(nTracks_kaon_data_test).reshape(-1, 1)
numRich1_kaon_data_test = np.array(numRich1_kaon_data_test).reshape(-1, 1)
numRich2_kaon_data_test = np.array(numRich2_kaon_data_test).reshape(-1, 1)
rich1EntryX_kaon_data_test = np.array(rich1EntryX_kaon_data_test).reshape(-1, 1)
rich1EntryY_kaon_data_test = np.array(rich1EntryY_kaon_data_test).reshape(-1, 1)
rich1ExitX_kaon_data_test = np.array(rich1ExitX_kaon_data_test).reshape(-1, 1)
rich1ExitY_kaon_data_test = np.array(rich1ExitY_kaon_data_test).reshape(-1, 1)
rich2EntryX_kaon_data_test = np.array(rich2EntryX_kaon_data_test).reshape(-1, 1)
rich2EntryY_kaon_data_test = np.array(rich2EntryY_kaon_data_test).reshape(-1, 1)
rich2ExitX_kaon_data_test = np.array(rich2ExitX_kaon_data_test).reshape(-1, 1)
rich2ExitY_kaon_data_test = np.array(rich2ExitY_kaon_data_test).reshape(-1, 1)
Dlle_kaon_data_test = np.array(Dlle_kaon_data_test).reshape(-1, 1)
Dlle2_kaon_data_test = np.array(Dlle2_kaon_data_test).reshape(-1, 1)
Dllmu_kaon_data_test = np.array(Dllmu_kaon_data_test).reshape(-1, 1)
Dllmu2_kaon_data_test = np.array(Dllmu2_kaon_data_test).reshape(-1, 1)
Dllk_kaon_data_test = np.array(Dllk_kaon_data_test).reshape(-1, 1)
Dllk2_kaon_data_test = np.array(Dllk2_kaon_data_test).reshape(-1, 1)
Dllp_kaon_data_test = np.array(Dllp_kaon_data_test).reshape(-1, 1)
Dllp2_kaon_data_test = np.array(Dllp2_kaon_data_test).reshape(-1, 1)
Dlld_kaon_data_test = np.array(Dlld_kaon_data_test).reshape(-1, 1)
Dlld2_kaon_data_test = np.array(Dlld2_kaon_data_test).reshape(-1, 1)
Dllbt_kaon_data_test = np.array(Dllbt_kaon_data_test).reshape(-1, 1)
Dllbt2_kaon_data_test = np.array(Dllbt2_kaon_data_test).reshape(-1, 1)
inputs_kaon_data_test = np.concatenate((P_kaon_data_test, Pt_kaon_data_test, nTracks_kaon_data_test, numRich1_kaon_data_test, numRich2_kaon_data_test, rich1EntryX_kaon_data_test, rich1EntryY_kaon_data_test, rich1ExitX_kaon_data_test, rich1ExitY_kaon_data_test, rich2EntryX_kaon_data_test, rich2EntryY_kaon_data_test, rich2ExitX_kaon_data_test, rich2ExitY_kaon_data_test), axis=1)
Dll_kaon_data_test = np.concatenate((Dlle_kaon_data_test, Dllmu_kaon_data_test, Dllk_kaon_data_test, Dllp_kaon_data_test, Dlld_kaon_data_test, Dllbt_kaon_data_test), axis=1)
Dll2_kaon_data_test = np.concatenate((Dlle2_kaon_data_test, Dllmu2_kaon_data_test, Dllk2_kaon_data_test, Dllp2_kaon_data_test, Dlld2_kaon_data_test, Dllbt2_kaon_data_test), axis=1)
print('Transforming inputs and outputs using Quantile Transformer...')
inputs_kaon_data_test = scaler_inputs.transform(inputs_kaon_data_test)
Dll_kaon_data_test = scaler_Dll.transform(Dll_kaon_data_test)
Dll2_kaon_data_test = scaler_Dll.transform(Dll2_kaon_data_test)
inputs_kaon_data_test = mapping(inputs_kaon_data_test)
Dll_kaon_data_test = mapping(Dll_kaon_data_test)
Dll2_kaon_data_test = mapping(Dll2_kaon_data_test)
# Producing testing data
params_list_test = np.random.normal(loc=mu, scale=sigma, size=[len(kaon_data_test), self.params])
for e in range(len(kaon_data_test)):
params_list_test[e][0] = inputs_kaon_data_test[e][0]
params_list_test[e][1] = inputs_kaon_data_test[e][1]
params_list_test[e][2] = inputs_kaon_data_test[e][2]
params_list_test[e][3] = inputs_kaon_data_test[e][3]
params_list_test[e][4] = inputs_kaon_data_test[e][4]
params_list_test[e][5] = inputs_kaon_data_test[e][5]
params_list_test[e][6] = inputs_kaon_data_test[e][6]
params_list_test[e][7] = inputs_kaon_data_test[e][7]
params_list_test[e][8] = inputs_kaon_data_test[e][8]
params_list_test[e][9] = inputs_kaon_data_test[e][9]
params_list_test[e][10] = inputs_kaon_data_test[e][10]
params_list_test[e][11] = inputs_kaon_data_test[e][11]
params_list_test[e][12] = inputs_kaon_data_test[e][12]
obs_simu_1_test = np.zeros((len(kaon_data_test), self.observables, 1))
obs_simu_1_test.fill(-1)
for e in range(len(kaon_data_test)):
obs_simu_1_test[e][0][0] = Dll_kaon_data_test[e][0]
obs_simu_1_test[e][1][0] = Dll_kaon_data_test[e][1]
obs_simu_1_test[e][2][0] = Dll_kaon_data_test[e][2]
obs_simu_1_test[e][3][0] = Dll_kaon_data_test[e][3]
obs_simu_1_test[e][4][0] = Dll_kaon_data_test[e][4]
obs_simu_1_test[e][5][0] = Dll_kaon_data_test[e][5]
obs_simu_2_test = np.zeros((len(kaon_data_test), self.observables, 1))
obs_simu_2_test.fill(-1)
for e in range(len(kaon_data_test)):
obs_simu_2_test[e][0][0] = Dll2_kaon_data_test[e][0]
obs_simu_2_test[e][1][0] = Dll2_kaon_data_test[e][1]
obs_simu_2_test[e][2][0] = Dll2_kaon_data_test[e][2]
obs_simu_2_test[e][3][0] = Dll2_kaon_data_test[e][3]
obs_simu_2_test[e][4][0] = Dll2_kaon_data_test[e][4]
obs_simu_2_test[e][5][0] = Dll2_kaon_data_test[e][5]
event_no_par = 0
event_no_obs_1 = 0
event_no_obs_2 = 0
d1_hist, d2_hist, d_hist, g_hist, a1_hist, a2_hist = list(), list(), list(), list(), list(), list()
print('Beginning pre-training...')
'''
#Pre-training stage
'''
for train_step in range(pretrain_steps):
log_mesg = '%d' % train_step
noise_value = 0.3
params_list = np.random.normal(loc=mu,scale=sigma, size=[batch_size, self.params])
y_ones = np.ones([batch_size, 1])
y_zeros = np.zeros([batch_size, 1])
# add physics parameters + noise to params_list
for b in range(batch_size):
params_list[b][0] = inputs_kaon_data_train[event_no_par][0]
params_list[b][1] = inputs_kaon_data_train[event_no_par][1]
params_list[b][2] = inputs_kaon_data_train[event_no_par][2]
params_list[b][3] = inputs_kaon_data_train[event_no_par][3]
params_list[b][4] = inputs_kaon_data_train[event_no_par][4]
params_list[b][5] = inputs_kaon_data_train[event_no_par][5]
params_list[b][6] = inputs_kaon_data_train[event_no_par][6]
params_list[b][7] = inputs_kaon_data_train[event_no_par][7]
params_list[b][8] = inputs_kaon_data_train[event_no_par][8]
params_list[b][9] = inputs_kaon_data_train[event_no_par][9]
params_list[b][10] = inputs_kaon_data_train[event_no_par][10]
params_list[b][11] = inputs_kaon_data_train[event_no_par][11]
params_list[b][12] = inputs_kaon_data_train[event_no_par][12]
event_no_par += 1
# Step 1
# simulated observables (number 1)
obs_simu_1 = np.zeros((batch_size, self.observables, 1))
obs_simu_1.fill(-1)
for b in range(batch_size):
obs_simu_1[b][0][0] = Dll_kaon_data_train[event_no_obs_1][0]
obs_simu_1[b][1][0] = Dll_kaon_data_train[event_no_obs_1][1]
obs_simu_1[b][2][0] = Dll_kaon_data_train[event_no_obs_1][2]
obs_simu_1[b][3][0] = Dll_kaon_data_train[event_no_obs_1][3]
obs_simu_1[b][4][0] = Dll_kaon_data_train[event_no_obs_1][4]
obs_simu_1[b][5][0] = Dll_kaon_data_train[event_no_obs_1][5]
event_no_obs_1 += 1
obs_simu_1_copy = np.copy(obs_simu_1)
# simulated observables (Gaussian smeared - number 2)
obs_simu_2 = np.zeros((batch_size, self.observables, 1))
obs_simu_2.fill(-1)
for b in range(batch_size):
obs_simu_2[b][0][0] = Dll2_kaon_data_train[event_no_obs_2][0]
obs_simu_2[b][1][0] = Dll2_kaon_data_train[event_no_obs_2][1]
obs_simu_2[b][2][0] = Dll2_kaon_data_train[event_no_obs_2][2]
obs_simu_2[b][3][0] = Dll2_kaon_data_train[event_no_obs_2][3]
obs_simu_2[b][4][0] = Dll2_kaon_data_train[event_no_obs_2][4]
obs_simu_2[b][5][0] = Dll2_kaon_data_train[event_no_obs_2][5]
event_no_obs_2 += 1
obs_simu_2_copy = np.copy(obs_simu_2)
# emulated DLL values
obs_emul = self.emulator.predict(params_list)
obs_emul_copy = np.copy(obs_emul)
# decay the learn rate
if(train_step % 1000 == 0 and train_step>0):
siamese_lr = K.eval(self.siamese.optimizer.lr)
K.set_value(self.siamese.optimizer.lr, siamese_lr*0.7)
print('lr for Siamese network updated from %f to %f' % (siamese_lr, siamese_lr*0.7))
adversarial1_lr = K.eval(self.adversarial1.optimizer.lr)
K.set_value(self.adversarial1.optimizer.lr, adversarial1_lr*0.7)
print('lr for Adversarial1 network updated from %f to %f' % (adversarial1_lr, adversarial1_lr*0.7))
loss_simu_list = [obs_simu_1_copy, obs_simu_2_copy]
loss_fake_list = [obs_simu_1_copy, obs_emul_copy]
input_val = 0
# swap which inputs to give to Siamese network
if(np.random.random() < 0.5):
loss_simu_list[0], loss_simu_list[1] = loss_simu_list[1], loss_simu_list[0]
if(np.random.random() < 0.5):
loss_fake_list[0] = obs_simu_2_copy
input_val = 1
# noise
y_ones = np.array([np.random.uniform(0.97, 1.00) for x in range(batch_size)]).reshape([batch_size, 1])
y_zeros = np.array([np.random.uniform(0.00, 0.03) for x in range(batch_size)]).reshape([batch_size, 1])
if(input_val == 0):
if np.random.random() < noise_value:
for b in range(batch_size):
if np.random.random() < noise_value:
obs_simu_1_copy[b], obs_simu_2_copy[b] = obs_simu_2[b], obs_simu_1[b]
obs_simu_1_copy[b], obs_emul_copy[b] = obs_emul[b], obs_simu_1[b]
if(input_val == 1):
if np.random.random() < noise_value:
for b in range(batch_size):
if np.random.random() < noise_value:
obs_simu_1_copy[b], obs_simu_2_copy[b] = obs_simu_2[b], obs_simu_1[b]
obs_simu_2_copy[b], obs_emul_copy[b] = obs_emul[b], obs_simu_2[b]
# train siamese
d_loss_simu = self.siamese.train_on_batch(loss_simu_list, y_ones)
d_loss_fake = self.siamese.train_on_batch(loss_fake_list, y_zeros)
d_loss = 0.5 * np.add(d_loss_simu, d_loss_fake)
log_mesg = '%s [S loss: %f]' % (log_mesg, d_loss[0])
#print(log_mesg)
#print('--------------------')
#noise_value*=0.999
#Step 2
# train emulator
a_loss_list = [obs_simu_1, params_list]
a_loss = self.adversarial1.train_on_batch(a_loss_list, y_ones)
log_mesg = '%s [E loss: %f]' % (log_mesg, a_loss[0])
print(log_mesg)
print('--------------------')
noise_value*=0.999
if __name__ == '__main__':
params_physics = 13
params_noise = 51 #51 looks ok, 61 is probably best, 100 also works
params = params_physics + params_noise
observables= 6
train_no = 1
magan = ModelAssistedGANPID(params=params, observables=observables)
magan.train(pretrain_steps=11001, train_steps=10000, batch_size=32, train_no=train_no)
NETWORKS
class Networks(object):
def __init__(self, noise_size=100, params=64, observables=5):
self.noise_size = noise_size
self.params = params
self.observables = observables
self.E = None # emulator
self.S = None # siamese
self.SM = None # siamese model
self.AM1 = None # adversarial model 1
'''
Emulator: generate identical observable parameters to those of the simulator S when both E and S are fed with the same input parameters
'''
def emulator(self):
if self.E:
return self.E
# input params
# the model takes as input an array of shape (*, self.params = 6)
input_params_shape = (self.params,)
input_params_layer = Input(shape=input_params_shape, name='input_params')
# architecture
self.E = Dense(1024)(input_params_layer)
self.E = LeakyReLU(0.2)(self.E)
self.E = Dense(self.observables*128, kernel_initializer=initializers.RandomNormal(stddev=0.02))(self.E)
self.E = LeakyReLU(0.2)(self.E)
self.E = Reshape((self.observables, 128))(self.E)
self.E = UpSampling1D(size=2)(self.E)
self.E = Conv1D(64, kernel_size=7, padding='valid')(self.E)
self.E = LeakyReLU(0.2)(self.E)
self.E = UpSampling1D(size=2)(self.E)
self.E = Conv1D(1, kernel_size=7, padding='valid', activation='tanh')(self.E)
# model
self.E = Model(inputs=input_params_layer, outputs=self.E, name='emulator')
# print
print("Emulator")
self.E.summary()
return self.E
'''
Siamese: determine the similarity between output values produced by the simulator and emulator
'''
def siamese(self):
if self.S:
return self.S
# input DLL images
input_shape = (self.observables, 1)
input_layer_anchor = Input(shape=input_shape, name='input_layer_anchor')
input_layer_candid = Input(shape=input_shape, name='input_layer_candidate')
input_layer = Input(shape=input_shape, name='input_layer')
# siamese
cnn = Conv1D(64, kernel_size=8, strides=2, padding='same',
kernel_initializer=initializers.RandomNormal(stddev=0.02))(input_layer)
cnn = LeakyReLU(0.2)(cnn)
cnn = Conv1D(128, kernel_size=5, strides=2, padding='same')(cnn)
cnn = LeakyReLU(0.2)(cnn)
cnn = Flatten()(cnn)
cnn = Dense(128, activation='sigmoid')(cnn)
cnn = Model(inputs=input_layer, outputs=cnn, name='cnn')
# left and right encodings
encoded_l = cnn(input_layer_anchor)
encoded_r = cnn(input_layer_candid)
# merge two encoded inputs with the L1 or L2 distance between them
L1_distance = lambda x: K.abs(x[0]-x[1])
L2_distance = lambda x: (x[0]-x[1]+K.epsilon())**2/(x[0]+x[1]+K.epsilon())
both = Lambda(L2_distance)([encoded_l, encoded_r])
prediction = Dense(1, activation='sigmoid')(both)
# model
self.S = Model([input_layer_anchor, input_layer_candid], outputs=prediction, name='siamese')
# print
print("Siamese:")
self.S.summary()
print("Siamese CNN:")
cnn.summary()
return self.S
'''
Siamese model
'''
def siamese_model(self):
if self.SM:
return self.SM
# optimizer
optimizer = Adam(lr=0.004, beta_1=0.5, beta_2=0.9)
# input DLL values
input_shape = (self.observables, 1)
input_layer_anchor = Input(shape=input_shape, name='input_layer_anchor')
input_layer_candid = Input(shape=input_shape, name='input_layer_candidate')
input_layer = [input_layer_anchor, input_layer_candid]
# discriminator
siamese_ref = self.siamese()
siamese_ref.trainable = True
self.SM = siamese_ref(input_layer)
# model
self.SM = Model(inputs=input_layer, outputs=self.SM, name='siamese_model')
self.SM.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[metrics.binary_accuracy])
print("Siamese model")
self.SM.summary()
return self.SM
'''
Adversarial 1 model (adversarial pre-training phase) - this is where the emulator and siamese network are trained to enable the emulator to generate DLL values for a set of given physics inputs
'''
def adversarial1_model(self):
if self.AM1:
return self.AM1
optimizer = Adam(lr=0.0004, beta_1=0.5, beta_2=0.9)
# input 1: simulated DLL values
input_obs_shape = (self.observables, 1)
input_obs_layer = Input(shape=input_obs_shape, name='input_obs')
# input 2: params
input_params_shape = (self.params, )
input_params_layer = Input(shape=input_params_shape, name='input_params')
# emulator
emulator_ref = self.emulator()
emulator_ref.trainable = True
self.AM1 = emulator_ref(input_params_layer)
# siamese
siamese_ref = self.siamese()
siamese_ref.trainable = False
self.AM1 = siamese_ref([input_obs_layer, self.AM1])
# model
input_layer = [input_obs_layer, input_params_layer]
self.AM1 = Model(inputs=input_layer, outputs=self.AM1, name='adversarial_1_model')
self.AM1.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[metrics.binary_accuracy])
# print
print("Adversarial 1 model:")
self.AM1.summary()
return self.AM1
INPUTS PLOT
OUTPUTS PLOT
LOSS PLOT
GENERATED OUTPUT (ORANGE) vs TRUE OUTPUT (BLUE)

very large value of loss in AlexNet

Actually I am using AlexNet to classify my images in 2 groups , I am feeding images to the model in a batch of 60 images and the loss which I am getting after every batch is 6 to 7 digits large (for ex. 1428529.0) , here I am confused that why my loss is such a large value because on MNIST dataset the loss which I got was very small as compared to this. Can anyone explain me why I am getting such a large loss value.
Thanks in advance ;-)
Here is the code :-
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os
img_size = 227
num_channels = 1
img_flat_size = img_size * img_size
num_classes = 2
drop = 0.5
x = tf.placeholder(tf.float32,[None,img_flat_size])
y = tf.placeholder(tf.float32,[None,num_classes])
drop_p = tf.placeholder(tf.float32)
def new_weight(shape):
return tf.Variable(tf.random_normal(shape))
def new_bias(size):
return tf.Variable(tf.random_normal(size))
def new_conv(x,num_input_channels,filter_size,num_filters,stride,padd="SAME"):
shape = [filter_size,filter_size,num_input_channels,num_filters]
weight = new_weight(shape)
bias = new_bias([num_filters])
conv = tf.nn.conv2d(x,weight,strides=[1,stride,stride,1],padding=padd)
conv = tf.nn.bias_add(conv,bias)
return tf.nn.relu(conv)
def new_max_pool(x,k,stride):
max_pool = tf.nn.max_pool(x,ksize=[1,k,k,1],strides=[1,stride,stride,1],padding="VALID")
return max_pool
def flatten_layer(layer):
layer_shape = layer.get_shape()
num_features = layer_shape[1:4].num_elements()
flat_layer = tf.reshape(layer,[-1,num_features])
return flat_layer,num_features
def new_fc_layer(x,num_input,num_output):
weight = new_weight([num_input,num_output])
bias = new_bias([num_output])
fc_layer = tf.matmul(x,weight) + bias
return fc_layer
def lrn(x, radius, alpha, beta, bias=1.0):
"""Create a local response normalization layer."""
return tf.nn.local_response_normalization(x, depth_radius=radius,
alpha=alpha, beta=beta,
bias=bias)
def AlexNet(x,drop,img_size):
x = tf.reshape(x,shape=[-1,img_size,img_size,1])
conv1 = new_conv(x,num_channels,11,96,4,"VALID")
max_pool1 = new_max_pool(conv1,3,2)
norm1 = lrn(max_pool1, 2, 2e-05, 0.75)
conv2 = new_conv(norm1,96,5,256,1)
max_pool2 = new_max_pool(conv2,3,2)
norm2 = lrn(max_pool2, 2, 2e-05, 0.75)
conv3 = new_conv(norm2,256,3,384,1)
conv4 = new_conv(conv3,384,3,384,1)
conv5 = new_conv(conv4,384,3,256,1)
max_pool3 = new_max_pool(conv5,3,2)
layer , num_features = flatten_layer(max_pool3)
fc1 = new_fc_layer(layer,num_features,4096)
fc1 = tf.nn.relu(fc1)
fc1 = tf.nn.dropout(fc1,drop)
fc2 = new_fc_layer(fc1,4096,4096)
fc2 = tf.nn.relu(fc2)
fc2 = tf.nn.dropout(fc2,drop)
out = new_fc_layer(fc2,4096,2)
return out #, tf.nn.softmax(out)
def read_and_decode(tfrecords_file, batch_size):
'''read and decode tfrecord file, generate (image, label) batches
Args:
tfrecords_file: the directory of tfrecord file
batch_size: number of images in each batch
Returns:
image: 4D tensor - [batch_size, width, height, channel]
label: 1D tensor - [batch_size]
'''
# make an input queue from the tfrecord file
filename_queue = tf.train.string_input_producer([tfrecords_file])
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
img_features = tf.parse_single_example(
serialized_example,
features={
'label': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
})
image = tf.decode_raw(img_features['image_raw'], tf.uint8)
##########################################################
# you can put data augmentation here, I didn't use it
##########################################################
# all the images of notMNIST are 28*28, you need to change the image size if you use other dataset.
image = tf.reshape(image, [227, 227])
label = tf.cast(img_features['label'], tf.int32)
image_batch, label_batch = tf.train.batch([image, label],
batch_size= batch_size,
num_threads= 1,
capacity = 6000)
return tf.reshape(image_batch,[batch_size,227*227*1]), tf.reshape(label_batch, [batch_size])
pred = AlexNet(x,drop_p,img_size) #pred
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred,labels=y))
optimiser = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(loss)
correct_pred = tf.equal(tf.argmax(pred,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
cost = tf.summary.scalar('loss',loss)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
merge_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter('./AlexNet',graph = tf.get_default_graph())
tf_record_file = 'train.tfrecords'
x_val ,y_val = read_and_decode(tf_record_file,20)
y_val = tf.one_hot(y_val,depth=2,on_value=1,off_value=0)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
x_val = x_val.eval()
y_val = y_val.eval()
epoch = 2
for i in range(epoch):
_, summary= sess.run([optimiser,merge_summary],feed_dict={x:x_val,y:y_val,drop_p:drop})
summary_writer.add_summary(summary,i)
loss_a,accu = sess.run([loss,accuracy],feed_dict={x:x_val,y:y_val,drop_p:1.0})
print "Epoch "+str(i+1) +', Minibatch Loss = '+ \
"{:.6f}".format(loss_a) + ', Training Accuracy = '+ \
'{:.5f}'.format(accu)
print "Optimization Finished!"
tf_record_file1 = 'test.tfrecords'
x_v ,y_v = read_and_decode(tf_record_file1,10)
y_v = tf.one_hot(y_v,depth=2,on_value=1,off_value=0)
coord1 = tf.train.Coordinator()
threads1 = tf.train.start_queue_runners(coord=coord1)
x_v = sess.run(x_v)
y_v = sess.run(y_v)
print "Testing Accuracy : "
print sess.run(accuracy,feed_dict={x:x_v,y:y_v,drop_p:1.0})
coord.request_stop()
coord.join(threads)
coord1.request_stop()
coord1.join(threads1)
Take a look a what a confusion matrix is. It is a performance evaluator. In addition, you should compare your precision versus your recall. Precision is the accuracy of your positive predictions and recall is the ratio of positive instances that are correctly detected by the classifier. By combining both precision and recall, you get the F_1 score which is keep in evaluating the problems of your model.
I would suggest you pick up the text Hands-On Machine Learning with Scikit-Learn and TensorFlow. It is a truly comprehensive book and covers what I describe above in more detail.

Resources