PyTorch Siamese Network Oscillating / Fluctuating Loss Function - image-processing

I have implemented a siamese NN for regression using the resnet18 for transfer learning. The goal is to calculate the correlation coefficient between two images, since we do not have raw data but only images for a specific signal. We want to measure similarity between images. However the loss function of my nn is always oscillating up and down.
Code below:
Model itself
class firstNN(nn.Module):
def __init__(self):
# wofür ist das da?
super(firstNN, self).__init__()
self.resnet = models.resnet18(pretrained=True)
for param in self.resnet.parameters():
param.requires_grad = False
# over-write the first conv layer to be able to read images
# as resnet18 reads (3,x,x) where 3 is RGB channels
# whereas MNIST has (1,x,x) where 1 is a gray-scale channel
self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
self.fc_in_features = self.resnet.fc.in_features
# remove the last layer of resnet18 (linear layer which is before avgpool layer)
self.resnet = torch.nn.Sequential(*(list(self.resnet.children())[:-1]))
# add linear layers to compare between the features of the two images
self.fc = nn.Sequential(
nn.Linear(self.fc_in_features, hidden_dim),
torch.nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim2),
torch.nn.ReLU(),
nn.Linear(hidden_dim2,hidden_dim3),
torch.nn.ReLU(),
nn.Linear(hidden_dim3,1),
# nn.ReLU(),
# nn.Linear(input_dim, third_dim),
)
# Distance function
self.binary = False
# Get params and register optimizer
info, params = self.get_model_params()
#self.optimizer = optim.Adam(params, lr=learning_rate,
# weight_decay=weight_decay)
# self.optimizer = optim.SGD(params, lr=learning_rate,
# momentum=0.5)
#self.criterion = nn.BCELoss()
#self.criterion = nn.MSELoss()
LOGGER.info(info)
# Initialisiert die weights mit "random" Werten
def init_layers(self):
nn.init.xavier_normal(self.fc[0].weight.data).to(device)
nn.init.xavier_normal(self.fc[2].weight.data).to(device)
# Erstellt NN mit dem input, inputs ist unser batch
def siamese_basic(self, inputs):
output = self.resnet(inputs)
output = output.view(output.size()[0], -1)
output = self.fc(output)
return output
def distance_layer(self, vec1, vec2, distance='cos'):
if distance == 'cos':
similarity = F.cosine_similarity(
vec1 + 1e-16, vec2 + 1e-16, dim=-1)
elif distance == 'l1':
similarity = self.dist_fc(torch.abs(vec1 - vec2))
similarity = similarity.squeeze(1)
elif distance == 'l2':
similarity = self.dist_fc(torch.abs(vec1 - vec2) ** 2)
similarity = similarity.squeeze(1)
ic()
#if self.binary:
# similarity = F.sigmoid(similarity)
return similarity
def forward(self, template, img):
embed1 = self.siamese_basic(template)
embed2 = self.siamese_basic(img)
# print(f"Before reshape embed2 {embed2.shape}")
# print(f"Befor reshape embed1 {embed1.shape}")
embed1 = embed1.reshape(template.shape[0],-1).float()
embed2 = embed2.reshape(img.shape[0],-1).float()
similarity = self.distance_layer(embed1, embed2)
# print(f"embed2 {embed2.shape}")
# print(f"embed1 {embed1.shape}")
# print(f"similarity {similarity.shape}")
ic()
return similarity#, embed1, embed2
def get_loss(self, outputs, targets):
loss = self.criterion(outputs, targets)
ic()
return loss
def get_model_params(self):
params = []
total_size = 0
def multiply_iter(p_list):
out = 1
for p in p_list:
out *= p
return out
for p in self.parameters():
if p.requires_grad:
params.append(p)
total_size += multiply_iter(p.size())
return '{}\nparam size: {:,}\n'.format(self, total_size), params
def save_checkpoint(self, state, checkpoint_dir, filename):
filename = checkpoint_dir + filename
LOGGER.info('Save checkpoint %s' % filename)
torch.save(state, filename)
def load_checkpoint(self, checkpoint_dir, filename):
filename = checkpoint_dir + filename
LOGGER.info('Load checkpoint %s' % filename)
checkpoint = torch.load(filename)
self.load_state_dict(checkpoint['state_dict'])
self.optimizer.load_state_dict(checkpoint['optimizer'])
Choice of criterion etc
model = firstNN()
criterion = nn.MSELoss()
#optimizer = optim.Adam(model.parameters(), lr=learning_rate,
# weight_decay=weight_decay)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
training_data = CustomImageDataset("")
# Create data loaders.
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
hidden_dim = 128
hidden_dim2 = 64
hidden_dim3 = 32
do_learn = True
save_frequency = 2
batch_size = 40 if torch.cuda.is_available() else 64
learning_rate = 0.0001
num_epochs = 15
weight_decay = 0.1
momentum = 0.9
loss_history = []
r2_history = []
loss_history2 = []
r2_history2 = []
LOGGER = logging.getLogger(__name__)
torch.cuda.empty_cache()
model = firstNN().to(device)
model.train()
for epoch in range (num_epochs):
running_r2 = 0.0
running_loss = 0.0
for batch_idx, (templates, images, targets) in enumerate(train_loader):
templates = templates.unsqueeze(1).float().to(device)
images = images.unsqueeze(1).float().to(device)
targets = targets.float().to(device)
optimizer.zero_grad()
outputs = model(templates, images)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
r2score = torchmetrics.R2Score().to(device)
rscore = r2score(outputs, torch.tensor(targets).squeeze())
running_loss += loss.item()
running_r2 += rscore.item()
loss_history2.append(loss.item())
r2_history2.append(rscore.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tR2Score: {}'.format(
epoch, batch_idx * len(templates), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item(), rscore ))
running_loss = running_loss / len(train_loader)
running_r2 = running_r2 / len(train_loader)
loss_history.append(running_loss)
r2_history.append(running_r2)
Example of images with spearman correlation of 0.45
Example of Oscillating loss and r2
I have tried using several different learning rates and experimented with weight decay and change of optimizer / nn setup but I dont understant exactly how to combat the issue.

Related

Pytorch Batch Size issue when comparing outputs form model and labels

Im having issues with the input and output size being halfed from 16 to 8 when running through my model.I've tried tweaking the input/output size between the maxpool and linear layer, that doesn't work. I was wondering if it has something to do with my loss criterion inputs or if the model should be outputting 16 instead of 8.
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from os import listdir
import os
from os.path import isdir
from torchsummary import summary
# Define the preprocessing steps
transform = transforms.Compose([
transforms.Resize((32, 32)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Define the custom dataset
class VideoDataset(Dataset):
def __init__(self, data_dir, transform=None):
self.data_dir = data_dir
self.transform = transform
def __len__(self):
return len(self.data_dir)
def __getitem__(self, idx):
video_dir = self.data_dir[idx]
print(video_dir)
video = []
for i in range(10): # For example, each video has 10 frames
img = Image.open(f"{video_dir}/frame_{i}.jpg")
if self.transform:
img = self.transform(img)
video.append(img)
video = torch.stack(video)
if(video_dir.find("squat")):
label = 1
if(video_dir.find("pull")):
label = 0
else:
label = 0
# label = str(video_dir.split("/")[-2]) # Assume the class label is included in the video directory name
sample = {'video': video, 'label': label}
#print(sample)
return sample
# Load the data
path = "videos/squat/"
path_pullups = "videos/pull ups/"
path_situp = "videos/situp/"
data_dir = list()
for file in os.scandir(path):
if file.is_dir():
data_dir.append(path + file.name)
for file in os.scandir(path_pullups):
if file.is_dir():
data_dir.append(path_pullups + file.name)
for file in os.scandir(path_situp):
if file.is_dir():
data_dir.append(path_situp + file.name)
print(len(data_dir)/2)
# Split the data into training and validation sets
train_data = VideoDataset(data_dir[:243], transform=transform) # Use first two classes for training
#print("train" + str(train_data.data_dir))
#valid_data = VideoDataset(data_dir[165:], transform=transform) # Use last class for validation
# Define the data loaders
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
#valid_loader = DataLoader(valid_data, batch_size=16, shuffle=False)
# Define the CNN model
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = torch.nn.Conv3d(10, 16, kernel_size=(3, 3, 3), stride=1, padding=1)
self.pool = torch.nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2, padding=0)
self.fc1 = torch.nn.Linear(16 * 8 * 8 * 8, 32) #16*16*2
self.fc2 = torch.nn.Linear(32, 3)
self.fc3 = torch.nn.Linear(3, 1)
def forward(self, x):
x = self.pool(torch.nn.functional.relu(self.conv1(x)))
x = x.view(-1, 16 * 8 * 8 * 8)
x = torch.nn.functional.relu(self.fc1(x))
x = self.fc2(x)
x = self.fc3(x)
x = torch.sigmoid(x)
return x
# Initialize the model, loss function, and optimizer
model = Net()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Train the model
for epoch in range(10): # Train for 10 epochs
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
inputs, labels = data['video'], data['label']
# .view(-1,1)
outputs = model(inputs)
#if labels.shape[0] != outputs.shape[0]:
# labels = labels.view(-1, outputs.shape[0]).t()
summary(model, (10, 3, 32, 32), device='cpu')
print("Labels size:" + str(labels.shape))
print("Outputs size:" + str(outputs.shape))
print(outputs, labels)
#####################################################################
loss = criterion(outputs, labels) #### error here
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f"Epoch {epoch + 1} loss: {running_loss / (i + 1)}")
# Evaluate the model
# correct = 0
# total = 0
# with torch.no_grad():
# for data in valid_loader:
# inputs, labels = data['video'], data['label']
# outputs = model(inputs)
# _, predicted = torch.max(outputs.data, 1)
# total += labels.size(0)
# correct += (predicted == labels).sum().item()
# print(f"Accuracy of the model on the validation set: {100 * correct / total}%")
Sample inputs are frames from video clips like this:
described by their exercise category such as squats, situps, pullups, etc.
Desired outputs for this model would be a binary representation of either 1 or 0 that each exercise given is a squat or not as labeled and indicated in the dataset customization function.

tgt and src have to have equal features for a Transformer Network in Pytorch

I am attempting to train EEG data through a transformer network. The input dimensions are 50x16684x60 (seq x batch x features) and the output is 16684x2. Right now I am simply trying to run a basic transformer, and I keep getting an error telling me
RuntimeError: the feature number of src and tgt must be equal to d_model
Why would the source and target feature number ever be equal? Is it possible to run such a dataset through a transformer?
Here is my basic model:
input_size = 60 # seq x batch x features
hidden_size = 32
num_classes = 2
learning_rate = 0.001
batch_size = 64
num_epochs = 2
sequence_length = 50
num_layers = 2
dropout = 0.5
class Transformer(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes):
super(Transformer, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.transformer = nn.Transformer(60, 2)
self.fc = nn.Linear(hidden_size * sequence_length, num_classes)
def forward(self, x, y):
# Forward Propogation
out, _ = self.transformer(x,y)
out = out.reshape(out.shape[0], -1)
out = self.fc(out)
return out
model = Transformer(input_size, hidden_size, num_layers, num_classes)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
for index in tqdm(range(16684)):
X, y = (X_train[index], Y_train[index])
print(X.shape, y.shape)
output = model(X, y)
loss = criterion(output, y)
model.zero_grad()
loss.backward()
optimizer.step()
if index % 500 == 0:
print(f"Epoch {epoch}, Batch: {index}, Loss: {loss}")
You train the model to find some features by feeding it the input sequence and desired sequence. The backprop trains the net by computing the loss as a "difference" between src and target features.
If the features sizes aren't the same - the backprop can't find the accordance to some desired feature and the model can't be trained.

When training a multi class CNN with PyTorch displays extraordinarily large loss

I am currently trying train a CNN using PyTorch to predict a subject's age. The age group ranges from 0 to 116. I used the same model to train it on gender classification with two options: male or female.
I ported the same code for the age classification, I was getting errors. The error was due to our last fully connected layer not return a large enough output (in terms of matrix size, it was initially returning a 50 x 2 matrix due to our gender classifier but I switched it to 50 x 117 for the age classification based on the total age options.)
My issue now is that the training loop prints epochs with a huge loss (~3.5 while before, when training the gender classification, it was sub zero.)
Below is my code:
DataLoader class:
class MyDataset(Dataset):
def __init__(self, root_directory, csv_file, image_path, transform = None):
annotated_path = os.path.relpath(csv_file) # Path to UTKFace Dataset and Annotations
self.read_in_csv = pd.read_csv(annotated_path, index_col=False)
self.image_path = os.path.join(root_directory, image_path)
self.transform = transform
self.labels = np.asarray(self.read_in_csv.loc[:,'age'])
def __getitem__(self, index):
attr = self.labels[index]
image_name = str(self.read_in_csv.loc[index, 'file'])
image = Image.open(image_name)
if self.transform:
image = self.transform(image)
dict = {'image':image, 'label':attr}
return dict
def __len__(self):
return len(self.read_in_csv.index)
CNN Architecture:
class ConvolutionalNN(nn.Module):
def __init__(self):
super(ConvolutionalNN,self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3,96,kernel_size=7,stride=4),
nn.BatchNorm2d(96), # Number of Features
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2))
self.layer2 = nn.Sequential(
nn.Conv2d(96,256,kernel_size=5,padding=2),
nn.BatchNorm2d(256),
nn.ReLU(), # Default = False
nn.MaxPool2d(kernel_size=3,stride=2))
self.layer3 = nn.Sequential(
nn.Conv2d(256,384,kernel_size=3,padding=1),
nn.BatchNorm2d(384),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2))
self.fc1 = nn.Linear(384*6*6,512)
self.fc2 = nn.Linear(512,512)
self.fc3 = nn.Linear(512,117)
def forward(self,x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = out.view(out.size(0),-1)
#print out.size()
out = F.dropout(F.relu(self.fc1(out)))
out = F.dropout(F.relu(self.fc2(out)))
out = self.fc3(out)
return out
Training Loop:
def training_loop(checkpoint = None, best=False):
current_epoch = 1
num_epochs = 50
train_acc_history = []
val_acc_history = []
epoch_history = []
learning_rate = 0.001
best_val_acc = 0.0
is_best = False
criterion = nn.CrossEntropyLoss()
## Predict the Age and Gender of the Human in the Image
optimizer = torch.optim.SGD(cnn.parameters(),lr=0.001,momentum=0.9)
if checkpoint is not None:
is_best = best
current_epoch = checkpoint['epoch']
train_acc_history = checkpoint['train_acc_history']
val_acc_history = checkpoint['val_acc_history']
best_val_acc = checkpoint['best_val_acc']
optimizer.load_state_dict(checkpoint['optimizer'])
epoch_history = checkpoint['epoch_history']
print('Uploading our images now...')
for epoch in range(current_epoch, num_epochs + current_epoch):
print('Starting epoch %d / %d' % (epoch + 1, num_epochs + current_epoch))
print('Learning Rate for this epoch: {}'.format(learning_rate))
for i, batch in enumerate(train_loader):
images, labels = batch['image'], batch['label']
images = images.clone().detach()
labels = labels.clone().detach()
if use_gpu:
images = images.cuda()
labels = labels.cuda()
optimizer.zero_grad()
pred_labels = cnn(images)
loss = criterion(pred_labels,labels)
loss.backward()
optimizer.step()
So this is my code. It does not seem to be training well.
Please let me know on what could be done to fix this.

How to compute the uncertainty of a Monte Carlo Dropout neural network with PyTorch?

I am trying to implement Bayesian CNN using Mc Dropout on Pytorch, the main idea is that by applying dropout at test time and running over many forward passes, you get predictions from a variety of different models. I need to obtain the uncertainty, does anyone have an idea of how I can do it Please
This is how I defined my CNN
'''
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
self.dropout = nn.Dropout(p=0.3)
nn.init.xavier_uniform_(self.conv1.weight)
nn.init.constant_(self.conv1.bias, 0.0)
nn.init.xavier_uniform_(self.conv2.weight)
nn.init.constant_(self.conv2.bias, 0.0)
nn.init.xavier_uniform_(self.fc1.weight)
nn.init.constant_(self.fc1.bias, 0.0)
nn.init.xavier_uniform_(self.fc2.weight)
nn.init.constant_(self.fc2.bias, 0.0)
nn.init.xavier_uniform_(self.fc3.weight)
nn.init.constant_(self.fc3.bias, 0.0)
def forward(self, x):
x = self.pool(F.relu(self.dropout(self.conv1(x)))) # recommended to add the relu
x = self.pool(F.relu(self.dropout(self.conv2(x)))) # recommended to add the relu
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(self.dropout(x)))
x = self.fc3(self.dropout(x)) # no activation function needed for the last layer
return x
model = Net().to(device)
train_accuracies=np.zeros(num_epochs)
test_accuracies=np.zeros(num_epochs)
dataiter = iter(trainloader)
images, labels = dataiter.next()
#initializing variables
loss_acc = []
class_acc_mcdo = []
start_train = True
#Defining the Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
loss_vals = []
acc_vals = []
for epoch in range(num_epochs): # loop over the dataset multiple times
n_correct = 0 # initialize number of correct predictions
acc = 0 # initialize accuracy of each epoch
somme = 0 # initialize somme of losses of each epoch
epoch_loss = []
for i, (images, labels) in enumerate(trainloader):
# origin shape: [4, 3, 32, 32] = 4, 3, 1024
# input_layer: 3 input channels, 6 output channels, 5 kernel size
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model.train()(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad() # zero the parameter gradients
loss.backward()
epoch_loss.append(loss.item()) # add the loss to epoch_loss list
optimizer.step()
# max returns (value ,index)
_, predicted = torch.max(outputs, 1)
n_correct += (predicted == labels).sum().item()
# print statistics
if (i + 1) % 2000 == 0:
print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss:
{loss.item():.4f}')
somme = (sum(epoch_loss)) / len(epoch_loss)
loss_vals.append(somme) # add the epoch's loss to loss_vals
print("Loss = {}".format(somme))
acc = 100 * n_correct / len(trainset)
acc_vals.append(acc) # add the epoch's Accuracy to acc_vals
print("Accuracy = {}".format(acc))
# SAVE
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)
loss_acc.append(loss_vals)
loss_acc.append(acc_vals)
return loss_acc
And here is the code of the mc dropout
'''
def enable_dropout(model):
""" Function to enable the dropout layers during test-time """
for m in model.modules():
if m.__class__.__name__.startswith('Dropout'):
m.train()
def test():
# set non-dropout layers to eval mode
model.eval()
# set dropout layers to train mode
enable_dropout(model)
test_loss = 0
correct = 0
n_samples = 0
n_class_correct = [0 for i in range(10)]
n_class_samples = [0 for i in range(10)]
T = 100
for images, labels in testloader:
images = images.to(device)
labels = labels.to(device)
with torch.no_grad():
output_list = []
# getting outputs for T forward passes
for i in range(T):
output_list.append(torch.unsqueeze(model(images), 0))
# calculating mean
output_mean = torch.cat(output_list, 0).mean(0)
test_loss += F.nll_loss(F.log_softmax(output_mean, dim=1), labels,
reduction='sum').data # sum up batch loss
_, predicted = torch.max(output_mean, 1) # get the index of the max log-probability
correct += (predicted == labels).sum().item() # sum up correct predictions
n_samples += labels.size(0)
for i in range(batch_size):
label = labels[i]
predi = predicted[i]
if (label == predi):
n_class_correct[label] += 1
n_class_samples[label] += 1
test_loss /= len(testloader.dataset)
# PRINT TO HTML PAGE
print('\n Average loss: {:.4f}, Accuracy: ({:.3f}%)\n'.format(
test_loss,
100. * correct / n_samples))
# Accuracy for each class
acc_classes = []
for i in range(10):
acc = 100.0 * n_class_correct[i] / n_class_samples[i]
print(f'Accuracy of {classes[i]}: {acc} %')
acc_classes.append(acc)
class_acc_mcdo.extend(acc_classes)
print('Finished Testing')
You can compute the statistics, such as the sample mean or the sample variance, of different stochastic forward passes at test time (i.e. with the test or validation data), when the dropout is enabled. These statistics can be used to represent uncertainty. For example, you can compute the entropy, which is a measure of uncertainty, from the sample mean.

Tensorflow RNN stuck at high cost

The following RNN model decreases the loss for the first one or two epochs and then fluctuates around the cost of 6. This seems like the model is so random and not learning at all. I varied the learning rate from 0.1 to 0.0001 and it didn't help. The data is fed with an input pipeline, which worked fine with other models, so the functions that extract the label and images are not presented here. I have looked at this for so many times but still couldn't find what's wrong with it. Here's the code:
n_steps = 224
n_inputs = 224
learning_rate = 0.00015
batch_size = 256 # n_neurons
epochs = 100
num_batch = int(len(trainnames)/batch_size)
keep_prob = tf.placeholder(tf.float32)
# TRAIN QUEUE
train_queue = tf.RandomShuffleQueue(len(trainnames)*1.5, 0, [tf.string, tf.float32], shapes=[[],[num_labels,]])
enqueue_train = train_queue.enqueue_many([trainnames, train_label])
train_image, train_image_label = train_queue.dequeue()
train_image = read_image_file(train_image)
train_batch, train_label_batch = tf.train.batch(
[train_image, train_image_label],
batch_size=batch_size,
num_threads=1,
capacity=10*batch_size,
enqueue_many=False,
shapes=[[224,224], [num_labels,]],
allow_smaller_final_batch=True
)
train_close = train_queue.close()
def RNN(inputs, reuse):
with tf.variable_scope('cells', reuse=reuse):
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=batch_size, reuse=reuse)
with tf.variable_scope('rnn'):
outputs, states = tf.nn.dynamic_rnn(basic_cell, inputs, dtype=tf.float32)
fc_drop = tf.nn.dropout(states, keep_prob)
logits = tf.contrib.layers.fully_connected(fc_drop, num_labels, activation_fn=None)
return logits
#Training
with tf.name_scope("cost_function") as scope:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=train_label_batch, logits=RNN(train_batch, reuse=None)))
train_step = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(cost)
cost_summary = tf.summary.scalar("cost_function", cost)
file_writer = tf.summary.FileWriter(logdir)
#Session
with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord, start=True)
step = 0
for epoch in range(epochs):
sess.run(enqueue_train)
for batch in range(num_batch):
if step % 100 == 0:
summary_str = cost_summary.eval(feed_dict={keep_prob: 1.0})
file_writer.add_summary(summary_str, step)
else:
sess.run(train_step, feed_dict={keep_prob: 0.5})
step += 1
sess.run(train_close)
coord.request_stop()
coord.join(threads)
file_writer.close()

Resources