Predicting probabilities in classfier tensorflow - machine-learning

Hey i am pretty new to tensorflow. I am building a classification model basically classifying into 0/1. Is there a way to predict probability of output being 1. Can predict_proba be used over here? Its been widely used in tflearn.dnn but can't find any reference to do it in my case.
def main():
train_x,test_x,train_y,test_y = load_csv_data()
x_size = train_x.shape[1]
y_size = train_y.shape[1]
print(x_size)
print(y_size)
# variables
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
weights_1 = initialize_weights((x_size, h_size))
weights_2 = initialize_weights((h_size, y_size))
# Forward propagation
y_pred = forward_propagation(X, weights_1, weights_2)
predict = tf.argmax(y_pred, dimension=1)
# Backward propagation
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred))
updates_sgd = tf.train.GradientDescentOptimizer(sgd_step).minimize(cost)
# Start tensorflow session
with tf.Session() as sess:
init = tf.global_variables_initializer()
steps = 1
sess.run(init)
x = np.arange(steps)
test_acc = []
train_acc = []
print("Step, train accuracy, test accuracy")
for step in range(steps):
# Train with each example
batch_size = len(train_x)
avg_cost = 0
print(batch_size)
for i in range(len(train_x)):
_, c = sess.run([updates_sgd,cost], feed_dict={X: train_x[i: i + 1], y: train_y[i: i + 1]})
print(c)
avg_cost += c/batch_size
train_accuracy = np.mean(np.argmax(train_y, axis=1) ==
sess.run(predict, feed_dict={X: train_x, y: train_y}))
test_accuracy = np.mean(np.argmax(test_y, axis=1) ==
sess.run(predict, feed_dict={X: test_x, y: test_y}))
print(avg_cost)
print("%d, %.2f%%, %.2f%%"
% (step + 1, 100. * train_accuracy, 100. * test_accuracy))
test_acc.append(100. * test_accuracy)
train_acc.append(100. * train_accuracy)
predict = tf.argmax(y_pred,1)
test_data = load_test_data( )
print(test_data)
pred = predict.eval(feed_dict={X:test_data})
print(pred)
for x in range(0,100):
print(pred[x])
print(np.unique(pred))
main()

Here you take argmax of probabilities:
predict = tf.argmax(y_pred, dimension=1)
If you return simply "y_pred" you should get probabilities.

Related

PyTorch Siamese Network Oscillating / Fluctuating Loss Function

I have implemented a siamese NN for regression using the resnet18 for transfer learning. The goal is to calculate the correlation coefficient between two images, since we do not have raw data but only images for a specific signal. We want to measure similarity between images. However the loss function of my nn is always oscillating up and down.
Code below:
Model itself
class firstNN(nn.Module):
def __init__(self):
# wofür ist das da?
super(firstNN, self).__init__()
self.resnet = models.resnet18(pretrained=True)
for param in self.resnet.parameters():
param.requires_grad = False
# over-write the first conv layer to be able to read images
# as resnet18 reads (3,x,x) where 3 is RGB channels
# whereas MNIST has (1,x,x) where 1 is a gray-scale channel
self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
self.fc_in_features = self.resnet.fc.in_features
# remove the last layer of resnet18 (linear layer which is before avgpool layer)
self.resnet = torch.nn.Sequential(*(list(self.resnet.children())[:-1]))
# add linear layers to compare between the features of the two images
self.fc = nn.Sequential(
nn.Linear(self.fc_in_features, hidden_dim),
torch.nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim2),
torch.nn.ReLU(),
nn.Linear(hidden_dim2,hidden_dim3),
torch.nn.ReLU(),
nn.Linear(hidden_dim3,1),
# nn.ReLU(),
# nn.Linear(input_dim, third_dim),
)
# Distance function
self.binary = False
# Get params and register optimizer
info, params = self.get_model_params()
#self.optimizer = optim.Adam(params, lr=learning_rate,
# weight_decay=weight_decay)
# self.optimizer = optim.SGD(params, lr=learning_rate,
# momentum=0.5)
#self.criterion = nn.BCELoss()
#self.criterion = nn.MSELoss()
LOGGER.info(info)
# Initialisiert die weights mit "random" Werten
def init_layers(self):
nn.init.xavier_normal(self.fc[0].weight.data).to(device)
nn.init.xavier_normal(self.fc[2].weight.data).to(device)
# Erstellt NN mit dem input, inputs ist unser batch
def siamese_basic(self, inputs):
output = self.resnet(inputs)
output = output.view(output.size()[0], -1)
output = self.fc(output)
return output
def distance_layer(self, vec1, vec2, distance='cos'):
if distance == 'cos':
similarity = F.cosine_similarity(
vec1 + 1e-16, vec2 + 1e-16, dim=-1)
elif distance == 'l1':
similarity = self.dist_fc(torch.abs(vec1 - vec2))
similarity = similarity.squeeze(1)
elif distance == 'l2':
similarity = self.dist_fc(torch.abs(vec1 - vec2) ** 2)
similarity = similarity.squeeze(1)
ic()
#if self.binary:
# similarity = F.sigmoid(similarity)
return similarity
def forward(self, template, img):
embed1 = self.siamese_basic(template)
embed2 = self.siamese_basic(img)
# print(f"Before reshape embed2 {embed2.shape}")
# print(f"Befor reshape embed1 {embed1.shape}")
embed1 = embed1.reshape(template.shape[0],-1).float()
embed2 = embed2.reshape(img.shape[0],-1).float()
similarity = self.distance_layer(embed1, embed2)
# print(f"embed2 {embed2.shape}")
# print(f"embed1 {embed1.shape}")
# print(f"similarity {similarity.shape}")
ic()
return similarity#, embed1, embed2
def get_loss(self, outputs, targets):
loss = self.criterion(outputs, targets)
ic()
return loss
def get_model_params(self):
params = []
total_size = 0
def multiply_iter(p_list):
out = 1
for p in p_list:
out *= p
return out
for p in self.parameters():
if p.requires_grad:
params.append(p)
total_size += multiply_iter(p.size())
return '{}\nparam size: {:,}\n'.format(self, total_size), params
def save_checkpoint(self, state, checkpoint_dir, filename):
filename = checkpoint_dir + filename
LOGGER.info('Save checkpoint %s' % filename)
torch.save(state, filename)
def load_checkpoint(self, checkpoint_dir, filename):
filename = checkpoint_dir + filename
LOGGER.info('Load checkpoint %s' % filename)
checkpoint = torch.load(filename)
self.load_state_dict(checkpoint['state_dict'])
self.optimizer.load_state_dict(checkpoint['optimizer'])
Choice of criterion etc
model = firstNN()
criterion = nn.MSELoss()
#optimizer = optim.Adam(model.parameters(), lr=learning_rate,
# weight_decay=weight_decay)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
training_data = CustomImageDataset("")
# Create data loaders.
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
hidden_dim = 128
hidden_dim2 = 64
hidden_dim3 = 32
do_learn = True
save_frequency = 2
batch_size = 40 if torch.cuda.is_available() else 64
learning_rate = 0.0001
num_epochs = 15
weight_decay = 0.1
momentum = 0.9
loss_history = []
r2_history = []
loss_history2 = []
r2_history2 = []
LOGGER = logging.getLogger(__name__)
torch.cuda.empty_cache()
model = firstNN().to(device)
model.train()
for epoch in range (num_epochs):
running_r2 = 0.0
running_loss = 0.0
for batch_idx, (templates, images, targets) in enumerate(train_loader):
templates = templates.unsqueeze(1).float().to(device)
images = images.unsqueeze(1).float().to(device)
targets = targets.float().to(device)
optimizer.zero_grad()
outputs = model(templates, images)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
r2score = torchmetrics.R2Score().to(device)
rscore = r2score(outputs, torch.tensor(targets).squeeze())
running_loss += loss.item()
running_r2 += rscore.item()
loss_history2.append(loss.item())
r2_history2.append(rscore.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tR2Score: {}'.format(
epoch, batch_idx * len(templates), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item(), rscore ))
running_loss = running_loss / len(train_loader)
running_r2 = running_r2 / len(train_loader)
loss_history.append(running_loss)
r2_history.append(running_r2)
Example of images with spearman correlation of 0.45
Example of Oscillating loss and r2
I have tried using several different learning rates and experimented with weight decay and change of optimizer / nn setup but I dont understant exactly how to combat the issue.

How to compute the uncertainty of a Monte Carlo Dropout neural network with PyTorch?

I am trying to implement Bayesian CNN using Mc Dropout on Pytorch, the main idea is that by applying dropout at test time and running over many forward passes, you get predictions from a variety of different models. I need to obtain the uncertainty, does anyone have an idea of how I can do it Please
This is how I defined my CNN
'''
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
self.dropout = nn.Dropout(p=0.3)
nn.init.xavier_uniform_(self.conv1.weight)
nn.init.constant_(self.conv1.bias, 0.0)
nn.init.xavier_uniform_(self.conv2.weight)
nn.init.constant_(self.conv2.bias, 0.0)
nn.init.xavier_uniform_(self.fc1.weight)
nn.init.constant_(self.fc1.bias, 0.0)
nn.init.xavier_uniform_(self.fc2.weight)
nn.init.constant_(self.fc2.bias, 0.0)
nn.init.xavier_uniform_(self.fc3.weight)
nn.init.constant_(self.fc3.bias, 0.0)
def forward(self, x):
x = self.pool(F.relu(self.dropout(self.conv1(x)))) # recommended to add the relu
x = self.pool(F.relu(self.dropout(self.conv2(x)))) # recommended to add the relu
x = x.view(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(self.dropout(x)))
x = self.fc3(self.dropout(x)) # no activation function needed for the last layer
return x
model = Net().to(device)
train_accuracies=np.zeros(num_epochs)
test_accuracies=np.zeros(num_epochs)
dataiter = iter(trainloader)
images, labels = dataiter.next()
#initializing variables
loss_acc = []
class_acc_mcdo = []
start_train = True
#Defining the Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train():
loss_vals = []
acc_vals = []
for epoch in range(num_epochs): # loop over the dataset multiple times
n_correct = 0 # initialize number of correct predictions
acc = 0 # initialize accuracy of each epoch
somme = 0 # initialize somme of losses of each epoch
epoch_loss = []
for i, (images, labels) in enumerate(trainloader):
# origin shape: [4, 3, 32, 32] = 4, 3, 1024
# input_layer: 3 input channels, 6 output channels, 5 kernel size
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model.train()(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad() # zero the parameter gradients
loss.backward()
epoch_loss.append(loss.item()) # add the loss to epoch_loss list
optimizer.step()
# max returns (value ,index)
_, predicted = torch.max(outputs, 1)
n_correct += (predicted == labels).sum().item()
# print statistics
if (i + 1) % 2000 == 0:
print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss:
{loss.item():.4f}')
somme = (sum(epoch_loss)) / len(epoch_loss)
loss_vals.append(somme) # add the epoch's loss to loss_vals
print("Loss = {}".format(somme))
acc = 100 * n_correct / len(trainset)
acc_vals.append(acc) # add the epoch's Accuracy to acc_vals
print("Accuracy = {}".format(acc))
# SAVE
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)
loss_acc.append(loss_vals)
loss_acc.append(acc_vals)
return loss_acc
And here is the code of the mc dropout
'''
def enable_dropout(model):
""" Function to enable the dropout layers during test-time """
for m in model.modules():
if m.__class__.__name__.startswith('Dropout'):
m.train()
def test():
# set non-dropout layers to eval mode
model.eval()
# set dropout layers to train mode
enable_dropout(model)
test_loss = 0
correct = 0
n_samples = 0
n_class_correct = [0 for i in range(10)]
n_class_samples = [0 for i in range(10)]
T = 100
for images, labels in testloader:
images = images.to(device)
labels = labels.to(device)
with torch.no_grad():
output_list = []
# getting outputs for T forward passes
for i in range(T):
output_list.append(torch.unsqueeze(model(images), 0))
# calculating mean
output_mean = torch.cat(output_list, 0).mean(0)
test_loss += F.nll_loss(F.log_softmax(output_mean, dim=1), labels,
reduction='sum').data # sum up batch loss
_, predicted = torch.max(output_mean, 1) # get the index of the max log-probability
correct += (predicted == labels).sum().item() # sum up correct predictions
n_samples += labels.size(0)
for i in range(batch_size):
label = labels[i]
predi = predicted[i]
if (label == predi):
n_class_correct[label] += 1
n_class_samples[label] += 1
test_loss /= len(testloader.dataset)
# PRINT TO HTML PAGE
print('\n Average loss: {:.4f}, Accuracy: ({:.3f}%)\n'.format(
test_loss,
100. * correct / n_samples))
# Accuracy for each class
acc_classes = []
for i in range(10):
acc = 100.0 * n_class_correct[i] / n_class_samples[i]
print(f'Accuracy of {classes[i]}: {acc} %')
acc_classes.append(acc)
class_acc_mcdo.extend(acc_classes)
print('Finished Testing')
You can compute the statistics, such as the sample mean or the sample variance, of different stochastic forward passes at test time (i.e. with the test or validation data), when the dropout is enabled. These statistics can be used to represent uncertainty. For example, you can compute the entropy, which is a measure of uncertainty, from the sample mean.

Loss not Converging for CNN Model

Image Transformation and Batch
transform = transforms.Compose([
transforms.Resize((100,100)),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
data_set = datasets.ImageFolder(root="/content/drive/My Drive/models/pokemon/dataset",transform=transform)
train_loader = DataLoader(data_set,batch_size=10,shuffle=True,num_workers=6)
Below is my Model
class pokimonClassifier(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3,6,3,1)
self.conv2 = nn.Conv2d(6,18,3,1)
self.fc1 = nn.Linear(23*23*18,520)
self.fc2 = nn.Linear(520,400)
self.fc3 = nn.Linear(400,320)
self.fc4 = nn.Linear(320,149)
def forward(self,x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x,2,2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x,2,2)
x = x.view(-1,23*23*18)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = F.log_softmax(self.fc4(x), dim=1)
return x
Creating Instance of model, Use GPU, Set Criterion and optimizer
Here is firsr set lr = 0.001 then later changed to 0.0001
model = pokimonClassifier()
model.to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 0.0001)
Training Dataset
for e in range(epochs):
train_crt = 0
for b,(train_x,train_y) in enumerate(train_loader):
b+=1
train_x, train_y = train_x.to('cuda'), train_y.to('cuda')
# train model
y_preds = model(train_x)
loss = criterion(y_preds,train_y)
# analysis model
predicted = torch.max(y_preds,1)[1]
correct = (predicted == train_y).sum()
train_crt += correct
# print loss and accuracy
if b%50 == 0:
print(f'Epoch {e} batch{b} loss:{loss.item()} ')
# updating weights and bais
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss.append(loss)
train_correct.append(train_crt)
My loss value remains between 4 - 3 and its not converging to 0.
I am super new to deep learning and I don't know much about it.
The dataset I am using is here: https://www.kaggle.com/thedagger/pokemon-generation-one
A help will be much appreciated.
Thank You
The problem with your network is that you are applying softmax() twice - once at fc4() layer and once more while using nn.CrossEntropyLoss().
According to the official documentation, Pytorch takes care of softmax() while applying nn.CrossEntropyLoss().
So in your code, please change this line
x = F.log_softmax(self.fc4(x), dim=1)
to
x = self.fc4(x)

Wasserstein GAN problem with last discriminator layer and clipping

When I use linear or No activation in the last Discriminator layer using weight clipping Discriminator accuracy goes to 1 and Generator goes to 0. In case when I remove weight clipping, Generator accuracy goes to 1 and discriminator goes to 0 around 300 iterations. But when I use sigmoid activation as the last layer in the discriminator with clipping Generator accuracy goes to 1 and without clipping the generator loss get stuck while accuracies going as they should around 0.5.
NOTE - in all cases, results are produced and all of the show WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set model.trainable without calling model.compile after ?
Code is given here, please do not mind the indentation on copying and pasting it's everywhere -
class WGAN():
def __init__(self,
input_dim,
disc_filter,
disc_kernel,
disc_strides,
disc_dropout,
disc_lr,
gen_filter,
gen_kernel,
gen_strides,
gen_upsample,
gen_lr,
z_dim,
batch_size):
self.input_dim = input_dim
self.disc_filter = disc_filter
self.disc_kernel = disc_kernel
self.disc_strides = disc_strides
self.disc_dropout = disc_dropout
self.disc_lr = disc_lr
self.gen_filter = gen_filter
self.gen_kernel = gen_kernel
self.gen_strides = gen_strides
self.gen_upsample = gen_upsample
self.gen_lr = gen_lr
self.z_dim = z_dim
self.batch_size = batch_size
self.weight_init = RandomNormal(mean=0., stddev=0.02)
self.d_losses = []
self.g_losses = []
self.epoch = 0
self.Discriminator()
self.Generator()
self.full_model()
def wasserstein(self, y_true, y_pred):
return -K.mean(y_true * y_pred)
def Discriminator(self):
disc_input = Input(shape=self.input_dim, name='discriminator_input')
x = disc_input
for i in range(len(self.disc_filter)):
x = Conv2D(filters=self.disc_filter[i], kernel_size=self.disc_kernel[i], strides=self.disc_strides[i], padding='same', name='disc_'+str(i))(x)
x = LeakyReLU()(x)
x = Dropout(self.disc_dropout)(x)
x = BatchNormalization()(x)
x = Flatten()(x)
disc_output = Dense(1, activation='sigmoid', kernel_initializer = self.weight_init)(x)
self.discriminator = Model(disc_input, disc_output)
def Generator(self):
gen_input = Input(shape=(self.z_dim,), name='generator_input')
x = gen_input
x = Dense(7*7*self.batch_size, kernel_initializer = self.weight_init)(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Reshape(target_shape=(7,7,self.batch_size))(x)
for i in range(len(self.gen_filter)):
if self.gen_upsample[i]==2:
x = UpSampling2D(size=self.gen_upsample[i], name='upsample_'+str(i/2))(x)
x = Conv2D(filters=self.gen_filter[i], kernel_size=self.gen_kernel[i], strides=self.gen_strides[i], padding='same', name='gen_'+str(i))(x)
else:
x = Conv2DTranspose(filters=self.gen_filter[i], kernel_size=self.gen_kernel[i], strides=self.gen_strides[i], padding='same', name='gen_'+str(i))(x)
if i<len(self.gen_filter)-1:
x = BatchNormalization()(x)
x = LeakyReLU()(x)
else:
x = Activation("tanh")(x)
gen_output = x
self.generator = Model(gen_input, gen_output)
def set_trainable(self, model, val):
model.trainable=val
for l in model.layers:
l.trainable=val
def full_model(self):
### COMPILE DISCRIMINATOR
self.discriminator.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
### COMPILE THE FULL GAN
self.set_trainable(self.discriminator, False)
self.discriminator.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
model_input = Input(shape=(self.z_dim,), name='model_input')
model_output = self.discriminator(self.generator(model_input))
self.model = Model(model_input, model_output)
self.model.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
self.set_trainable(self.discriminator, True)
def train_generator(self, batch_size):
valid = np.ones((batch_size,1))
noise = np.random.normal(0, 1, (batch_size, self.z_dim))
return self.model.train_on_batch(noise, valid)
def train_discriminator(self, x_train, batch_size, using_generator):
valid = np.ones((batch_size,1))
fake = np.zeros((batch_size,1))
if using_generator:
true_imgs = next(x_train)[0]
if true_imgs.shape[0] != batch_size:
true_imgs = next(x_train)[0]
else:
idx = np.random.randint(0, x_train.shape[0], batch_size)
true_imgs = x_train[idx]
noise = np.random.normal(0, 1, (batch_size, self.z_dim))
gen_imgs = self.generator.predict(noise)
d_loss_real, d_acc_real = self.discriminator.train_on_batch(true_imgs, valid)
d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake)
d_loss = 0.5 * (d_loss_real + d_loss_fake)
d_acc = 0.5 * (d_acc_real + d_acc_fake)
for l in self.discriminator.layers:
weights = l.get_weights()
weights = [np.clip(w, -0.01, 0.01) for w in weights]
l.set_weights(weights)
return [d_loss, d_loss_real, d_loss_fake, d_acc, d_acc_real, d_acc_fake]
def train(self, x_train, batch_size, epochs, print_every_n_batches = 50, using_generator = False):
for epoch in range(self.epoch, self.epoch + epochs):
d = self.train_discriminator(x_train, batch_size, using_generator)
g = self.train_generator(batch_size)
if self.epoch % print_every_n_batches == 0:
print ("%d [D loss: (%.3f)(R %.3f, F %.3f)] [D acc: (%.3f)(%.3f, %.3f)] [G loss: %.3f] [G acc: %.3f]" % (epoch, d[0], d[1], d[2], d[3], d[4], d[5], g[0], g[1]))
self.d_losses.append(d)
self.g_losses.append(g)
self.epoch+=1

Declaring a function to calculate activation of a layer | How does Tensorflow version work and mine doesn't?

So I tried implementing a Convolutional Neural Network on MNIST dataset in a similar fashion as this: https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
However, on doing that, I noticed that for some reason my second max_pool is not happening. Also, I don't understand how the code in the above link works, more specifically, how the nn_layer method can be reused as the weights exist only in that scope and calling it twice would change them?
My code:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
from tensorflow.contrib.tensorboard.plugins import projector
current_path = os.path.dirname(os.path.realpath(__file__))
current_path = current_path+"/logs"
def train():
mnist = input_data.read_data_sets("MNIST_data", one_hot = True)
def initializer(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def conv2d(x,W):
return tf.nn.conv2d(x , W , [1,1,1,1] , padding="SAME")
def max_pool(x):
return tf.nn.max_pool(x , [1,2,2,1] , [1,2,2,1] , padding="SAME")
def conv_layer(x,length,width,input_channels,output_channels,layer_name,act=tf.nn.relu):
with tf.name_scope(layer_name):
with tf.name_scope('weights'):
weights = initializer([length,width,input_channels,output_channels])
tf.summary.histogram(layer_name+"_weights",weights)
with tf.name_scope('biases'):
biases = initializer([output_channels])
tf.summary.histogram(layer_name+"_biases",biases)
with tf.name_scope('activations'):
activations = act(conv2d(x,weights) + biases)
activations = max_pool(activations)
tf.summary.histogram(layer_name+"_activations",activations)
return activations
def dense_layer(x,input_size,output_size,layer_name,act=tf.nn.relu):
with tf.name_scope(layer_name):
with tf.name_scope('weights'):
weights = initializer([input_size,output_size])
tf.summary.histogram(layer_name+"_weights",weights)
with tf.name_scope('biases'):
biases = initializer([output_size])
tf.summary.histogram(layer_name+"_biases",biases)
with tf.name_scope('activations'):
activations = act(tf.matmul(x,weights) + biases)
tf.summary.histogram(layer_name+"_activations",activations)
return activations
def dropout(x,keep_prob):
with tf.name_scope('Dropout'):
dropped =tf.nn.dropout(x,keep_prob)
return dropped
with tf.name_scope('input'):
x = tf.placeholder(tf.float32, [None,784],name='image_inputs')
y = tf.placeholder(tf.float32, [None,10],name='image_labels')
keep_prob = tf.placeholder(tf.float32,name='keep_probability')
with tf.name_scope('input_reshape'):
x_image = tf.reshape(x , [-1,28,28,1])
tf.summary.image('input',x_image,50)
h1 = conv_layer(x_image,3,3,1,32,"first_convolution_layer")
h2 = conv_layer(h1,3,3,32,64,"second_convolution_layer")
h2 = tf.reshape(h1,[-1,7*7*64])
h2 = dropout(h2,keep_prob)
h3 = dense_layer(h2,7*7*64,1024,"first_dense_layer")
h3 = dropout(h3,keep_prob)
h4 = dense_layer(h3,1024,1024,"second_dense_layer")
h4 = dropout(h4,keep_prob)
h_out = dense_layer(h4,1024,10,"output_dense_layer",act=tf.nn.sigmoid)
with tf.name_scope("Loss"):
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_out))
tf.summary.scalar('Loss',cost)
train = tf.train.AdamOptimizer().minimize(cost)
with tf.name_scope("Accuracy"):
correct_pred = tf.equal(tf.argmax(h_out, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(current_path, sess.graph)
for i in range(500):
batch = mnist.train.next_batch(500)
if(i%100 == 0):
summary_str = sess.run(summary,feed_dict={x:batch[0], y:batch[1], keep_prob:1.0})
summary_writer.add_summary(summary_str, i)
summary_writer.flush()
train_accuracy = accuracy.eval(feed_dict={x:batch[0], y:batch[1], keep_prob:1.0})
saver.save(sess, os.path.join(current_path,'model.ckpt'), i)
print("Step %d Training Accuracy: %f" %((i/100 + 1), train_accuracy))
train.run(feed_dict={x:batch[0], y:batch[1], keep_prob:0.5})
sum=0.0
for i in range(10):
batch_x = mnist.test.images[(i*1000):((i+1)*1000)-1]
batch_y = mnist.test.labels[(i*1000):((i+1)*1000)-1]
sum = sum + accuracy.eval(feed_dict={x:batch_x, y:batch_y, keep_prob:1.0})
print("Test Accuracy: %f" %(sum/10.0))
if tf.gfile.Exists(current_path):
tf.gfile.DeleteRecursively(current_path)
tf.gfile.MakeDirs(current_path)
train()
This is a simple typo.
Change this
h2 = tf.reshape(h1,[-1,7*7*64])
to this
h2 = tf.reshape(h2,[-1,7*7*64])
The error
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[1000,10] labels_size=[500,10]
[[Node: Loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Loss/Reshape, Loss/Reshape_1)]]
went away.

Resources