Wasserstein GAN problem with last discriminator layer and clipping - machine-learning

When I use linear or No activation in the last Discriminator layer using weight clipping Discriminator accuracy goes to 1 and Generator goes to 0. In case when I remove weight clipping, Generator accuracy goes to 1 and discriminator goes to 0 around 300 iterations. But when I use sigmoid activation as the last layer in the discriminator with clipping Generator accuracy goes to 1 and without clipping the generator loss get stuck while accuracies going as they should around 0.5.
NOTE - in all cases, results are produced and all of the show WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set model.trainable without calling model.compile after ?
Code is given here, please do not mind the indentation on copying and pasting it's everywhere -
class WGAN():
def __init__(self,
input_dim,
disc_filter,
disc_kernel,
disc_strides,
disc_dropout,
disc_lr,
gen_filter,
gen_kernel,
gen_strides,
gen_upsample,
gen_lr,
z_dim,
batch_size):
self.input_dim = input_dim
self.disc_filter = disc_filter
self.disc_kernel = disc_kernel
self.disc_strides = disc_strides
self.disc_dropout = disc_dropout
self.disc_lr = disc_lr
self.gen_filter = gen_filter
self.gen_kernel = gen_kernel
self.gen_strides = gen_strides
self.gen_upsample = gen_upsample
self.gen_lr = gen_lr
self.z_dim = z_dim
self.batch_size = batch_size
self.weight_init = RandomNormal(mean=0., stddev=0.02)
self.d_losses = []
self.g_losses = []
self.epoch = 0
self.Discriminator()
self.Generator()
self.full_model()
def wasserstein(self, y_true, y_pred):
return -K.mean(y_true * y_pred)
def Discriminator(self):
disc_input = Input(shape=self.input_dim, name='discriminator_input')
x = disc_input
for i in range(len(self.disc_filter)):
x = Conv2D(filters=self.disc_filter[i], kernel_size=self.disc_kernel[i], strides=self.disc_strides[i], padding='same', name='disc_'+str(i))(x)
x = LeakyReLU()(x)
x = Dropout(self.disc_dropout)(x)
x = BatchNormalization()(x)
x = Flatten()(x)
disc_output = Dense(1, activation='sigmoid', kernel_initializer = self.weight_init)(x)
self.discriminator = Model(disc_input, disc_output)
def Generator(self):
gen_input = Input(shape=(self.z_dim,), name='generator_input')
x = gen_input
x = Dense(7*7*self.batch_size, kernel_initializer = self.weight_init)(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Reshape(target_shape=(7,7,self.batch_size))(x)
for i in range(len(self.gen_filter)):
if self.gen_upsample[i]==2:
x = UpSampling2D(size=self.gen_upsample[i], name='upsample_'+str(i/2))(x)
x = Conv2D(filters=self.gen_filter[i], kernel_size=self.gen_kernel[i], strides=self.gen_strides[i], padding='same', name='gen_'+str(i))(x)
else:
x = Conv2DTranspose(filters=self.gen_filter[i], kernel_size=self.gen_kernel[i], strides=self.gen_strides[i], padding='same', name='gen_'+str(i))(x)
if i<len(self.gen_filter)-1:
x = BatchNormalization()(x)
x = LeakyReLU()(x)
else:
x = Activation("tanh")(x)
gen_output = x
self.generator = Model(gen_input, gen_output)
def set_trainable(self, model, val):
model.trainable=val
for l in model.layers:
l.trainable=val
def full_model(self):
### COMPILE DISCRIMINATOR
self.discriminator.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
### COMPILE THE FULL GAN
self.set_trainable(self.discriminator, False)
self.discriminator.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
model_input = Input(shape=(self.z_dim,), name='model_input')
model_output = self.discriminator(self.generator(model_input))
self.model = Model(model_input, model_output)
self.model.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
self.set_trainable(self.discriminator, True)
def train_generator(self, batch_size):
valid = np.ones((batch_size,1))
noise = np.random.normal(0, 1, (batch_size, self.z_dim))
return self.model.train_on_batch(noise, valid)
def train_discriminator(self, x_train, batch_size, using_generator):
valid = np.ones((batch_size,1))
fake = np.zeros((batch_size,1))
if using_generator:
true_imgs = next(x_train)[0]
if true_imgs.shape[0] != batch_size:
true_imgs = next(x_train)[0]
else:
idx = np.random.randint(0, x_train.shape[0], batch_size)
true_imgs = x_train[idx]
noise = np.random.normal(0, 1, (batch_size, self.z_dim))
gen_imgs = self.generator.predict(noise)
d_loss_real, d_acc_real = self.discriminator.train_on_batch(true_imgs, valid)
d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake)
d_loss = 0.5 * (d_loss_real + d_loss_fake)
d_acc = 0.5 * (d_acc_real + d_acc_fake)
for l in self.discriminator.layers:
weights = l.get_weights()
weights = [np.clip(w, -0.01, 0.01) for w in weights]
l.set_weights(weights)
return [d_loss, d_loss_real, d_loss_fake, d_acc, d_acc_real, d_acc_fake]
def train(self, x_train, batch_size, epochs, print_every_n_batches = 50, using_generator = False):
for epoch in range(self.epoch, self.epoch + epochs):
d = self.train_discriminator(x_train, batch_size, using_generator)
g = self.train_generator(batch_size)
if self.epoch % print_every_n_batches == 0:
print ("%d [D loss: (%.3f)(R %.3f, F %.3f)] [D acc: (%.3f)(%.3f, %.3f)] [G loss: %.3f] [G acc: %.3f]" % (epoch, d[0], d[1], d[2], d[3], d[4], d[5], g[0], g[1]))
self.d_losses.append(d)
self.g_losses.append(g)
self.epoch+=1

Related

Loss not Converging for CNN Model

Image Transformation and Batch
transform = transforms.Compose([
transforms.Resize((100,100)),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
data_set = datasets.ImageFolder(root="/content/drive/My Drive/models/pokemon/dataset",transform=transform)
train_loader = DataLoader(data_set,batch_size=10,shuffle=True,num_workers=6)
Below is my Model
class pokimonClassifier(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(3,6,3,1)
self.conv2 = nn.Conv2d(6,18,3,1)
self.fc1 = nn.Linear(23*23*18,520)
self.fc2 = nn.Linear(520,400)
self.fc3 = nn.Linear(400,320)
self.fc4 = nn.Linear(320,149)
def forward(self,x):
x = F.relu(self.conv1(x))
x = F.max_pool2d(x,2,2)
x = F.relu(self.conv2(x))
x = F.max_pool2d(x,2,2)
x = x.view(-1,23*23*18)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = F.log_softmax(self.fc4(x), dim=1)
return x
Creating Instance of model, Use GPU, Set Criterion and optimizer
Here is firsr set lr = 0.001 then later changed to 0.0001
model = pokimonClassifier()
model.to('cuda')
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 0.0001)
Training Dataset
for e in range(epochs):
train_crt = 0
for b,(train_x,train_y) in enumerate(train_loader):
b+=1
train_x, train_y = train_x.to('cuda'), train_y.to('cuda')
# train model
y_preds = model(train_x)
loss = criterion(y_preds,train_y)
# analysis model
predicted = torch.max(y_preds,1)[1]
correct = (predicted == train_y).sum()
train_crt += correct
# print loss and accuracy
if b%50 == 0:
print(f'Epoch {e} batch{b} loss:{loss.item()} ')
# updating weights and bais
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss.append(loss)
train_correct.append(train_crt)
My loss value remains between 4 - 3 and its not converging to 0.
I am super new to deep learning and I don't know much about it.
The dataset I am using is here: https://www.kaggle.com/thedagger/pokemon-generation-one
A help will be much appreciated.
Thank You
The problem with your network is that you are applying softmax() twice - once at fc4() layer and once more while using nn.CrossEntropyLoss().
According to the official documentation, Pytorch takes care of softmax() while applying nn.CrossEntropyLoss().
So in your code, please change this line
x = F.log_softmax(self.fc4(x), dim=1)
to
x = self.fc4(x)

CNN-LSTM with extra data: but I got an error :(

I am trying to build a CNN+RNN model for my project, but I got an error after concatenating the layers to give it as input to LSTM.
The model I try to build:
MODEL
The error:
ValueError: Input 0 is incompatible with layer lstm_1: expected ndim=3, found ndim=2
with the following code:
kernel_size1 = 3
kernel_size2 = 5
dropout = 0.2
learning_rate = 0.001
weights = initializers.TruncatedNormal(mean=0.0, stddev=0.1, seed=2)
nb_filter = 64
rnn_output_size = 128
hidden_dims = 512
wider = True
deeper = True
batch_size=128
def build_model():
input_news = Input(shape=(max_daily_length,), name='News_Input')
embedding = Embedding(input_dim=vocabulary_size, # size of the vocabulary
output_dim=embedding_dimension,
weights=[word_vectors],
trainable=False,
input_length=max_daily_length)(input_news)
input_price = Input(shape=(len(selected_features),),
name='Price_Input')
x = Dropout(dropout)(embedding)
x = Convolution1D(filters=nb_filter,
kernel_size=kernel_size1,
padding='same',
activation='relu')(x)
x = MaxPooling1D(pool_size=2)(x)
x = Convolution1D(filters=nb_filter,
kernel_size=kernel_size2,
padding='same',
activation='relu')(x)
x = MaxPooling1D(pool_size=2)(x)
x = Flatten(name='flate_0')(x)
x = Dense(units=1024,
activation='relu',
name='dense_0')(x)
x = Dense(units=1024,
activation='relu',
name='dense_1')(x)
model_concat = concatenate(inputs=[input_price, input_news], axis=-1)
lstm = LSTM(rnn_output_size,
activation=None,
kernel_initializer=weights,
batch_size=batch_size,
dropout=dropout)(model_concat)
model_concat = Dense(hidden_dims, kernel_initializer=weights)(lstm)
model_concat = Dropout(dropout)(model_concat)
if deeper == True:
model_concat = Dense(hidden_dims//2, kernel_initializer=weights)(model_concat)
model_concat = Dropout(dropout)(model_concat)
model_output = Dense(1, kernel_initializer=weights, name='output')(model_concat)
model = Model(inputs=[input_news, input_price], outputs=[model_output])

Predicting probabilities in classfier tensorflow

Hey i am pretty new to tensorflow. I am building a classification model basically classifying into 0/1. Is there a way to predict probability of output being 1. Can predict_proba be used over here? Its been widely used in tflearn.dnn but can't find any reference to do it in my case.
def main():
train_x,test_x,train_y,test_y = load_csv_data()
x_size = train_x.shape[1]
y_size = train_y.shape[1]
print(x_size)
print(y_size)
# variables
X = tf.placeholder("float", shape=[None, x_size])
y = tf.placeholder("float", shape=[None, y_size])
weights_1 = initialize_weights((x_size, h_size))
weights_2 = initialize_weights((h_size, y_size))
# Forward propagation
y_pred = forward_propagation(X, weights_1, weights_2)
predict = tf.argmax(y_pred, dimension=1)
# Backward propagation
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_pred))
updates_sgd = tf.train.GradientDescentOptimizer(sgd_step).minimize(cost)
# Start tensorflow session
with tf.Session() as sess:
init = tf.global_variables_initializer()
steps = 1
sess.run(init)
x = np.arange(steps)
test_acc = []
train_acc = []
print("Step, train accuracy, test accuracy")
for step in range(steps):
# Train with each example
batch_size = len(train_x)
avg_cost = 0
print(batch_size)
for i in range(len(train_x)):
_, c = sess.run([updates_sgd,cost], feed_dict={X: train_x[i: i + 1], y: train_y[i: i + 1]})
print(c)
avg_cost += c/batch_size
train_accuracy = np.mean(np.argmax(train_y, axis=1) ==
sess.run(predict, feed_dict={X: train_x, y: train_y}))
test_accuracy = np.mean(np.argmax(test_y, axis=1) ==
sess.run(predict, feed_dict={X: test_x, y: test_y}))
print(avg_cost)
print("%d, %.2f%%, %.2f%%"
% (step + 1, 100. * train_accuracy, 100. * test_accuracy))
test_acc.append(100. * test_accuracy)
train_acc.append(100. * train_accuracy)
predict = tf.argmax(y_pred,1)
test_data = load_test_data( )
print(test_data)
pred = predict.eval(feed_dict={X:test_data})
print(pred)
for x in range(0,100):
print(pred[x])
print(np.unique(pred))
main()
Here you take argmax of probabilities:
predict = tf.argmax(y_pred, dimension=1)
If you return simply "y_pred" you should get probabilities.

Declaring a function to calculate activation of a layer | How does Tensorflow version work and mine doesn't?

So I tried implementing a Convolutional Neural Network on MNIST dataset in a similar fashion as this: https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
However, on doing that, I noticed that for some reason my second max_pool is not happening. Also, I don't understand how the code in the above link works, more specifically, how the nn_layer method can be reused as the weights exist only in that scope and calling it twice would change them?
My code:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import os
from tensorflow.contrib.tensorboard.plugins import projector
current_path = os.path.dirname(os.path.realpath(__file__))
current_path = current_path+"/logs"
def train():
mnist = input_data.read_data_sets("MNIST_data", one_hot = True)
def initializer(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def conv2d(x,W):
return tf.nn.conv2d(x , W , [1,1,1,1] , padding="SAME")
def max_pool(x):
return tf.nn.max_pool(x , [1,2,2,1] , [1,2,2,1] , padding="SAME")
def conv_layer(x,length,width,input_channels,output_channels,layer_name,act=tf.nn.relu):
with tf.name_scope(layer_name):
with tf.name_scope('weights'):
weights = initializer([length,width,input_channels,output_channels])
tf.summary.histogram(layer_name+"_weights",weights)
with tf.name_scope('biases'):
biases = initializer([output_channels])
tf.summary.histogram(layer_name+"_biases",biases)
with tf.name_scope('activations'):
activations = act(conv2d(x,weights) + biases)
activations = max_pool(activations)
tf.summary.histogram(layer_name+"_activations",activations)
return activations
def dense_layer(x,input_size,output_size,layer_name,act=tf.nn.relu):
with tf.name_scope(layer_name):
with tf.name_scope('weights'):
weights = initializer([input_size,output_size])
tf.summary.histogram(layer_name+"_weights",weights)
with tf.name_scope('biases'):
biases = initializer([output_size])
tf.summary.histogram(layer_name+"_biases",biases)
with tf.name_scope('activations'):
activations = act(tf.matmul(x,weights) + biases)
tf.summary.histogram(layer_name+"_activations",activations)
return activations
def dropout(x,keep_prob):
with tf.name_scope('Dropout'):
dropped =tf.nn.dropout(x,keep_prob)
return dropped
with tf.name_scope('input'):
x = tf.placeholder(tf.float32, [None,784],name='image_inputs')
y = tf.placeholder(tf.float32, [None,10],name='image_labels')
keep_prob = tf.placeholder(tf.float32,name='keep_probability')
with tf.name_scope('input_reshape'):
x_image = tf.reshape(x , [-1,28,28,1])
tf.summary.image('input',x_image,50)
h1 = conv_layer(x_image,3,3,1,32,"first_convolution_layer")
h2 = conv_layer(h1,3,3,32,64,"second_convolution_layer")
h2 = tf.reshape(h1,[-1,7*7*64])
h2 = dropout(h2,keep_prob)
h3 = dense_layer(h2,7*7*64,1024,"first_dense_layer")
h3 = dropout(h3,keep_prob)
h4 = dense_layer(h3,1024,1024,"second_dense_layer")
h4 = dropout(h4,keep_prob)
h_out = dense_layer(h4,1024,10,"output_dense_layer",act=tf.nn.sigmoid)
with tf.name_scope("Loss"):
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=h_out))
tf.summary.scalar('Loss',cost)
train = tf.train.AdamOptimizer().minimize(cost)
with tf.name_scope("Accuracy"):
correct_pred = tf.equal(tf.argmax(h_out, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(current_path, sess.graph)
for i in range(500):
batch = mnist.train.next_batch(500)
if(i%100 == 0):
summary_str = sess.run(summary,feed_dict={x:batch[0], y:batch[1], keep_prob:1.0})
summary_writer.add_summary(summary_str, i)
summary_writer.flush()
train_accuracy = accuracy.eval(feed_dict={x:batch[0], y:batch[1], keep_prob:1.0})
saver.save(sess, os.path.join(current_path,'model.ckpt'), i)
print("Step %d Training Accuracy: %f" %((i/100 + 1), train_accuracy))
train.run(feed_dict={x:batch[0], y:batch[1], keep_prob:0.5})
sum=0.0
for i in range(10):
batch_x = mnist.test.images[(i*1000):((i+1)*1000)-1]
batch_y = mnist.test.labels[(i*1000):((i+1)*1000)-1]
sum = sum + accuracy.eval(feed_dict={x:batch_x, y:batch_y, keep_prob:1.0})
print("Test Accuracy: %f" %(sum/10.0))
if tf.gfile.Exists(current_path):
tf.gfile.DeleteRecursively(current_path)
tf.gfile.MakeDirs(current_path)
train()
This is a simple typo.
Change this
h2 = tf.reshape(h1,[-1,7*7*64])
to this
h2 = tf.reshape(h2,[-1,7*7*64])
The error
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[1000,10] labels_size=[500,10]
[[Node: Loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Loss/Reshape, Loss/Reshape_1)]]
went away.

Getting Linear Regression score from Transfer Learning

I am having a task of assigning a score from 0.0 to 1.0 to images. For this I have made use of already learned models meant for classification of ImageNet competition like VGG, SqueezeNet etc. From the output of the convoluted layers of this models, I have added my own 2 or 3 dense layers (fully connected layers), with the first few layer having a certain 'x' hidden units and last layer having only one unit. The value coming from this last layer (having one unit), I am using as score.
I am performing retraining on all the dense layers, but after I perform training, I get a constant score of around 0.75 for whichever input I send. I have a good training set of 50000 images.
Please can somebody explain me where I am going wrong in this approach. Also, some directions on how to proceed in this type of problem will be very helpful.
Important parts of Code:-
from tensorflow.python.ops import control_flow_ops
def fcLayer(images, weight, bias, should_activate = True):
fc = tf.matmul(images, weight)
bias_add = tf.nn.bias_add(fc, bias)
if not should_activate:
return bias_add
out = tf.nn.relu(bias_add)
return out
weights = np.load('../Data/vgg16_weights.npz')
def fc_VGG(pool5_flat): # Feed directly the bottleneck features.
# fc6
with tf.variable_scope('fc6'):
fc6W = tf.get_variable('fc6_W', dtype = tf.float32, trainable = True,
initializer = weights['fc6_W'])
fc6b = tf.get_variable('fc6_b', dtype = tf.float32, trainable = True,
initializer = weights['fc6_b'])
fc6 = fcLayer(pool5_flat, fc6W, fc6b)
# fc7
with tf.variable_scope('fc7'):
fc7W = tf.get_variable('fc7_W', dtype = tf.float32, trainable = True,
initializer = weights['fc7_W'])
fc7b = tf.get_variable('fc7_b', dtype = tf.float32, trainable = True,
initializer = weights['fc7_b'])
fc7 = fcLayer(fc6, fc7W, fc7b)
fc7 = tf.cond(is_train, lambda: tf.nn.dropout(fc7, keep_prob = 0.35), lambda: fc7)
with tf.variable_scope('fc8'):
fc7_shape = int(np.prod(fc7.get_shape()[1:]))
fc8W = tf.get_variable('fc8_W', dtype = tf.float32, trainable = True,
initializer = tf.random_normal((fc7_shape, new_output_units), stddev = 1e-1))
fc8b = tf.get_variable('fc8_b', dtype = tf.float32, trainable = True,
initializer = tf.ones((1)))
fc8 = fcLayer(fc7, fc8W, fc8b, should_activate = False)
return fc8
learning_rate = 0.0001
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape = (None, 25088))
y = tf.placeholder(tf.float32, shape = (None))
alpha = tf.constant(learning_rate, tf.float32)
is_train = tf.placeholder(tf.bool)
logits = fc_VGG(X)
loss = tf.reduce_mean(tf.abs(tf.subtract(logits, y)))
optimizer = tf.train.AdamOptimizer(learning_rate = alpha).minimize(loss)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(EPOCHS):
current_learning_rate = learning_rate * (1 - WEIGHT_DECAY)
num_examples = len(y_train)
X_train_files, y_train = shuffle(X_train_files, y_train)
for offset in range(0, num_examples, BATCH_SIZE):
end = offset + BATCH_SIZE
batch_x_files, batch_y = X_train_files[offset: end], y_train[offset: end]
batch_x = load_batchX()
_, loss_val = sess.run([optimizer, loss], feed_dict = {X: batch_x, y: batch_y,
alpha: current_learning_rate, is_train: True})
loss_history.append(loss_val)

Resources