CNN model for RGB images giving 0% accuracy - machine-learning

I am trying to train a CNN model on the CelebA (RGB images) dataset. But when I train the model and check its accuracy, it is 0% or close to 0%. I think the issue is in the ConvNeuralNet class or the hyperparameters, but due to my limited knowledge I'm not sure what I'm missing here. Can someone please help? Thanks.
# Creating a simple network
class ConvNeuralNet(torch.nn.Module):
    """Small CNN classifier: two conv-conv-pool stages, then two linear layers.

    NOTE(review): ``forward`` chains the convolutions with no activation in
    between, and it returns log-probabilities (``F.log_softmax``) rather than
    raw logits.
    """

    def __init__(self, num_classes=10178):
        super(ConvNeuralNet, self).__init__()
        # Stage 1: 3 -> 32 -> 32 channels with 3x3 kernels, then a 2x2 max-pool.
        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        # Stage 2: 32 -> 64 -> 64 channels with 3x3 kernels, then a 2x2 max-pool.
        self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        # 13312 must equal the flattened size of the second pooling output.
        # Presumably 64 channels x 16 x 13 for a 78x64 input -- TODO confirm
        # against the image size produced by the data pipeline.
        self.fc1 = nn.Linear(13312, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Run the network on a batch ``x`` and return per-class log-probabilities."""
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.max_pool1(out)
        out = self.conv_layer3(out)
        out = self.conv_layer4(out)
        out = self.max_pool2(out)
        # Flatten everything except the batch dimension for the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        return F.log_softmax(out,dim=-1)
def trainTorch(torch_model, train_loader, test_loader,
               nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, train_end=-1, test_end=-1, learning_rate=LEARNING_RATE, optimizer=None):
    """Train ``torch_model`` on ``train_loader`` and print running training accuracy.

    ``optimizer`` defaults to ``None`` but is used unconditionally, so the
    caller has to supply one. ``test_loader``, ``batch_size``, ``train_end``,
    ``test_end`` and ``learning_rate`` are accepted but not read in the body.
    Nothing is returned.
    """
    train_loss = []
    total = 0
    correct = 0
    step = 0
    for _epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = Variable(xs), Variable(ys)
            if torch.cuda.is_available():
                xs, ys = xs.cuda(), ys.cuda()
            optimizer.zero_grad()
            preds = torch_model(xs)
            # NOTE(review): log_softmax is applied here and the result is then
            # passed to F.cross_entropy on the next line.
            preds = F.log_softmax(preds, dim=1)
            loss = F.cross_entropy(preds, ys)
            loss.backward()
            train_loss.append(loss.data.item())
            optimizer.step() # apply the parameter update
            preds_np = preds.cpu().detach().numpy()
            correct += (np.argmax(preds_np, axis=1) == ys.cpu().detach().numpy()).sum()
            # Counts the loader's nominal batch size, not the number of labels
            # actually present in this batch.
            total += train_loader.batch_size
            step += 1
            # Report only when the running sample count is a multiple of 1000,
            # then start a fresh accuracy window.
            if total % 1000 == 0:
                acc = float(correct) / total
                print('[%s] Training accuracy: %.2f%%' % (step, acc * 100))
                total = 0
                correct = 0
# Hyperparameters. nb_epochs is the value passed to trainTorch below;
# num_epochs is assigned but not read in this snippet.
nb_epochs = 8
image_size = 64
batch_size = 64
num_classes = 10178
learning_rate = 0.001
num_epochs = 8
# Device will determine whether to run the training on GPU or CPU.
# NOTE(review): `device` is not referenced again in this snippet.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Resize, convert to a tensor, then normalize each channel with mean 0.5 / std 0.5.
trans = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# NOTE(review): download is given the string "True" rather than the boolean True.
train_loader = torch.utils.data.DataLoader(
    datasets.CelebA('data', split='train', target_type='identity', transform=trans, download="True"),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.CelebA('data', split='test', target_type='identity', transform=trans),
    batch_size=batch_size)
#Training the model
print("Training Model")
# SGD with momentum and weight decay.
# NOTE(review): `model1`, `train_end` and `test_end` are not defined anywhere in this snippet.
optimizer = torch.optim.SGD(model1.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)
total_step = len(train_loader)
trainTorch(model1, train_loader, test_loader, nb_epochs, batch_size, train_end, test_end, learning_rate, optimizer = optimizer)

**Update:** I ran the code for a bit to see if it would start converging. One thing to note is that there are over 10,000 classes. With a batch size of 64 this means that it will take more than 150 mini-batches before your model has seen every class in your dataset. You certainly shouldn't expect the model to start achieving accurate predictions within a few hundred steps.
When I printed the loss value I noticed it was decreasing very slowly. I changed the learning rate to 0.01 and it started decreasing faster.
Also, your model is very shallow for a face recognition model. You're better off using something like a resnet variant (e.g. resnet-50 or resnet-101 from torchvision), rather than custom rolling your own model.
Primary changes include
Learning rate increased
Fix the loss function
Remove log_softmax from output of model
Add activation to the conv layers
IMO the comments about softmax are a bit misleading since you don't need to softmax the output of your model if you are using cross_entropy. You also don't need softmax to get the argmax of the prediction since both softmax and log_softmax don't change the relative ordering of the predictions (i.e. both softmax and log are strictly increasing functions).
IMO the comment about using average pooling to reduce the input size of the first fc layer is a good one and may improve performance, but you'll need to experiment with that one to find good parameters for it so I left it out of this answer.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
# Creating a simple network
class ConvNeuralNet(torch.nn.Module):
    """Small CNN classifier: two conv-conv-pool stages, then two linear layers.

    Args:
        num_classes: Number of output classes (width of the final layer).
        input_size: ``(height, width)`` of the images fed to ``forward``.
            The default ``(78, 64)`` reproduces the original hard-coded
            flattened size of 13312 (64 channels x 16 x 13). Presumably this
            is the CelebA image size after ``Resize(64)`` -- confirm against
            the data pipeline.

    Raises:
        ValueError: If ``input_size`` is too small to survive both stages.
    """

    def __init__(self, num_classes=10178, input_size=(78, 64)):
        super().__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Derive the flattened feature count from the input size instead of
        # hard-coding it, so other image sizes work without editing the class.
        height, width = input_size
        out_h = self._extent_after_stages(height)
        out_w = self._extent_after_stages(width)
        if out_h < 1 or out_w < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small for this network"
            )
        self.fc1 = nn.Linear(64 * out_h * out_w, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    @staticmethod
    def _extent_after_stages(size):
        """Return one spatial extent after both conv-conv-pool stages."""
        for _stage in range(2):
            # Two unpadded 3x3 convolutions remove 2 pixels each; the 2x2
            # stride-2 max-pool then halves the extent, rounding down.
            size = (size - 4) // 2
        return size

    def forward(self, x):
        """Return raw class logits for a batch of images ``x``."""
        # note the relu activations on the conv layers
        out = F.relu(self.conv_layer1(x))
        out = F.relu(self.conv_layer2(out))
        out = self.max_pool1(out)
        out = F.relu(self.conv_layer3(out))
        out = F.relu(self.conv_layer4(out))
        out = self.max_pool2(out)
        # you may want an adaptive average pool 2d here to reduce size of feature map further
        out = out.reshape(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        # return raw logits, not log-softmax output
        return out
def trainTorch(torch_model, train_loader, test_loader, nb_epochs, batch_size, learning_rate, optimizer, log_every=200):
    """Train ``torch_model`` with cross-entropy loss and print running statistics.

    Args:
        torch_model: Module that maps a batch of inputs to raw class logits.
        train_loader: Iterable yielding ``(inputs, integer_labels)`` batches.
        test_loader: Accepted for interface compatibility; not used.
        nb_epochs: Number of passes over ``train_loader``.
        batch_size: Accepted for interface compatibility; not used.
        learning_rate: Accepted for interface compatibility; the rate is
            whatever ``optimizer`` was built with.
        optimizer: Optimizer already bound to ``torch_model``'s parameters.
        log_every: Print accuracy and mean loss every this many steps, then
            start a new reporting window.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    # Send each batch to wherever the model lives rather than to CUDA
    # whenever a GPU merely exists; a CPU model on a GPU machine keeps working.
    first_param = next(torch_model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    train_loss = []
    total = 0
    correct = 0
    step = 0
    for _epoch in range(nb_epochs):
        for xs, ys in train_loader:
            xs, ys = xs.to(device), ys.to(device)
            optimizer.zero_grad()
            logits = torch_model(xs)
            # don't softmax or log-softmax the inputs to cross_entropy
            loss = F.cross_entropy(logits, ys)
            # The following is equivalent but less numerically stable
            # loss = F.nll_loss(F.log_softmax(logits, dim=1), ys)
            loss.backward()
            train_loss.append(loss.item())
            optimizer.step()  # apply the parameter update
            # softmax is monotonic, so argmax over raw logits is the prediction
            correct += (logits.argmax(dim=1) == ys).sum().item()
            # Count the labels actually seen: the final batch of an epoch can
            # be smaller than the loader's nominal batch size.
            total += ys.size(0)
            step += 1
            if step % log_every == 0:
                acc = float(correct) / total
                avg_loss = sum(train_loss) / len(train_loss)
                print(f'[{step}] Training accuracy: {acc*100:.2f}% Training loss: {avg_loss:.4f}')
                total = 0
                correct = 0
                train_loss = []
# Hyperparameters for the run below.
nb_epochs = 8
image_size = 64
batch_size = 64
num_classes = 10178
# increased learning rate to 0.01
learning_rate = 0.01
num_epochs = 8
# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Resize, convert to a tensor, then normalize each channel with mean 0.5 / std 0.5.
trans = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
train_loader = torch.utils.data.DataLoader(
    datasets.CelebA('data', split='train', target_type='identity', transform=trans, download=True),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.CelebA('data', split='test', target_type='identity', transform=trans),
    batch_size=batch_size)
model = ConvNeuralNet(num_classes)
# Use the device chosen above instead of repeating the CUDA availability check.
model.to(device)
#Training the model
print("Training Model")
# SGD with momentum and weight decay
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.005, momentum=0.9)
total_step = len(train_loader)
trainTorch(model, train_loader, test_loader, nb_epochs, batch_size, learning_rate, optimizer=optimizer)
Output
Training Model
[200] Training accuracy: 0.00% Training loss: 9.2286
[400] Training accuracy: 0.02% Training loss: 9.2286
[600] Training accuracy: 0.04% Training loss: 9.2265
[800] Training accuracy: 0.00% Training loss: 9.2253
[1000] Training accuracy: 0.00% Training loss: 9.2222
[1200] Training accuracy: 0.00% Training loss: 9.2105
[1400] Training accuracy: 0.02% Training loss: 9.1776
[1600] Training accuracy: 0.03% Training loss: 9.1329
[1800] Training accuracy: 0.02% Training loss: 9.1013
[2000] Training accuracy: 0.02% Training loss: 9.0830
[2200] Training accuracy: 0.02% Training loss: 9.0715
[2400] Training accuracy: 0.01% Training loss: 9.0622
[2600] Training accuracy: 0.02% Training loss: 9.0456
[2800] Training accuracy: 0.00% Training loss: 9.0301
[3000] Training accuracy: 0.00% Training loss: 9.0357
[3200] Training accuracy: 0.02% Training loss: 9.0402
[3400] Training accuracy: 0.02% Training loss: 9.0321
[3600] Training accuracy: 0.02% Training loss: 9.0217
[3800] Training accuracy: 0.02% Training loss: 8.9757
[4000] Training accuracy: 0.09% Training loss: 8.9059
[4200] Training accuracy: 0.09% Training loss: 8.8331
[4400] Training accuracy: 0.09% Training loss: 8.7601
[4600] Training accuracy: 0.09% Training loss: 8.7356
[4800] Training accuracy: 0.10% Training loss: 8.6717
[5000] Training accuracy: 0.12% Training loss: 8.6311
[5200] Training accuracy: 0.16% Training loss: 8.5515
[5400] Training accuracy: 0.16% Training loss: 8.4943
[5600] Training accuracy: 0.14% Training loss: 8.4345
[5800] Training accuracy: 0.14% Training loss: 8.4107
[6000] Training accuracy: 0.18% Training loss: 8.3317
[6200] Training accuracy: 0.22% Training loss: 8.2716
[6400] Training accuracy: 0.31% Training loss: 8.1934
[6600] Training accuracy: 0.30% Training loss: 8.1500
[6800] Training accuracy: 0.35% Training loss: 8.0979
[7000] Training accuracy: 0.21% Training loss: 8.0739
[7200] Training accuracy: 0.44% Training loss: 8.0220
[7400] Training accuracy: 0.29% Training loss: 7.9819
From the output we see the loss is decreasing and the accuracy is starting to increase. It's hard to predict how well this will work and when it will converge, but this is a good start. You'll probably need to use a better model and a learning rate scheduler to get better performance.
For example, just switching to a resnet-50:
# NOTE(review): this needs `import torchvision`; the import list earlier in
# this answer only has `from torchvision import datasets, transforms`.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` -- confirm against the installed version.
model = torchvision.models.resnet50(pretrained=True)
# Replace the final fully connected layer with one sized for num_classes outputs.
model.fc = nn.Linear(model.fc.in_features, num_classes)
The model starts converging much faster
Training Model
[200] Training accuracy: 0.05% Training loss: 9.1942
[400] Training accuracy: 0.05% Training loss: 8.9244
[600] Training accuracy: 0.15% Training loss: 8.5936
[800] Training accuracy: 0.30% Training loss: 8.3147
[1000] Training accuracy: 0.39% Training loss: 8.0745
[1200] Training accuracy: 0.43% Training loss: 7.9146
[1400] Training accuracy: 0.45% Training loss: 7.7706
[1600] Training accuracy: 0.64% Training loss: 7.6551
[1800] Training accuracy: 0.68% Training loss: 7.5784
[2000] Training accuracy: 0.74% Training loss: 7.5327
[2200] Training accuracy: 0.72% Training loss: 7.4689
[2400] Training accuracy: 0.63% Training loss: 7.4378
[2600] Training accuracy: 0.83% Training loss: 7.3789
[2800] Training accuracy: 0.90% Training loss: 7.2812
[3000] Training accuracy: 0.84% Training loss: 7.2771
[3200] Training accuracy: 0.96% Training loss: 7.2536
[3400] Training accuracy: 1.00% Training loss: 7.2538

Related

Getting Different results on Each Iteration using Long Short Term Memory[LSTM] for text classification

I am using the LSTM deep-learning technique to classify my text. First I split the data into texts and labels using the pandas library, tokenize the texts, and then divide them into training and test data sets. Whenever I run the code I get different results, with accuracy varying from 80 to 100 percent.
Here is my code,
# Build the vocabulary from the raw texts, lower-casing and stripping the listed punctuation.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS, filters='!"#$%&()*+,-./:;<=>?#[\]^_`{|}~',
                      lower=True)
tokenizer.fit_on_texts(trainDF['texts'])
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
# Convert texts to integer sequences and pad them to a common length.
X = tokenizer.texts_to_sequences(trainDF['texts'])
X = pad_sequences(X, maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)
# One indicator column per label value.
Y = pd.get_dummies(trainDF['label'])
print('Shape of label tensor:', Y.shape)
# The train/test split is fixed by random_state=42.
# NOTE(review): no other random seed is set in this snippet, so weight
# initialisation and dropout presumably differ between runs -- confirm.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.10, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)
# Embedding -> spatial dropout -> LSTM -> softmax classifier.
model = Sequential()
model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
variables_for_classification=6 #change it as per your number of categories
model.add(Dense(variables_for_classification, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
epochs = 5
batch_size = 64
# 10% of the training data is held out for validation; EarlyStopping watches val_loss.
history = model.fit(X_train, Y_train, epochs=epochs,
                    batch_size=batch_size,validation_split=0.1,callbacks=[EarlyStopping(monitor='val_loss', patience=3,
                                                                                        min_delta=0.0001)])
# accr[0] is printed as the loss and accr[1] as the accuracy.
accr = model.evaluate(X_test,Y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0],accr[1]))
Train on 794 samples, validate on 89 samples
Epoch 1/5
794/794 [==============================] - 19s 24ms/step - loss: 1.6401 - accuracy: 0.6297 - val_loss: 0.9098 - val_accuracy: 0.5843
Epoch 2/5
794/794 [==============================] - 16s 20ms/step - loss: 0.8365 - accuracy: 0.7166 - val_loss: 0.7487 - val_accuracy: 0.7753
Epoch 3/5
794/794 [==============================] - 16s 20ms/step - loss: 0.7093 - accuracy: 0.8401 - val_loss: 0.6519 - val_accuracy: 0.8652
Epoch 4/5
794/794 [==============================] - 16s 20ms/step - loss: 0.5857 - accuracy: 0.8829 - val_loss: 0.4935 - val_accuracy: 1.0000
Epoch 5/5
794/794 [==============================] - 16s 20ms/step - loss: 0.4248 - accuracy: 0.9345 - val_loss: 0.3512 - val_accuracy: 0.8652
99/99 [==============================] - 0s 2ms/step
Test set
Loss: 0.348
Accuracy: 0.869
in the last run accuracy was 100 percent.

Keras ROC different from Scikit ROC?

From the code below, it looks like evaluating the roc with keras and with scikit actually makes a difference. Does anybody know an explanation?
import tensorflow as tf
from keras.layers import Dense, Input, Dropout
from keras import Sequential
import keras
from keras.constraints import maxnorm
from sklearn.metrics import roc_auc_score
# training data: X_train, y_train
# validation data: X_valid, y_valid
# Define the custom callback we will be using to evaluate roc with scikit
class MyCustomCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints the scikit-learn ROC AUC on the validation set after each epoch.

    Reads the module-level names ``model``, ``X_valid`` and ``y_valid``
    rather than attributes of the callback instance.
    """

    def on_epoch_end(self,epoch, logs=None):
        # Score the validation set with the current weights and hand the
        # predictions to scikit-learn.
        y_pred = model.predict(X_valid)
        print("roc evaluated with scikit = ",roc_auc_score(y_valid, y_pred))
        return
# Define the model.
def model():
    """Build and compile a one-hidden-layer binary classifier.

    Reads ``vocab_size`` from the enclosing scope. Returns the compiled
    ``Sequential`` model, which reports binary accuracy and Keras AUC.
    """
    METRICS = [
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.AUC(name='auc'),
    ]
    optimizer="adam"
    dropout=0.1
    init='uniform'
    nbr_features= vocab_size-1 #2500
    dense_nparams=256
    # The local name `model` shadows this function's own name inside the body.
    model = Sequential()
    model.add(Dense(dense_nparams, activation='relu', input_shape=(nbr_features,), kernel_initializer=init, kernel_constraint=maxnorm(3)))
    model.add(Dropout(dropout))
    # Single sigmoid unit paired with binary cross-entropy.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,metrics = METRICS)
    return model
# instantiate the model
# Rebinds the name `model` from the builder function to the built instance,
# so the builder cannot be called again afterwards.
model = model()
# fit the model
history = model.fit(x=X_train, y=y_train, batch_size = 8, epochs = 8, verbose=1,validation_data = (X_valid,y_valid), callbacks=[MyCustomCallback()], shuffle=True, validation_freq=1, max_queue_size=10, workers=4, use_multiprocessing=True)
Output:
Train on 4000 samples, validate on 1000 samples
Epoch 1/8
4000/4000 [==============================] - 15s 4ms/step - loss: 0.7950 - accuracy: 0.7149 - auc: 0.7213 - val_loss: 0.7551 - val_accuracy: 0.7608 - val_auc: 0.7770
roc evaluated with scikit = 0.78766515781747
Epoch 2/8
4000/4000 [==============================] - 15s 4ms/step - loss: 0.0771 - accuracy: 0.8235 - auc: 0.8571 - val_loss: 1.0803 - val_accuracy: 0.8574 - val_auc: 0.8954
roc evaluated with scikit = 0.7795984218252997
Epoch 3/8
4000/4000 [==============================] - 14s 4ms/step - loss: 0.0085 - accuracy: 0.8762 - auc: 0.9162 - val_loss: 1.2084 - val_accuracy: 0.8894 - val_auc: 0.9284
roc evaluated with scikit = 0.7705172905961992
Epoch 4/8
4000/4000 [==============================] - 14s 4ms/step - loss: 0.0025 - accuracy: 0.8982 - auc: 0.9361 - val_loss: 1.1700 - val_accuracy: 0.9054 - val_auc: 0.9424
roc evaluated with scikit = 0.7808804338960933
Epoch 5/8
4000/4000 [==============================] - 14s 4ms/step - loss: 0.0020 - accuracy: 0.9107 - auc: 0.9469 - val_loss: 1.1887 - val_accuracy: 0.9150 - val_auc: 0.9501
roc evaluated with scikit = 0.7811174659489438
Epoch 6/8
4000/4000 [==============================] - 14s 4ms/step - loss: 0.0018 - accuracy: 0.9184 - auc: 0.9529 - val_loss: 1.2036 - val_accuracy: 0.9213 - val_auc: 0.9548
roc evaluated with scikit = 0.7822898825544409
Epoch 7/8
4000/4000 [==============================] - 14s 4ms/step - loss: 0.0017 - accuracy: 0.9238 - auc: 0.9566 - val_loss: 1.2231 - val_accuracy: 0.9258 - val_auc: 0.9579
roc evaluated with scikit = 0.7817036742516923
Epoch 8/8
4000/4000 [==============================] - 14s 4ms/step - loss: 0.0016 - accuracy: 0.9278 - auc: 0.9592 - val_loss: 1.2426 - val_accuracy: 0.9293 - val_auc: 0.9600
roc evaluated with scikit = 0.7817419052279585
As you may see, from epoch 2 onwards keras' and scikit's validation ROCs begin diverging. The same happens if I fit the model and then use keras' model.evaluate(X_valid, y_valid). Any help is greatly appreciated.
EDIT: testing the model on a separate test set, I get roc =0.76 so scikit seems to give the correct answer ( btw X_train has 4000 entries, X_valid has 1000 and test has 15000, quite an unconventional splitting but it is forced by external factors).
Also, suggestions on how to improve performance are equally appreciated.
EDIT2: To answer the reply by @arpitrathi, I modified the callback, but unfortunately without success:
class MyCustomCallback(tf.keras.callbacks.Callback):
    """Variant of the per-epoch ROC callback that calls ``predict_proba`` instead of ``predict``.

    Reads the module-level names ``model``, ``X_valid`` and ``y_valid``.
    """

    def on_epoch_end(self,epoch, logs=None):
        y_pred = model.predict_proba(X_valid)
        print("roc evaluated with scikit = ",roc_auc_score(y_valid, y_pred))
        return
# Rebinds `model` from the builder function to the built instance.
model = model()
# NOTE(review): `X_trainl` is presumably a typo for `X_train`, the name used
# in the earlier fit call -- confirm.
history = model.fit(x=X_trainl, y=y_train, batch_size = 8, epochs = 3, verbose=1,validation_data = (X_valid,y_valid), callbacks=[MyCustomCallback()], shuffle=True, validation_freq=1, max_queue_size=10, workers=4, use_multiprocessing=True)
Train on 4000 samples, validate on 1000 samples
Epoch 1/3
4000/4000 [==============================] - 20s 5ms/step - loss: 0.8266 - accuracy: 0.7261 - auc: 0.7409 - val_loss: 0.7547 - val_accuracy: 0.7627 - val_auc: 0.7881
roc evaluated with scikit = 0.7921764130168828
Epoch 2/3
4000/4000 [==============================] - 15s 4ms/step - loss: 0.0482 - accuracy: 0.8270 - auc: 0.8657 - val_loss: 1.0831 - val_accuracy: 0.8620 - val_auc: 0.9054
roc evaluated with scikit = 0.78525915504445
Epoch 3/3
4000/4000 [==============================] - 15s 4ms/step - loss: 0.0092 - accuracy: 0.8794 - auc: 0.9224 - val_loss: 1.2226 - val_accuracy: 0.8928 - val_auc: 0.9340
roc evaluated with scikit = 0.7705555215724655
Also, if I plot training and validation accuracy, i see that they both rapidly converge to 1. Is it strange?
The problem lies in the arguments that you passed to the sklearn function for roc_auc_score() calculation. You should use model.predict_proba() instead of model.predict().
def on_epoch_end(self,epoch, logs=None):
    # Print the scikit-learn ROC AUC for the validation set at the end of each epoch.
    # NOTE(review): `predict_proba` is presumably only available on older
    # Keras Sequential models -- confirm against the installed version.
    y_pred = model.predict_proba(X_valid)
    print("roc evaluated with scikit = ",roc_auc_score(y_valid, y_pred))
    return
Sklearn and keras use different default parameters when computing AUC. Increasing the number of thresholds keras uses to compute AUC (i.e., increasing num_thresholds) can help the keras AUC better match the sklearn AUC.

Keras accuracy never exceeds 19%

I am taking the images from the SVHN (street view house number dataset, stanford) and I could really use some help in figuring out why my accuracy does not increase past 19%... This is essentially an MNIST tutorial with more difficult images (other numbers could be off center, blurs, shadows etc..)
I essentially take each image and subtract that image's mean then I normalize to 0-1 (divide by 255.)
The pipeline is simple enough:
2 Convolution 2d Layers (32 filters, 3x3)
MaxPool (2x2)
Dropout (.25)
2 Convolution 2d layers (64 filters, 3x3)
Max Pool (2x2)
Dropout(.25)
Flatten
Dense Relu
Dropout(.5)
Dense Softmax (10)
1792/73257 [..............................] - ETA: 3:17 - loss: 2.3241 - acc: 0.1602
1920/73257 [..............................] - ETA: 3:16 - loss: 2.3203 - acc: 0.1625
2048/73257 [..............................] - ETA: 3:14 - loss: 2.3177 - acc: 0.1621
2176/73257 [..............................] - ETA: 3:13 - loss: 2.3104 - acc: 0.1682
...
...
...
53376/73257 [====================>.........] - ETA: 51s - loss: 2.2439 - acc: 0.1879
53504/73257 [====================>.........] - ETA: 51s - loss: 2.2439 - acc: 0.1879
53632/73257 [====================>.........] - ETA: 50s - loss: 2.2439 - acc: 0.1878
53760/73257 [=====================>........] - ETA: 50s - loss: 2.2439 - acc: 0.1879
Can anyone help me figure out what I'm doing wrong? Are there any tips for figuring out why it would increase in the beginning as normal and then taper off so quickly?
I am using categorical cross entropy with an rmsprop optimizer
epochs: 20
batch_size: 128
image_size: 32x32
# Stage 1: two 32-filter 3x3 convolutions ('same' padding), max-pool, dropout.
model = Sequential()
model.add(Convolution2D(32, (3, 3),
                        strides=1,
                        activation='relu',
                        padding='same',
                        input_shape=input_shape,
                        data_format='channels_last'))
# NOTE(review): unlike the other convolutions, this one is given no activation argument.
model.add(Convolution2D(32, (3, 3), padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_last'))
model.add(Dropout(0.25))
# Stage 2: two 64-filter 3x3 convolutions with relu, max-pool, dropout.
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
# The hidden Dense layer is as wide as the flattened feature vector.
model.add(Dense(model.output_shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
#METHOD1
# print('compiling model...')
# model.compile(loss='mean_squared_error',
# optimizer='sgd',
# metrics=['accuracy'])
# print('fitting model...')
#
# model.fit(X_train, y_train, batch_size=64, epochs=1, verbose=1)
# METHOD2
# Plain SGD with learning rate 0.05 and categorical cross-entropy.
sgd = SGD(lr=0.05)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
model.fit(X_train, y_train,
          epochs=20,
          batch_size=128)
score = model.evaluate(X_test, y_test, batch_size=128)

Linear regression model accuracy is always 1.0 in tensorflow

Problem:
I am building a model that will predict housing price. So, firstly I
decided to build a Linear regression model in Tensorflow. But when I
start training I see that my accuracy is always 1
I am new to machine learning. Please, could someone tell me what's going wrong? I can't figure it out. I searched on Google but didn't find any answer that solves my problem.
Here's my code
# Keep three feature columns and the SalePrice target.
df_train = df_train.loc[:, ['OverallQual', 'GrLivArea', 'GarageArea', 'SalePrice']]
df_X = df_train.loc[:, ['OverallQual', 'GrLivArea', 'GarageArea']]
df_Y = df_train.loc[:, ['SalePrice']]
df_yy = get_dummies(df_Y)
print("Shape of df_X: ", df_X.shape)
# Hold out 15% of the rows for testing; convert everything to float32 arrays.
X_train, X_test, y_train, y_test = train_test_split(df_X, df_yy, test_size=0.15)
X_train = np.asarray(X_train).astype(np.float32)
X_test = np.asarray(X_test).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)
# Linear model: prediction = X @ W + b, with a single output column.
X = tf.placeholder(tf.float32, [None, num_of_features])
y = tf.placeholder(tf.float32, [None, 1])
W = tf.Variable(tf.zeros([num_of_features, 1]))
b = tf.Variable(tf.zeros([1]))
prediction = tf.add(tf.matmul(X, W), b)
num_epochs = 20000
# calculating loss
# NOTE(review): a softmax cross-entropy (classification) loss is applied to
# the single-column regression output here.
cost = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=y, logits=prediction))
optimizer = tf.train.GradientDescentOptimizer(0.00001).minimize(cost)
# Both `prediction` and `y` have exactly one column, so argmax along axis 1
# is index 0 for every row on both sides of this comparison.
correct_prediction = tf.equal(tf.argmax(prediction, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(num_epochs):
        # Report training accuracy every 100 epochs.
        if epoch % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={X: X_train, y: y_train})
            print('step %d, training accuracy %g' % (epoch, train_accuracy))
        # One full-batch gradient-descent step per epoch.
        optimizer.run(feed_dict={X: X_train, y: y_train})
    print('test accuracy %g' % accuracy.eval(feed_dict={
        X: X_test, y: y_test}))
Output is:
step 0, training accuracy 1
step 100, training accuracy 1
step 200, training accuracy 1
step 300, training accuracy 1
step 400, training accuracy 1
step 500, training accuracy 1
step 600, training accuracy 1
step 700, training accuracy 1
............................
............................
step 19500, training accuracy 1
step 19600, training accuracy 1
step 19700, training accuracy 1
step 19800, training accuracy 1
step 19900, training accuracy 1
test accuracy 1
EDIT:
I changed my cost function to this
# Sum of squared errors divided by 2 * 1241.
# NOTE(review): 1241 is presumably the number of training rows -- confirm.
cost = tf.reduce_sum(tf.pow(prediction-y, 2))/(2*1241)
But still my output is always 1.
EDIT 2:
In response to lejlot comment:
Thanks lejlot. I changed my accuracy code to this
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Set up summary logging and record the graph.
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("/tmp/hpp1")
    writer.add_graph(sess.graph)
    for epoch in range(num_epochs):
        # `epoch % 5` is truthy when epoch is NOT a multiple of 5, so
        # summaries are written on those epochs.
        if epoch % 5:
            s = sess.run(merged_summary, feed_dict={X: X_train, y: y_train})
            writer.add_summary(s, epoch)
        # One full-batch optimizer step per epoch.
        sess.run(optimizer,feed_dict={X: X_train, y: y_train})
        # Print the current cost and parameters every display_step epochs.
        if (epoch+1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: X_train, y: y_train})
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(c), \
                  "W=", sess.run(W), "b=", sess.run(b))
    print("Optimization Finished!")
    training_cost = sess.run(cost, feed_dict={X: X_train, y: y_train})
    print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
But the output is all nan
Output:
....................................
Epoch: 19900 cost= nan W= nan b= nan
Epoch: 19950 cost= nan W= nan b= nan
Epoch: 20000 cost= nan W= nan b= nan
Optimization Finished!
Training cost= nan W= nan b= nan
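You want to use linear regression, but you are actually using logistic regression. Take a look at tf.losses.softmax_cross_entropy: it applies a softmax to the logits, which yields a probability distribution, i.e. a vector of numbers that sum up to 1. In your case, the vector has size=1, hence the softmax always outputs [1]. For the same reason, tf.argmax over that single column is always 0 for both the prediction and the label, so the reported accuracy is always 1.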
Here are two examples that will help you see the difference: linear regression and logistic regression.

LSTM labeling all samples as the same class

I'm trying to design an LSTM network using Keras to combine word embeddings and other features in a binary classification setting. My test set contains 250 samples per class.
When I run my model using only the word embedding layers (the "model" layer in the code), I get an average F1 of around 0.67. When I create a new branch with the other features of fixed size that I compute separately ("branch2") and merge these with the word embeddings using "concat", the predictions all revert to a single class (giving perfect recall for that class), and average F1 drops to 0.33.
Am I adding in the features and training/testing incorrectly?
def create_model(embedding_index, sequence_features, optimizer='rmsprop'):
    """Build and compile a two-branch classifier.

    Branch 1 runs word embeddings through Conv1D, max-pooling and a
    bidirectional LSTM. Branch 2 is one dense layer over the extra features
    in ``sequence_features``. The branch outputs are concatenated and fed to
    a final 2-unit dense layer. ``word_index`` is read from the enclosing
    scope. Returns the compiled merged model.
    """
    # Branch 1: word embeddings
    model = Sequential()
    embedding_layer = create_embedding_matrix(embedding_index, word_index)
    model.add(embedding_layer)
    model.add(Convolution1D(nb_filter=32, filter_length=3, border_mode='same', activation='tanh'))
    model.add(MaxPooling1D(pool_length=2))
    model.add(Bidirectional(LSTM(100)))
    model.add(Dropout(0.2))
    model.add(Dense(2, activation='sigmoid'))
    # Branch 2: other features
    branch2 = Sequential()
    # Input width is the number of columns in sequence_features.
    dim = sequence_features.shape[1]
    branch2.add(Dense(15, input_dim=dim, init='normal', activation='tanh'))
    branch2.add(BatchNormalization())
    # Merging branches to create final model
    final_model = Sequential()
    final_model.add(Merge([model,branch2], mode='concat'))
    # NOTE(review): the 2-unit output uses a sigmoid activation while the loss
    # below is categorical_crossentropy.
    final_model.add(Dense(2, init='normal', activation='sigmoid'))
    final_model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                        metrics=['accuracy','precision','recall','fbeta_score','fmeasure'])
    return final_model
def run(input_train, input_dev, input_test, text_col, label_col, resfile, embedding_index):
    """Vectorize the data, train the two-branch model, and print F1 scores on the test set.

    ``input_dev`` and ``resfile`` are accepted but not used in the body.
    Nothing is returned.
    """
    # Processing text and features
    data_train, labels_train, data_test, labels_test = vectorize_text(input_train, input_test, text_col,label_col)
    x_train, y_train = data_train, labels_train
    x_test, y_test = data_test, labels_test
    seq_train = get_sequence_features(input_train).as_matrix()
    seq_test = get_sequence_features(input_test).as_matrix()
    # Generating model
    filepath = lstm_config.WEIGHTS_PATH
    # Save weights only when the validation F-measure improves.
    checkpoint = ModelCheckpoint(filepath, monitor='val_fmeasure', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
    model = create_model(embedding_index, seq_train)
    # validation_split=0.33 holds out a third of the training data for validation.
    model.fit([x_train, seq_train], y_train, validation_split=0.33, nb_epoch=3, batch_size=100, callbacks=callbacks_list, verbose=1)
    # Evaluating
    # `scores` is computed but not read afterwards.
    scores = model.evaluate([x_test, seq_test], y_test, verbose=1)
    time.sleep(0.2)
    # Convert the predicted class indices back to categorical form before
    # comparing them with y_test.
    preds = model.predict_classes([x_test, seq_test])
    preds = to_categorical(preds)
    print(metrics.f1_score(y_true=y_test, y_pred=preds, average="micro"))
    print(metrics.f1_score(y_true=y_test, y_pred=preds, average="macro"))
    print(metrics.classification_report(y_test, preds))
Output:
Using Theano backend. Found 2999999 word vectors.
Processing text dataset Found 7165 unique tokens.
Shape of data tensor: (1996, 50)
Shape of label tensor: (1996, 2)
1996 train 500 test
Train on 1337 samples, validate on 659 samples
Epoch 1/3 1300/1337
[============================>.] - ETA: 0s - loss: 0.6767 - acc:
0.6669 - precision: 0.5557 - recall: 0.6815 - fbeta_score: 0.6120 - fmeasure: 0.6120Epoch 00000: val_fmeasure im1337/1337
[==============================] - 10s - loss: 0.6772 - acc: 0.6672 -
precision: 0.5551 - recall: 0.6806 - fbeta_score: 0.6113 - fmeasure:
0.6113 - val_loss: 0.7442 - val_acc: 0 .0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_fbeta_score: 0.0000e+00 - val_fmeasure: 0.0000e+00
Epoch 2/3 1300/1337
[============================>.] - ETA: 0s - loss: 0.6634 - acc:
0.7269 - precision: 0.5819 - recall: 0.7292 - fbeta_score: 0.6462 - fmeasure: 0.6462Epoch 00001: val_fmeasure di1337/1337
[==============================] - 9s - loss: 0.6634 - acc: 0.7263 -
precision: 0.5830 - recall: 0.7300 - fbeta_score: 0.6472 - fmeasure:
0.6472 - val_loss: 0.7616 - val_acc: 0. 0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_fbeta_score: 0.0000e+00 - val_fmeasure: 0.0000e+00
Epoch 3/3 1300/1337
[============================>.] - ETA: 0s - loss: 0.6542 - acc:
0.7354 - precision: 0.5879 - recall: 0.7308 - fbeta_score: 0.6508 - fmeasure: 0.6508Epoch 00002: val_fmeasure di1337/1337
[==============================] - 8s - loss: 0.6545 - acc: 0.7337 -
precision: 0.5866 - recall: 0.7307 - fbeta_score: 0.6500 - fmeasure:
0.6500 - val_loss: 0.7801 - val_acc: 0. 0000e+00 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_fbeta_score: 0.0000e+00 - val_fmeasure: 0.0000e+00 500/500 [==============================] - 0s
500/500 [==============================] - 1s
0.5 /usr/local/lib/python3.4/dist-packages/sklearn/metrics/classification.py:1074:
UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in
labels with no predicted samples. 'precision', 'predicted', average,
warn_for)
0.333333333333 /usr/local/lib/python3.4/dist-packages/sklearn/metrics/classification.py:1074:
UndefinedMetricWarning: Precision and F-score are ill-defined and
being set to 0.0 in labels with no predicted samples.
precision recall f1-score support
0 0.00 0.00 0.00 250
1 0.50 1.00 0.67 250
avg / total 0.25 0.50 0.33 500

Resources