getting the classification labels for torchvision's pretrained networks - image-processing

PyTorch's torchvision package provides pre-trained neural networks for image classification. I've been using the following code to classify an image with AlexNet (note: some of this code is adapted from this webpage):
from PIL import Image
import torch
from torchvision import transforms
from torchvision import models

# Preprocessing: resize, center-crop, convert to tensor, and normalize
# with the ImageNet channel means and standard deviations.
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225])])

# Load the image and add a batch dimension.
img = Image.open('/path/to/image.jpg')
img = transform(img)
img = torch.unsqueeze(img, 0)

# Run AlexNet in evaluation mode and take the top-5 class probabilities.
alexnet = models.alexnet(pretrained=True)
alexnet.eval()
out = alexnet(img)
percents = torch.nn.functional.softmax(out, dim=1)[0] * 100
top5_vals, top5_inds = percents.topk(5)
There are 1,000 total classes, and the top5_inds variable gives me the indices of the top 5 classes. But how do I get the associated labels (e.g. snail, basketball, banana)? I can't seem to find any such list in PyTorch's documentation or on the alexnet variable itself.

Torchvision models are pretrained on the ImageNet dataset. Due to its comprehensiveness and size, ImageNet is the most commonly used dataset for pretraining & transfer learning. As you noted, it has 1000 classes. The complete class list can be searched, or you can refer to this listing on GitHub: https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a
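Once you have that list saved locally, mapping indices to labels is straightforward. A minimal sketch, assuming you have saved the gist's 1,000 labels to a plain-text file imagenet_classes.txt with one label per line, in class-index order (the filename and format here are illustrative, not part of torchvision):

# Hypothetical local file: one ImageNet label per line, in class-index order.
with open('imagenet_classes.txt') as f:
    labels = [line.strip() for line in f]

# Map the top-5 indices from the snippet above to human-readable labels.
for val, idx in zip(top5_vals, top5_inds):
    print(f'{labels[idx.item()]}: {val.item():.2f}%')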

Related

Can I use autoencoders for dimensionality reduction with a very small dataset?

I have a numeric dataset with just 55 samples and 270 features. I'm trying to separate these samples into clusters, but clustering is hard in such a high-dimensional space, so I'm thinking about using an autoencoder for dimensionality reduction. I'm not sure this is possible with such a small dataset, though. Note that generality matters here: the idea is for the same approach to handle different datasets with similar characteristics.
With the following code, using mean squared error as the loss function, I have achieved a loss of 4.9, which I think is high. Note that the dataset is already normalized.
Is it possible to use autoencoders for dimensionality reduction in this case?
This is the source for building the autoencoder and training it:
import keras
import tensorflow as tf
import numpy as np
from keras import layers
from keras import regularizers
from keras.callbacks import EarlyStopping

preservationRatio = 0.99
epochs = 500

data = loadData("dataset.csv")  # loadData is my own CSV loader (not shown)
samples = len(data)
features = len(data[0])

# With only 55 samples, train and test are the same array here.
x_train = data
x_test = data

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                   min_delta=0.001, patience=50)

encoding_dim = int(features * preservationRatio)

input_number = keras.Input(shape=(features,))
# Add a Dense layer with an L1 activity regularizer
encoded = layers.Dense(encoding_dim, activation='relu',
                       activity_regularizer=regularizers.l1(1e-7))(input_number)
decoded = layers.Dense(features, activation='sigmoid')(encoded)

autoencoder = keras.Model(input_number, decoded)
encoder = keras.Model(input_number, encoded)

# This is our encoded input
encoded_input = keras.Input(shape=(encoding_dim,))
# Retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# Create the decoder model
decoder = keras.Model(encoded_input, decoder_layer(encoded_input))

autoencoder.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())

x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
print(x_train.shape)
print(x_test.shape)

history = autoencoder.fit(x_train, x_train,
                          epochs=epochs,
                          batch_size=20,
                          callbacks=[es],
                          shuffle=True,
                          validation_data=(x_test, x_test),
                          verbose=1)
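For completeness, the downstream step the question describes (clustering in the reduced space) would look something like the following. This is a minimal sketch under the question's setup, not part of the original code; KMeans and the choice of 3 clusters are illustrative assumptions:

from sklearn.cluster import KMeans

# Project the samples into the learned bottleneck space...
reduced = encoder.predict(x_train)

# ...and cluster there instead of in the original 270-dimensional space.
# The number of clusters (3) is an arbitrary placeholder.
kmeans = KMeans(n_clusters=3, random_state=0).fit(reduced)
print(kmeans.labels_)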

how to get more accuracy on a CNN with a small number of images

Currently I am working on the flower classification dataset from Kaggle, which has only 210 images. With this set of images I am getting an accuracy of only 11% on the validation set.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import warnings
warnings.filterwarnings('ignore')

flower_img = r'C:\Users\asus\Downloads\flower_images\flower_images'
data = pd.read_csv(r'C:\Users\asus\Downloads\flower_images\flower_labels.csv')
image_name = [img.split('.')[-2] for img in os.listdir(flower_img)]
label_array = np.array(data['label'])
label_unique = np.unique(label_array)

# Map the integer labels to flower names.
names = ['phlox', 'rose', 'calendula', 'iris', 'leucanthemum maximum',
         'bellflower', 'viola', 'rudbeckia laciniata', 'peony', 'aquilegia']
Flower_names = {i: names[i] for i in range(10)}
print(Flower_names)

# Load every image, resize to 128x128, and store the array in the dataframe.
i = 0
for img in os.listdir(flower_img):
    path = os.path.join(flower_img, img)
    img = cv2.imread(path)
    img = cv2.resize(img, (128, 128))
    data['file'][i] = np.array(img)
    i += 1

plt.imshow(data['file'][0])
plt.show()

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Activation, MaxPool2D, Dropout, Flatten

model = Sequential()
model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=(128, 128, 3)))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Conv2D(128, kernel_size=3, activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.25))  # dropout moved before the output layer; it must not follow the softmax
model.add(Dense(10, activation='softmax'))

from keras.optimizers import Adam
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.002),
              metrics=['accuracy'])
model.summary()

x = np.array([i for i in data['file']]).reshape(-1, 128, 128, 3)
y = np.array([i for i in data['label']])

from keras.utils import to_categorical
y = to_categorical(y)

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y)

model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)
model.evaluate(x_test, y_test)
model.evaluate(x_train, y_train)
How can I increase accuracy using only this dataset? Also, how can I predict classes for any input image?
Link of Flower color images dataset : https://www.kaggle.com/olgabelitskaya/flower-color-images
Your dataset size is very small. Convolutional neural networks perform best when trained on very large datasets; you really want thousands of images (or more!) in your dataset.
You can enhance your current dataset with image-processing techniques that increase its effective size. These techniques take the original images and skew, rotate, and otherwise modify them to bolster the amount of training data; a sketch follows below. They can be helpful, but increasing the natural size of the dataset is preferred.
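As an illustration of that augmentation idea, a minimal sketch using Keras's ImageDataGenerator with the x_train/y_train arrays from the question; the specific transform ranges and batch size are arbitrary examples:

from keras.preprocessing.image import ImageDataGenerator

# Randomly rotate, shift, shear, zoom, and flip the originals on the fly.
datagen = ImageDataGenerator(rotation_range=20,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             shear_range=0.1,
                             zoom_range=0.1,
                             horizontal_flip=True)

# Train on augmented batches instead of the raw 210 images.
model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
                    steps_per_epoch=len(x_train) // 32,
                    epochs=50,
                    validation_data=(x_test, y_test))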
If you cannot increase the size of the dataset, consider whether you need a CNN at all. Other algorithms may give better results when trained on a small dataset; take a look at support vector machines or k-NN.
If you must use a CNN, transfer learning is a good solution: you reuse the features of a model trained on a large dataset and apply them to your problem; a sketch follows below. I have had great success with this approach.
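A minimal transfer-learning sketch in the same Keras style as the question, assuming a VGG16 base pretrained on ImageNet; the choice of base model and head sizes is illustrative:

from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout

# Pretrained convolutional base; exclude the ImageNet classifier head.
base = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
base.trainable = False  # freeze the pretrained features

tl_model = Sequential([
    base,
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),  # 10 flower classes
])
tl_model.compile(loss='categorical_crossentropy', optimizer='adam',
                 metrics=['accuracy'])
tl_model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10)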
The things you can do:
Progressive resizing (link)
Image augmentation (link)
Transfer learning (link)
To be honest, there are many more techniques that could be used to get more out of the data; try searching on this topic. The items above are just the major ones that come to mind, and the links point to example resources. You can dig deeper with dedicated research.
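On the second part of the question (predicting a class for an arbitrary input image), a minimal sketch using the preprocessing and the trained model from the question itself; the file path is a placeholder:

import cv2
import numpy as np

# Preprocess a single image exactly as during training: BGR load, 128x128 resize.
img = cv2.imread('/path/to/new_image.jpg')
img = cv2.resize(img, (128, 128))
img = img.reshape(1, 128, 128, 3)

# Predict class probabilities and map the argmax back to a flower name.
probs = model.predict(img)
print(Flower_names.get(int(np.argmax(probs))))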

Keras error in Dense layer, expected 4 dimensions got array with shape (1024,2) [duplicate]

This question already has an answer here: Multi-dimensional input layers in Keras (1 answer). Closed 5 years ago.
I'm attempting to train a 3-layer dense neural network using Keras with a GPU-enabled TensorFlow backend.
The dataset I have is 4 million 20x40px images, placed in directories named after the category they belong to.
Because of the large amount of data, I can't just load it all into RAM and feed it to my model, so I thought Keras's ImageDataGenerator, specifically the flow_from_directory() method, would do the trick. This yields tuples of (x, y), where x is the numpy array of a batch of images and y is the corresponding batch of labels.
I expected the model to know to take the numpy array as its input, so I set up my input shape as (None, 20, 40, 3), where None is the batch size, 20 and 40 are the image dimensions, and 3 is the number of channels. This does not work, however; when I try to train my model I keep getting the error:
ValueError: Error when checking target: expected dense_3 to have 4 dimensions, but got array with shape (1024, 2)
I know the cause is that it is getting the tuple from flow_from_directory, and I guess I could change the input shape to match; however, I fear that this would render my model useless, as I will be using images to make predictions, not pre-categorized tuples. So my question is: how can I get flow_from_directory to feed just the image to my model and use the label only to validate its training? Am I misunderstanding something here?
For reference, here is my code:
from keras.models import Model
from keras.layers import *
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard

# Prepare the image data generators.
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(
    '/path/to/train_data/',
    target_size=(20, 40),
    batch_size=1024,
)
test_generator = test_datagen.flow_from_directory(
    '/path/to/test_data/',
    target_size=(20, 40),
    batch_size=1024,
)

# Define input tensor.
input_t = Input(shape=(20, 40, 3))

# Now create the layers and pass the input tensor through them.
hidden_1 = Dense(units=32, activation='relu')(input_t)
hidden_2 = Dense(units=16)(hidden_1)
prediction = Dense(units=1)(hidden_2)

# Now put it all together and create the model.
model = Model(inputs=input_t, outputs=prediction)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# Prepare TensorBoard callback and start training.
tensorboard = TensorBoard(log_dir='./graph', histogram_freq=0, write_graph=True, write_images=True)
print(test_generator)
model.fit_generator(
    train_generator,
    steps_per_epoch=2000,
    epochs=100,
    validation_data=test_generator,
    validation_steps=800,
    callbacks=[tensorboard]
)

# Save trained model.
model.save('trained_model.h5')
Your input shape is wrong for Dense layers.
Dense layers expect inputs of shape (None, length), so applying them directly to (None, 20, 40, 3) image batches leaves the model's output 4-dimensional, which is why Keras complains when comparing it to the (1024, 2) targets.
You'll either need to reshape your inputs so that they become vectors:
imageBatch = imageBatch.reshape((imageBatch.shape[0], 20*40*3))
Or use convolutional layers, which expect exactly that type of input shape, (None, nRows, nCols, nChannels).
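To make that concrete, here is a minimal sketch of a working variant under the question's setup; a Flatten layer inside the model is equivalent to the reshape suggestion above. The (1024, 2) target shape suggests two class directories, so the head uses 2 softmax units; the hidden-layer sizes are carried over from the question:

from keras.models import Model
from keras.layers import Input, Flatten, Dense

input_t = Input(shape=(20, 40, 3))
# Flatten each image batch from (None, 20, 40, 3) to (None, 2400)
# so the Dense layers receive 2-D input.
flat = Flatten()(input_t)
hidden_1 = Dense(units=32, activation='relu')(flat)
hidden_2 = Dense(units=16, activation='relu')(hidden_1)
# Two output units with softmax to match the (batch, 2) one-hot labels
# produced by flow_from_directory's default categorical class mode.
prediction = Dense(units=2, activation='softmax')(hidden_2)

model = Model(inputs=input_t, outputs=prediction)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])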

How to prevent simple keras autoencoder from over compressing data?

I am trying to use the Keras frontend with the TensorFlow backend for a simple autoencoder, as a multidimensional-scaling technique to plot multidimensional data in 2 dimensions. Many times when I run it (I'm not sure how to set the random seed for Keras, by the way; see the sketch just below), one of the dimensions collapses, yielding a 1-dimensional embedding (the plots should help explain). Why is this happening? How can I make sure both dimensions are preserved and utilized by the autoencoder? I realize this is the simplest, most basic form of an autoencoder, but I would like to build on it to make better autoencoders for this task.
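On the seeding aside, a minimal sketch; the exact call depends on the TensorFlow version, and full run-to-run determinism can additionally depend on GPU ops:

import os
import random
import numpy as np
import tensorflow as tf

# Seed Python's hash, the random module, NumPy, and TensorFlow.
os.environ['PYTHONHASHSEED'] = '0'
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)  # TF 2.x; in TF 1.x use tf.set_random_seed(0)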
from sklearn.datasets import load_iris
from sklearn import model_selection
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load data
X = load_iris().data
Y = pd.get_dummies(load_iris().target).to_numpy()  # .as_matrix() was removed in recent pandas
X_tr, X_te, Y_tr, Y_te = model_selection.train_test_split(X, Y, test_size=0.3, stratify=Y.argmax(axis=1))
dims = X_tr.shape[1]
n_classes = Y_tr.shape[1]

# Autoencoder
encoding_dim = 2

# this is our input placeholder
input_data = tf.keras.Input(shape=(4,))
# "encoded" is the encoded representation of the input
encoded = tf.keras.layers.Dense(encoding_dim, activation='relu')(input_data)
# "decoded" is the lossy reconstruction of the input
decoded = tf.keras.layers.Dense(4, activation='sigmoid')(encoded)

# this model maps an input to its reconstruction
autoencoder = tf.keras.models.Model(input_data, decoded)
# this model maps an input to its encoded representation
encoder = tf.keras.models.Model(input_data, encoded)

autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
network_training = autoencoder.fit(X_tr, X_tr,
                                   epochs=100,
                                   batch_size=5,
                                   shuffle=True,
                                   verbose=False,
                                   validation_data=(X_te, X_te))

# Plot the 2-D embeddings of the test set
embeddings = encoder.predict(X_te)
plt.scatter(embeddings[:, 0], embeddings[:, 1], c=Y_te.argmax(axis=1), edgecolor="black", linewidth=1)
[Plots not shown: scatter plots of the 2-D embedding from two separate runs of the algorithm; in one run the embedding collapses onto a single dimension.]

Using SVM after feature extraction for Nudity Detection Algorithm

Nudity detection algorithm:
1. Normalization
2. Zoning
3. Feature extraction
4. Classification using SVM
The following methods are used.
1. Normalization: First the image is converted to .jpg format and resized to 256x256. Then it is converted into the YCbCr color space; for this I use OpenCV in Python. The code is below.
2. Zoning: Normalized images are then divided into three zones, the assumption being that nudity is mostly found in the central zone of an image.
3. Feature extraction: With the image in YCbCr, skin pixels are filtered by thresholding in the range (0,133,77) to (255,173,127) and divided into three zones. For each zone, two color features (the number of connected skin pixels and the proportion of skin pixels to total pixels) and two texture features (homogeneity and correlation) are calculated; the texture features use a GLCM (skimage.feature module). Here is the code:
import os
import numpy as np
import cv2
import skimage.feature as sf

total_pixels = 256.0 * 256.0

class normalize:
    def __init__(self, src, dst):
        self.src = src
        self.dst = dst + "_1.jpg"

    def resize(self):
        x, y = 256, 256
        src = cv2.imread(self.src, 1)
        src = cv2.resize(src, (x, y))
        cv2.imwrite(self.dst, src)
        dst = cv2.imread(self.dst, 1)
        return dst

"""Segmentation module is used to segment out skin pixels in YCrCb color space"""
def segmentation(src):
    img = cv2.cvtColor(src, cv2.COLOR_BGR2YCrCb)  # was cv.CV_BGR2YCrCb in old OpenCV 2.x
    dst = cv2.inRange(img, (0, 133, 77), (255, 173, 127))
    return dst

"""Image zoning and feature extraction module"""
class features:
    def __init__(self, src):
        self.zone1 = src
        self.zone2 = src[30:226, 30:226]
        self.zone3 = src[60:196, 60:196]

    def createglcm(self, zone):
        return sf.greycomatrix(zone, [1],
                               [0, np.pi/4, np.pi/2, -np.pi/2, -np.pi/4, np.pi*25/12],
                               normed=True)

    def getCorrelation(self, glcm):
        return sf.greycoprops(glcm, 'correlation')

    def getHomogeneity(self, glcm):
        return sf.greycoprops(glcm, 'homogeneity')

    def getcolorfeatures(self, zone):
        # Sum contour areas to approximate the count of connected skin pixels.
        contours, hierarchy = cv2.findContours(zone, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        skin_pixel_connected = 0
        for i in range(len(contours)):
            skin_pixel_connected = skin_pixel_connected + cv2.contourArea(contours[i])
        return [skin_pixel_connected, skin_pixel_connected / total_pixels]
Now I have retrieved a list of various features, as given in the code. How do I build a feature vector for the SVM from these Python lists? And how do I use the SVM for training with nude and non-nude images (I have 5,000 images) and then for detection? Can anybody suggest something?
Create the SVM object.
Train your SVM using the fit method with the features of your training pictures.
Use the predict method to classify your test data.
Code:
from sklearn import svm
clf = svm.SVC()
clf.fit(X, y)
clf.predict(X_test)
For the feature vector X, simply merge your features into one NumPy array per training image; a sketch follows below.
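A minimal sketch of that assembly step, using the features class from the question; the helper extract_features and the path lists nude_paths/non_nude_paths are illustrative, not part of the original code:

import numpy as np

def extract_features(img_path):
    """Flatten the per-zone color and texture features into one row."""
    img = normalize(img_path, img_path).resize()
    skin = segmentation(img)
    f = features(skin)
    row = []
    for zone in (f.zone1, f.zone2, f.zone3):
        glcm = f.createglcm(zone)
        row.extend(f.getcolorfeatures(zone))      # 2 color features
        row.extend(f.getCorrelation(glcm).ravel())   # 6 correlation values
        row.extend(f.getHomogeneity(glcm).ravel())   # 6 homogeneity values
    return row

# Hypothetical path lists; label nude images 1 and non-nude images 0.
X = np.array([extract_features(p) for p in nude_paths + non_nude_paths])
y = np.array([1] * len(nude_paths) + [0] * len(non_nude_paths))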
After a cross-validation search, C=100.0 and gamma=0.07. This is what my code looks like:
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', C=100.0, gamma=0.07, cache_size=800)
classifier.fit(np.array(featurespace), np.array(classes))
classifier.predict(X_test)
