Model not learning with more training data - image-processing

So I'm trying to make a colorizer using a convolutional neural network. I found some models on the internet and I think they work, because I tested them by feeding a single grayscale image and letting the model memorize it. The output doesn't look exactly like the original, but the grayscale image does get colorized, which I think is at least OK.
Then I tried to feed a lot of data to the model and train for around 10 to 30 epochs, and now the model outputs a black and white image when I run it. I'm not really sure why this is happening. Please help.
from PIL import Image
import glob
import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage import io, color
# Neural network model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# class lanscapes():
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("running on the GPU")
else:
    device = torch.device("cpu")
    print("running on the CPU")
label = 'training'
IMG_SIZE=800
size=5
REBUILD_DATA = True
class landscapes():
    # imagePath = glob.glob(imageFolderPath + '/*.JPG')
    def make_training_data(self):
        global training_data
        training_data = []
        for f in tqdm(os.listdir(label)):
            path = os.path.join(label, f)
            img = cv2.imread(path)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            training_data.append(img)
        training_data = np.asarray(training_data)
        f, axarr = plt.subplots(2)
        axarr[0].imshow(training_data[0])
        axarr[1].imshow(training_data[1])

    def rgb2lab(self, training_data):
        global L
        global ab
        L = np.zeros((size, 800, 800))
        ab = np.zeros((size, 800, 800, 2))
        for i in tqdm(range(size)):
            L[i] = color.rgb2lab(1.0 / 255 * training_data[i])[:, :, 0]    # L channel (grayscale input)
            ab[i] = color.rgb2lab(1.0 / 255 * training_data[i])[:, :, 1:]  # ab channels (color targets)
        ab = ab / 128
        # L = L / 100
        L = torch.Tensor([L]).reshape(size, 1, 800, 800)
        ab = torch.Tensor([ab]).reshape(size, 2, 800, 800)
        return L, ab

if REBUILD_DATA:
    td = landscapes()
    td.make_training_data()
# training_data=np.load("training_data.npy",allow_pickle=True)
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, stride=(1, 1), padding=(1, 1))  # conv layer with kernel size 3
        self.conv2 = nn.Conv2d(32, 32, 3, stride=(2, 2), padding=(1, 1))
        self.conv3 = nn.Conv2d(32, 64, 3, stride=(1, 1), padding=(1, 1))
        self.conv3a = nn.Conv2d(64, 64, 3, stride=(2, 2), padding=(1, 1))
        self.conv4 = nn.Conv2d(64, 128, 3, stride=(1, 1), padding=(1, 1))
        self.conv5 = nn.Conv2d(128, 128, 3, stride=(2, 2), padding=(1, 1))
        self.conv6 = nn.Conv2d(128, 256, 3, stride=(1, 1), padding=(1, 1))
        self.conv7 = nn.Conv2d(256, 128, 3, stride=(1, 1), padding=(1, 1))
        self.conv8 = nn.Conv2d(128, 64, 3, stride=(1, 1), padding=(1, 1))
        self.conv9 = nn.Conv2d(64, 32, 3, stride=(1, 1), padding=(1, 1))
        self.conv10 = nn.Conv2d(32, 16, 3, stride=(1, 1), padding=(1, 1))
        self.conv11 = nn.Conv2d(16, 2, 3, stride=(1, 1), padding=(1, 1))
        self.up = nn.Upsample(scale_factor=2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv3a(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = F.relu(self.conv7(x))
        x = F.relu(self.conv8(x))
        x = self.up(x)
        x = F.relu(self.conv9(x))
        x = self.up(x)
        x = F.relu(self.conv10(x))
        x = torch.tanh(self.conv11(x))
        x = self.up(x)
        # x = torch.tanh(self.conv8(x))
        # return torch.tanh(y, dim=1)  # output
        return x
# net=Net()
net = Net().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function=nn.MSELoss()
Lb=np.zeros((1,800,800))
def fwd_pass(X, y, train=False):  # checking if you're training
    if train:
        net.zero_grad()
    outputs = net(X)
    loss = loss_function(outputs, y)
    if train:
        loss.backward()
        optimizer.step()
    return loss

def train():
    EPOCHS = 10
    for epoch in range(EPOCHS):
        for i in tqdm(range(size)):
            # Lb = L[i].reshape(1, 1, 800, 800)
            loss = fwd_pass(L[i].view(1, 1, 800, 800).to(device),
                            ab[i].view(1, 2, 800, 800).to(device), train=True)
        print(f"Epoch: {epoch}. Loss: {loss}")
def test():
    X = "datasets_298806_1217826_00000000_(2).jpg"
    path = X
    path = cv2.imread(path)
    patha = cv2.cvtColor(path, cv2.COLOR_BGR2RGB)
    path = cv2.resize(patha, (800, 800))
    gs = color.rgb2lab(1.0 / 255 * path)[:, :, 0]    # transform the test image into the network input
    abt = color.rgb2lab(1.0 / 255 * path)[:, :, 1:]
    # gs / 100
    abt = abt / 128
    gs = torch.Tensor([gs]).reshape(1, 1, 800, 800)
    abt = torch.Tensor([abt]).reshape(1, 2, 800, 800)
    gs = gs.cuda()
    abt = abt.cuda()
    losst = fwd_pass(gs.to(device), abt.to(device), train=False)
    print(f" Loss: {losst}")
    out = net(gs)
    out = out * 128
    canvas = np.zeros((800, 800, 3))
    gs = gs.reshape(800, 800)
    gs = gs.cpu().detach().numpy()
    # out = out.cpu().data.numpy()
    canvas[:, :, 0] = gs
    out = out.cpu().detach().numpy()
    out = out.reshape(800, 800, 2)
    canvas[:, :, 1:] = out
    canvas = color.lab2rgb(canvas)
    f, axarr = plt.subplots(2)
    axarr[0].imshow(canvas)
    # axarr[1].imshow(X)
    axarr[1].imshow(patha)
    plt.imsave('model.jpg', canvas)
    plt.imsave('orig.jpg', path)
L,ab=td.rgb2lab(training_data)
L=L.cuda()
ab=ab.cuda()
train()
test()
# output=net(L)
# f, axarr = plt.subplots(2)
# axarr[0].imshow(L[1])
# canvas=np.zeros((800,800,3))
# # axarr[1].imshow(ab[1])
# canvas[:,:,0] = L[1].reshape(800,800)
# canvas[:,:,1:] = ab[1]
# canvas=color.lab2rgb(canvas)
# axarr[1].imshow(canvas)

I think the main problem is that you should either use batching, accumulate gradients, or reduce the learning rate.
Also, I suggest plotting your training loss. If the training loss decreases normally, add a validation set and plot the validation loss as well. You may need to add BatchNorm, use augmentations, reduce the image size, tune the optimizer, and so on.
You can also find some inspiration in this article, or look for a good implementation of an auto-encoder colorizer.
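For illustration, here is a minimal sketch of what mini-batch training could look like with the tensors from the question (L of shape (size, 1, 800, 800), ab of shape (size, 2, 800, 800)); the batch size and the loop structure are my assumptions, not part of the original code:
BATCH_SIZE = 4  # hypothetical value, pick whatever fits in GPU memory

def train_batched(epochs=30):
    for epoch in range(epochs):
        epoch_loss = 0.0
        num_batches = 0
        for start in range(0, size, BATCH_SIZE):
            batch_X = L[start:start + BATCH_SIZE].to(device)   # grayscale (L channel) inputs
            batch_y = ab[start:start + BATCH_SIZE].to(device)  # ab channel targets
            optimizer.zero_grad()
            loss = loss_function(net(batch_X), batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            num_batches += 1
        # plot these per-epoch values to see whether training actually converges
        print(f"Epoch {epoch}: mean loss {epoch_loss / num_batches:.6f}")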

Related

How to get prediction and confidence of that prediction using resnet

I have a binary classifier which predicts whether an image is positive or negative. I am using model.predict to get the detections. What I want is the class index and the confidence value with which the image belongs to that class. I am able to get the detections and show them on the image, but for background images it also shows some false predictions, so I would like to remove those by setting a threshold on the confidence. For information about the training and testing files, I have asked a question on Stack Overflow; please refer to the link "Resnet is showing wrong predictions even without any object".
My Resnet code:
# import the necessary packages
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import AveragePooling2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.convolutional import ZeroPadding2D
from keras.layers.core import Activation
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.layers import add
from keras.regularizers import l2
from keras import backend as K
class ResNet:
    @staticmethod
    def residual_module(data, K, stride, chanDim, red=False,
                        reg=0.0001, bnEps=2e-5, bnMom=0.9):
        # the shortcut branch of the ResNet module should be
        # initialized as the input (identity) data
        shortcut = data
        # the first block of the ResNet module are the 1x1 CONVs
        bn1 = BatchNormalization(axis=chanDim, epsilon=bnEps,
                                 momentum=bnMom)(data)
        act1 = Activation("relu")(bn1)
        conv1 = Conv2D(int(K * 0.25), (1, 1), use_bias=False,
                       kernel_regularizer=l2(reg))(act1)
        # the second block of the ResNet module are the 3x3 CONVs
        bn2 = BatchNormalization(axis=chanDim, epsilon=bnEps,
                                 momentum=bnMom)(conv1)
        act2 = Activation("relu")(bn2)
        conv2 = Conv2D(int(K * 0.25), (3, 3), strides=stride,
                       padding="same", use_bias=False,
                       kernel_regularizer=l2(reg))(act2)
        # the third block of the ResNet module is another set of 1x1
        # CONVs
        bn3 = BatchNormalization(axis=chanDim, epsilon=bnEps,
                                 momentum=bnMom)(conv2)
        act3 = Activation("relu")(bn3)
        conv3 = Conv2D(K, (1, 1), use_bias=False,
                       kernel_regularizer=l2(reg))(act3)
        # if we are to reduce the spatial size, apply a CONV layer to
        # the shortcut
        if red:
            shortcut = Conv2D(K, (1, 1), strides=stride,
                              use_bias=False, kernel_regularizer=l2(reg))(act1)
        # add together the shortcut and the final CONV
        x = add([conv3, shortcut])
        # return the addition as the output of the ResNet module
        return x

    @staticmethod
    def build(width, height, depth, classes, stages, filters,
              reg=0.0001, bnEps=2e-5, bnMom=0.9):
        # initialize the input shape to be "channels last" and the
        # channels dimension itself
        inputShape = (height, width, depth)
        chanDim = -1
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        # set the input and apply BN
        inputs = Input(shape=inputShape)
        x = BatchNormalization(axis=chanDim, epsilon=bnEps,
                               momentum=bnMom)(inputs)
        # apply CONV => BN => ACT => POOL to reduce spatial size
        x = Conv2D(filters[0], (5, 5), use_bias=False,
                   padding="same", kernel_regularizer=l2(reg))(x)
        x = BatchNormalization(axis=chanDim, epsilon=bnEps,
                               momentum=bnMom)(x)
        x = Activation("relu")(x)
        x = ZeroPadding2D((1, 1))(x)
        x = MaxPooling2D((3, 3), strides=(2, 2))(x)
        # loop over the number of stages
        for i in range(0, len(stages)):
            # initialize the stride, then apply a residual module
            # used to reduce the spatial size of the input volume
            stride = (1, 1) if i == 0 else (2, 2)
            x = ResNet.residual_module(x, filters[i + 1], stride,
                                       chanDim, red=True, bnEps=bnEps, bnMom=bnMom)
            # loop over the number of layers in the stage
            for j in range(0, stages[i] - 1):
                # apply a ResNet module
                x = ResNet.residual_module(x, filters[i + 1],
                                           (1, 1), chanDim, bnEps=bnEps, bnMom=bnMom)
        # apply BN => ACT => POOL
        x = BatchNormalization(axis=chanDim, epsilon=bnEps,
                               momentum=bnMom)(x)
        x = Activation("relu")(x)
        x = AveragePooling2D((8, 8))(x)
        # softmax classifier
        x = Flatten()(x)
        x = Dense(classes, kernel_regularizer=l2(reg))(x)
        x = Activation("softmax")(x)
        # create the model
        model = Model(inputs, x, name="resnet")
        # return the constructed network architecture
        return model
Any suggestion to get rid of this problem would be really helpful.
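Not a full answer, but here is a minimal sketch of the thresholding part (assuming model.predict returns the softmax probabilities per image, as in the network above, and that images is an already preprocessed batch; the 0.9 cut-off is an arbitrary example):
import numpy as np

probs = model.predict(images)          # shape: (num_images, num_classes), softmax outputs
class_idx = np.argmax(probs, axis=1)   # predicted class index per image
confidence = np.max(probs, axis=1)     # probability of the predicted class

THRESHOLD = 0.9                        # hypothetical cut-off for accepting a detection
for idx, conf in zip(class_idx, confidence):
    if conf >= THRESHOLD:
        print(f"class {idx} with confidence {conf:.2f}")
    else:
        print("rejected: low-confidence prediction, treated as background")
Note that thresholding softmax scores only partially suppresses background false positives; adding an explicit background class to the training data is usually more robust.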

How can I improve my neural network accuracy? (Speaker recognition - MFCC)

I'm working on a speaker recognition neural network.
What I am doing is taking wav files [from the Big Bang Theory first episode :-)], converting them to MFCC coefficients, and feeding those as input to an open-source neural network API (MLPClassifier). As output I define a unique vector for each speaker (let's say: [1,0,0,0] - Sheldon; [0,1,0,0] - Penny; and so on). I take 50 random samples for testing and use the rest for fitting (training).
This is my code. At the beginning I got roughly random accuracy from the NN, but after some help from an amazing guy I improved it to ~42%. I want more though :) about 70%:
from sklearn.neural_network import MLPClassifier
import python_speech_features
import scipy.io.wavfile as wav
import numpy as np
from os import listdir
from os.path import isfile, join
from random import shuffle
import matplotlib.pyplot as plt
from tqdm import tqdm
from random import randint
import random
winner = [] # this array count how much Bingo we had when we test the NN
random_winner = []
win_len = 0.04 # in seconds
step = win_len / 2
nfft = 2048
for TestNum in tqdm(range(20)): # in every round we build NN with X,Y that out of them we check 50 after we build the NN
X = []
Y = []
onlyfiles = [f for f in listdir("FinalAudios/") if isfile(join("FinalAudios/", f))] # Files in dir
names = [] # names of the speakers
for file in onlyfiles: # for each wav sound
# UNESSECERY TO UNDERSTAND THE CODE
if " " not in file.split("_")[0]:
names.append(file.split("_")[0])
else:
names.append(file.split("_")[0].split(" ")[0])
only_speakers = [] + names
#print only_speakers
names = list(dict.fromkeys(names)) # names of speakers
print names
vector_names = [] # vector for each name
i = 0
vector_for_each_name = [0] * len(names)
for name in names:
vector_for_each_name[i] += 1
vector_names.append(np.array(vector_for_each_name))
vector_for_each_name[i] -= 1
i += 1
for f in onlyfiles:
if " " not in f.split("_")[0]:
f_speaker = f.split("_")[0]
else:
f_speaker = f.split("_")[0].split(" ")[0]
fs, audio = wav.read("FinalAudios/" + f) # read the file
try:
mfcc_feat = python_speech_features.mfcc(audio, samplerate=fs, winlen=win_len,
winstep=step, nfft=nfft, appendEnergy=False)
flat_list = [item for sublist in mfcc_feat for item in sublist]
X.append(np.array(flat_list))
Y.append(np.array(vector_names[names.index(f_speaker)]))
except IndexError:
pass
Z = list(zip(X, Y))
shuffle(Z) # WE SHUFFLE X,Y TO PERFORM RANDOM ON THE TEST LEVEL
X, Y = zip(*Z)
X = list(X)
Y = list(Y)
X = np.asarray(X)
Y = np.asarray(Y)
Y_test = Y[:50] # CHOOSE 50 FOR TEST, OTHERS FOR TRAIN
X_test = X[:50]
X = X[50:]
Y = Y[50:]
print len(X)
clf = MLPClassifier(solver='lbfgs', alpha=3e-2, hidden_layer_sizes=(50, 20), random_state=2) # create the NN
clf.fit(X, Y) # Train it
print list(clf.predict_proba([X[0]])[0])
print list(Y_test[0])
for sample in range(len(X_test)): # add 1 to winner array if we correct and 0 if not, than in the end it plot it
arr = list(clf.predict([X_test[sample]])[0])
if arr.index(max(arr)) == list(Y_test[sample]).index(1):
winner.append(1)
else:
winner.append(0)
if only_speakers[randint(0, len(only_speakers) - 1)] == only_speakers[randint(0, len(only_speakers) - 1)]:
random_winner.append(1)
else:
random_winner.append(0)
# plot winner
plot_x = []
plot_y = []
for i in range(1, len(winner)):
plot_y.append(sum(winner[0:i])*1.0/len(winner[0:i]))
plot_x.append(i)
plot_random_x = []
plot_random_y = []
for i in range(1, len(random_winner)):
plot_random_y.append(sum(random_winner[0:i])*1.0/len(random_winner[0:i]))
plot_random_x.append(i)
plt.plot(plot_x, plot_y, 'r', label='machine learning')
plt.plot(plot_random_x, plot_random_y, 'b', label='random')
plt.xlabel('Number Of Samples')
# naming the y axis
plt.ylabel('Success Rate')
# giving a title to my graph
plt.title('Success Rate : Random Vs ML!')
# function to show the plot
plt.show()
This is the zip file that contains the code and the audio files: https://ufile.io/eggjm1gw
Does anybody have an idea how I can improve my accuracy?
Edit:
I improved my data set, switched to a convolutional model, and got 60% accuracy, which is OK but still not good enough.
import python_speech_features
import scipy.io.wavfile as wav
import numpy as np
from os import listdir
import os
import shutil
from os.path import isfile, join
from random import shuffle
from matplotlib import pyplot
from tqdm import tqdm
from random import randint
import tensorflow as tf
from ast import literal_eval as str2arr
from tempfile import TemporaryFile
#win_len = 0.04 # in seconds
#step = win_len / 2
#nfft = 2048
win_len = 0.05 # in seconds
step = win_len
nfft = 16384
results = []
outfile_x = None
outfile_y = None
winner = []
for TestNum in tqdm(range(40)): # We check it several times
if not outfile_x: # if path not exist we create it
X = [] # inputs
Y = [] # outputs
onlyfiles = [f for f in listdir("FinalAudios") if isfile(join("FinalAudios", f))] # Files in dir
names = [] # names of the speakers
for file in onlyfiles: # for each wav sound
# UNESSECERY TO UNDERSTAND THE CODE
if " " not in file.split("_")[0]:
names.append(file.split("_")[0])
else:
names.append(file.split("_")[0].split(" ")[0])
only_speakers = [] + names
namesWithoutDuplicate = list(dict.fromkeys(names))
namesWithoutDuplicateCopy = namesWithoutDuplicate[:]
for name in namesWithoutDuplicateCopy: # we remove low samples files
if names.count(name) < 107:
namesWithoutDuplicate.remove(name)
names = namesWithoutDuplicate
print(names) # print it
vector_names = [] # output for each name
i = 0
for name in names:
vector_for_each_name = i
vector_names.append(np.array(vector_for_each_name))
i += 1
for f in onlyfiles: # for all the files
if " " not in f.split("_")[0]:
f_speaker = f.split("_")[0]
else:
f_speaker = f.split("_")[0].split(" ")[0]
if f_speaker in namesWithoutDuplicate:
fs, audio = wav.read("FinalAudios\\" + f) # read the file
try:
# compute MFCC
mfcc_feat = python_speech_features.mfcc(audio, samplerate=fs, winlen=win_len, winstep=step, nfft=nfft, appendEnergy=False)
#flat_list = [item for sublist in mfcc_feat for item in sublist]
# Create output + inputs
for i in mfcc_feat:
X.append(np.array(i))
Y.append(np.array(vector_names[names.index(f_speaker)]))
except IndexError:
pass
else:
if not os.path.exists("TooLowSamples"): # if path not exist we create it
os.makedirs("TooLowSamples")
shutil.move("FinalAudios\\" + f, "TooLowSamples\\" + f)
outfile_x = TemporaryFile()
np.save(outfile_x, X)
outfile_y = TemporaryFile()
np.save(outfile_y, Y)
# ------------------- RANDOMIZATION, UNNECESSARY TO UNDERSTAND THE CODE ------------------- #
else:
outfile_x.seek(0)
X = np.load(outfile_x)
outfile_y.seek(0)
Y = np.load(outfile_y)
Z = list(zip(X, Y))
shuffle(Z) # WE SHUFFLE X,Y TO PERFORM RANDOM ON THE TEST LEVEL
X, Y = zip(*Z)
X = list(X)
Y = list(Y)
lenX = len(X)
# ------------------- RANDOMIZATION, UNNECESSARY TO UNDERSTAND THE CODE ------------------- #
y_test = np.asarray(Y[:4000]) # CHOOSE 100 FOR TEST, OTHERS FOR TRAIN
x_test = np.asarray(X[:4000]) # CHOOSE 100 FOR TEST, OTHERS FOR TRAIN
x_train = np.asarray(X[4000:]) # CHOOSE 100 FOR TEST, OTHERS FOR TRAIN
y_train = np.asarray(Y[4000:]) # CHOOSE 100 FOR TEST, OTHERS FOR TRAIN
x_val = x_train[-4000:] # FROM THE TRAIN CHOOSE 100 FOR VALIDATION
y_val = y_train[-4000:] # FROM THE TRAIN CHOOSE 100 FOR VALIDATION
x_train = x_train[:-4000] # FROM THE TRAIN CHOOSE 100 FOR VALIDATION
y_train = y_train[:-4000] # FROM THE TRAIN CHOOSE 100 FOR VALIDATION
x_train = x_train.reshape(np.append(x_train.shape, (1, 1))) # RESHAPE FOR INPUT
x_test = x_test.reshape(np.append(x_test.shape, (1, 1))) # RESHAPE FOR INPUT
x_val = x_val.reshape(np.append(x_val.shape, (1, 1))) # RESHAPE FOR INPUT
features_shape = x_val.shape
# -------------- OUR TENSOR FLOW NEURAL NETWORK MODEL -------------- #
model = tf.keras.models.Sequential([
tf.keras.layers.Input(name='inputs', shape=(13, 1, 1), dtype='float32'),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', strides=1, name='block1_conv', input_shape=(13, 1, 1)),
tf.keras.layers.MaxPooling2D((3, 3), strides=(2,2), padding='same', name='block1_pool'),
tf.keras.layers.BatchNormalization(name='block1_norm'),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', strides=1, name='block2_conv',
input_shape=(13, 1, 1)),
tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool'),
tf.keras.layers.BatchNormalization(name='block2_norm'),
tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same', strides=1, name='block3_conv',
input_shape=(13, 1, 1)),
tf.keras.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool'),
tf.keras.layers.BatchNormalization(name='block3_norm'),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(64, activation='relu', name='dense'),
tf.keras.layers.BatchNormalization(name='dense_norm'),
tf.keras.layers.Dropout(0.2, name='dropout'),
tf.keras.layers.Dense(10, activation='softmax', name='pred')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# -------------- OUR TENSOR FLOW NEURAL NETWORK MODEL -------------- #
print("fitting")
history = model.fit(x_train, y_train, epochs=15, validation_data=(x_val, y_val))
print("testing")
results.append(model.evaluate(x_test, y_test)[1])
print(results)
print(sum(results)/len(results))
for i in range(10000):
f_1 = only_speakers[randint(0, len(only_speakers) - 1)]
f_2 = only_speakers[randint(0, len(only_speakers) - 1)]
if " " not in f_1.split("_")[0]:
f_speaker_1 = f_1.split("_")[0]
else:
f_speaker_1 =f_1.split("_")[0].split(" ")[0]
if " " not in f_2.split("_")[0]:
f_speaker_2 = f_2.split("_")[0]
else:
f_speaker_2 =f_2.split("_")[0].split(" ")[0]
if f_speaker_2 == f_speaker_1:
winner.append(1)
else:
winner.append(0)
print(sum(winner)/len(winner))
#]
# if onlyfiles[randint(len(onlyfiles) - 1)] == onlyfiles[randint(len(onlyfiles) - 1)]
#pyplot.plot(history.history['loss'], label='train')
#pyplot.plot(history.history['val_loss'], label='test')
#pyplot.legend()
#pyplot.show()
Reading your post, these are the things I would suggest you fix/explore:
42% is not that impressive an accuracy for the task you have at hand; consider the way you are cross-validating, e.g. how you split between validation, test, and training datasets.
Your dataset seems very limited. Your task is to identify the speaker, and a single episode might not be enough data for this.
You might want to consider deep neural network libraries such as Keras and TensorFlow. Convolutions are something you can apply directly to the MFCC features.
If you decide to use TensorFlow or Keras, consider triplet loss, where you present a positive and a negative example.
Consider reading the current state of the art for your task: https://github.com/grausof/keras-sincnet
Consider reading https://arxiv.org/abs/1503.03832 and adapting it for speech recognition.
The easiest thing you can do to improve your results is adding CNN layers to extract features from the MFCCs, as sketched below.
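For example, here is a minimal sketch of a small Keras CNN over MFCC frames (the input shape (num_frames, num_coeffs, 1) and the number of speakers are placeholders, not values taken from your data):
import tensorflow as tf

num_frames, num_coeffs, num_speakers = 99, 13, 6   # placeholders, adjust to your data

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(num_frames, num_coeffs, 1)),
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(num_speakers, activation='softmax'),
])
# integer speaker labels (0..num_speakers-1) pair with sparse_categorical_crossentropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
Feeding windows of consecutive MFCC frames, rather than single 13-coefficient frames, usually gives the convolutions something meaningful to learn from.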

How to save a model in TensorFlow Federated

How do I save the model in the code below?
If you want to run the code, please visit https://github.com/tensorflow/federated
and download federated_learning_for_image_classification.ipynb.
I would appreciate it if you could tell me how to save the model from federated learning in the federated_learning_for_image_classification.ipynb tutorial.
from __future__ import absolute_import, division, print_function
import tensorflow_federated as tff
from matplotlib import pyplot as plt
import tensorflow as tf
import six
import numpy as np
from six.moves import range
import warnings
import collections
import nest_asyncio
import h5py_character
from tensorflow.keras import layers
nest_asyncio.apply()
warnings.simplefilter('ignore')
tf.compat.v1.enable_v2_behavior()
np.random.seed(0)
NUM_CLIENTS = 1
NUM_EPOCHS = 1
BATCH_SIZE = 20
SHUFFLE_BUFFER = 500
num_classes = 3755
if six.PY3:
tff.framework.set_default_executor(
tff.framework.create_local_executor(NUM_CLIENTS))
data_train = h5py_character.load_characters_data()
print(len(data_train.client_ids))
example_dataset = data_train.create_tf_dataset_for_client(
data_train.client_ids[0])
def preprocess(dataset):
def element_fn(element):
# element['data'] = tf.expand_dims(element['data'], axis=-1)
return collections.OrderedDict([
# ('x', tf.reshape(element['data'], [-1])),
('x', tf.reshape(element['data'], [64, 64, 1])),
('y', tf.reshape(element['label'], [1])),
])
return dataset.repeat(NUM_EPOCHS).map(element_fn).shuffle(
SHUFFLE_BUFFER).batch(BATCH_SIZE)
preprocessed_example_dataset = preprocess(example_dataset)
print(iter(preprocessed_example_dataset).next())
sample_batch = tf.nest.map_structure(
lambda x: x.numpy(), iter(preprocessed_example_dataset).next())
def make_federated_data(client_data, client_ids):
return [preprocess(client_data.create_tf_dataset_for_client(x))
for x in client_ids]
sample_clients = data_train.client_ids[0:NUM_CLIENTS]
federated_train_data = make_federated_data(data_train, sample_clients)
def create_compiled_keras_model():
model = tf.keras.Sequential([
layers.Conv2D(input_shape=(64, 64, 1), filters=64, kernel_size=(3, 3), strides=(1, 1),
padding='same', activation='relu'),
layers.MaxPool2D(pool_size=(2, 2), padding='same'),
layers.Conv2D(filters=128, kernel_size=(3, 3), padding='same'),
layers.MaxPool2D(pool_size=(2, 2), padding='same'),
layers.Conv2D(filters=256, kernel_size=(3, 3), padding='same'),
layers.MaxPool2D(pool_size=(2, 2), padding='same'),
layers.Flatten(),
layers.Dense(1024, activation='relu'),
layers.Dense(3755, activation='softmax')
])
model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
# metrics=['accuracy'])
metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
return model
def model_fn():
keras_model = create_compiled_keras_model()
global model_to_save
model_to_save = keras_model
print(keras_model.summary())
return tff.learning.from_compiled_keras_model(keras_model, sample_batch)
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()
state, metrics = iterative_process.next(state, federated_train_data)
print('round 1, metrics={}'.format(metrics))
for round_num in range(2, 110):
state, metrics = iterative_process.next(state, federated_train_data)
print('round {:2d}, metrics={}'.format(round_num, metrics))
Roughly, we will be using the object here, and its save_checkpoint/load_checkpoint methods. In particular, you can instantiate a FileCheckpointManager, and ask it to save state (almost) directly.
state in your example is an instance of tff.python.common_libs.anonymous_tuple.AnonymousTuple (IIRC), which is not compatible with tf.convert_to_tensor, as is needed by save_checkpoint and declared in its docstring. The general solution often used in TFF research code is to introduce a Python attrs class to convert away from the anonymous tuple as soon as state is returned--see here for an example.
Assuming the above, the following sketch should work:
# state assumed an anonymous tuple, previously created
# N some integer
ckpt_manager = FileCheckpointManager(...)
ckpt_manager.save_checkpoint(ServerState.from_anon_tuple(state), round_num=N)
And to restore from this checkpoint, at any time you can call:
state = iterative_process.initialize()
ckpt_manager = FileCheckpointManager(...)
restored_state = ckpt_manager.load_latest_checkpoint(
    ServerState.from_anon_tuple(state))
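For completeness, the ServerState wrapper referenced above is not defined in this snippet. Modeled loosely on the TFF research code, it could look roughly like this (the field names model and optimizer_state are assumptions about your server state structure):
import attr

@attr.s(frozen=True)
class ServerState(object):
    model = attr.ib()            # model weights produced by the iterative process
    optimizer_state = attr.ib()  # server optimizer variables

    @classmethod
    def from_anon_tuple(cls, anon_tuple):
        # unpack the anonymous tuple into plain attrs fields so that
        # save_checkpoint/load_checkpoint can run tf.convert_to_tensor on them
        return cls(model=anon_tuple.model, optimizer_state=anon_tuple.optimizer_state)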
One thing to note: the code pointers linked above are generally in tff.python.research..., which is not included in the pip package; so the preferred way to get at them is to either fork the code into your own project, or pull down the repo and build it from source.
Thanks for your interest in TFF!
Wouldn't model.save_weights apply to this problem? I understand that the FileCheckpointManager would do a more complete job (capturing weights per round) but I guess as far as the final federated averaged model is concerned the parameter space should be available in save_weights.
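If all you need is the final averaged model rather than per-round checkpoints, something along these lines may be enough. This assumes a TFF version that still provides tff.learning.assign_weights_to_keras_model (the same era as the from_compiled_keras_model API used in the question); treat the exact call as an assumption about your version:
# push the trained server weights into a plain Keras model, then save them
keras_model = create_compiled_keras_model()
tff.learning.assign_weights_to_keras_model(keras_model, state.model)
keras_model.save_weights('federated_model_weights.h5')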
You can use the class FileCheckpointManager in
https://github.com/tensorflow/federated/blob/master/tensorflow_federated/python/simulation/checkpoint_manager.py
However, this class is not included in the released version (v0.18.0) of TFF. You should copy this file into your project directory so that you can import FileCheckpointManager.
'''
# PASTE YOUR CODE BEFORE HERE
# Required:
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
state = iterative_process.initialize()
'''
from checkpoint_manager import FileCheckpointManager
fcm = FileCheckpointManager('checkpoint/')
# Save model
round_num = 110 # It depends on rounds you have trained
fcm.save_checkpoint(state, round_num)
# Load model
state, round_num = fcm.load_latest_checkpoint(state)
state, metrics = iterative_process.next(state, federated_train_data)

Display result of convolution in PyTorch

PyTorch newbie here. I wrote a script (code below) that performs the following operations: load an image, perform a 2D convolution operation and then display the output and the input.
At present I have the image below, which seems off. How can I plot the feature map correctly?
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import imageio
import sys
A = imageio.imread('LiT.png')
# Define how the convolution operation works
conv2 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=1)
image_d = torch.FloatTensor(np.asarray(A.reshape(1, 3, A.shape[0] , A.shape[1])))
fc = conv2(image_d)
fc1 = fc.permute(0, 2, 3, 1).reshape([516, 780, 3])
plt.figure(figsize=(16,8))
plt.subplot(1,2,1)
plt.imshow(A)
plt.subplot(1,2,2)
plt.imshow(fc1.data.numpy())
plt.show()
The issue with your code is this line
image_d = torch.FloatTensor(np.asarray(A.reshape(1, 3, A.shape[0] , A.shape[1])))
You can't just reshape the image; you need to transpose the channels. As a remark for the future: if you get a stripy result like you did, it's most likely some permutation/transposition or reshaping operation that's not correct.
Other than that I also scaled the input image to [0, 1] to show it properly. Below is the working code:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import matplotlib.pyplot as plt
import imageio
import sys
A = imageio.imread('LiT.png')
# Define how the convolution operation works
conv2 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3, stride=1, padding=1)
# from [H, W, C] to [C, H, W]
transposed_image = A.transpose((2, 0, 1))
# add batch dim
transposed_image = np.expand_dims(transposed_image, 0)
image_d = torch.FloatTensor(transposed_image)
fc = conv2(image_d)
fc1 = fc.permute(0, 2, 3, 1)[0]
result = fc1.data.numpy()
max_ = np.max(result)
min_ = np.min(result)
result -= min_
result /= max_
plt.figure(figsize=(16,8))
plt.subplot(1,2,1)
plt.imshow(A)
plt.subplot(1,2,2)
plt.imshow(result)
plt.show()
To my understanding, the problem lies in how you are permuting the channel positions in the image by using reshape. Instead, np.transpose or tensor.permute should be used. Using torch for the permutation:
image_d = torch.FloatTensor(np.asarray(A)).unsqueeze(0).permute(0,3,1,2)
Or, if we want to handle the permutation part in numpy:
image_d = np.transpose(np.asarray(A), (2,0,1))
image_d = torch.FloatTensor(image_d).unsqueeze(0)

Has anyone successfully trained Squeezenet with residual connections?

I have trained the two versions of SqueezeNet, both with success, thanks #forresti!
When training the one with residual connections, I am stuck. Whatever learning policy I use, whether the one shipped in this repo or plain step decay, I cannot train it to the results given in the paper. The accuracy is a bit lower than SqueezeNet v1.0...
I know that I should post this in that repo, but I can't find an issues tab there...
Could anyone shed some light on this? Thanks in advance!
====================EDIT=============================
I first adopted the solver hyperparameters shipped with SqueezeNet v1.0. Then I changed the learning policy from poly to step, keeping the remaining parameters untouched, and closely monitored the loss and accuracy; when they became apparently flat, I reduced the learning rate by a factor of 0.4. In both cases I got top-5 accuracies of 81.9x% and 79.8x%, lower than the benchmark provided in the paper, which seems rather weird...
You can use the newest SqueezeNet v1.1 from: https://github.com/rcmalli/keras-squeezenet
Model Definition:
from keras import backend as K
from keras.layers import Input, Convolution2D, MaxPooling2D, Activation, concatenate, Dropout
from keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D
from keras.models import Model
from keras.utils.layer_utils import get_source_inputs #https://stackoverflow.com/questions/68862735/keras-vggface-no-module-named-keras-engine-topology
from tensorflow.keras.utils import get_file
from keras.utils import layer_utils
sq1x1 = "squeeze1x1"
exp1x1 = "expand1x1"
exp3x3 = "expand3x3"
relu = "relu_"
WEIGHTS_PATH = "https://github.com/rcmalli/keras-squeezenet/releases/download/v1.0/squeezenet_weights_tf_dim_ordering_tf_kernels.h5"
WEIGHTS_PATH_NO_TOP = "https://github.com/rcmalli/keras-squeezenet/releases/download/v1.0/squeezenet_weights_tf_dim_ordering_tf_kernels_notop.h5"
# Modular function for Fire Node
def fire_module(x, fire_id, squeeze=16, expand=64):
s_id = 'fire' + str(fire_id) + '/'
if K.image_data_format() == 'channels_first':
channel_axis = 1
else:
channel_axis = 3
x = Convolution2D(squeeze, (1, 1), padding='valid', name=s_id + sq1x1)(x)
x = Activation('relu', name=s_id + relu + sq1x1)(x)
left = Convolution2D(expand, (1, 1), padding='valid', name=s_id + exp1x1)(x)
left = Activation('relu', name=s_id + relu + exp1x1)(left)
right = Convolution2D(expand, (3, 3), padding='same', name=s_id + exp3x3)(x)
right = Activation('relu', name=s_id + relu + exp3x3)(right)
x = concatenate([left, right], axis=channel_axis, name=s_id + 'concat')
return x
# Original SqueezeNet from paper.
def SqueezeNet(include_top=True, weights='imagenet',
input_tensor=None, input_shape=None,
pooling=None,
classes=1000):
"""Instantiates the SqueezeNet architecture."""
if weights not in {'imagenet', None}:
raise ValueError('The `weights` argument should be either '
'`None` (random initialization) or `imagenet` '
'(pre-training on ImageNet).')
input_shape = input_shape
if input_tensor is None:
img_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
img_input = Input(tensor=input_tensor, shape=input_shape)
else:
img_input = input_tensor
x = Convolution2D(64, (3, 3), strides=(2, 2), padding='valid', name='conv1')(img_input)
x = Activation('relu', name='relu_conv1')(x)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool1')(x)
x = fire_module(x, fire_id=2, squeeze=16, expand=64)
x = fire_module(x, fire_id=3, squeeze=16, expand=64)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool3')(x)
x = fire_module(x, fire_id=4, squeeze=32, expand=128)
x = fire_module(x, fire_id=5, squeeze=32, expand=128)
x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), name='pool5')(x)
x = fire_module(x, fire_id=6, squeeze=48, expand=192)
x = fire_module(x, fire_id=7, squeeze=48, expand=192)
x = fire_module(x, fire_id=8, squeeze=64, expand=256)
x = fire_module(x, fire_id=9, squeeze=64, expand=256)
if include_top:
# It's not obvious where to cut the network...
# Could do the 8th or 9th layer... some work recommends cutting earlier layers.
x = Dropout(0.5, name='drop9')(x)
x = Convolution2D(classes, (1, 1), padding='valid', name='conv10')(x)
x = Activation('relu', name='relu_conv10')(x)
x = GlobalAveragePooling2D()(x)
x = Activation('softmax', name='loss')(x)
else:
if pooling == 'avg':
x = GlobalAveragePooling2D()(x)
elif pooling=='max':
x = GlobalMaxPooling2D()(x)
elif pooling==None:
pass
else:
raise ValueError("Unknown argument for 'pooling'=" + pooling)
#x = Dense(10, activation= 'softmax')(x)
# Ensure that the model takes into account
# any potential predecessors of `input_tensor`.
if input_tensor is not None:
inputs = get_source_inputs(input_tensor)
else:
inputs = img_input
model = Model(inputs, x, name='squeezenet')
# load weights
if weights == 'imagenet':
if include_top:
weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels.h5',
WEIGHTS_PATH,
cache_subdir='models')
else:
weights_path = get_file('squeezenet_weights_tf_dim_ordering_tf_kernels_notop.h5',
WEIGHTS_PATH_NO_TOP,
cache_subdir='models')
model.load_weights(weights_path)
if K.backend() == 'theano':
layer_utils.convert_all_kernels_in_model(model)
return model
Example Usage:
import numpy as np
from keras_squeezenet import SqueezeNet
from keras.applications.imagenet_utils import preprocess_input, decode_predictions
from keras.preprocessing import image
model = SqueezeNet()
img = image.load_img('../images/cat.jpeg', target_size=(227, 227))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
preds = model.predict(x)
print('Predicted:', decode_predictions(preds))
