Why different intermediate layer output of CNN in Keras? - machine-learning

I am using the code below to run an experiment. I want to use the intermediate representation from the layer just before the fully connected (last) layer of the CNN.
from __future__ import print_function
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.datasets import imdb
# set parameters:
max_features = 5000
maxlen = 400
batch_size = 100
embedding_dims = 50
filters = 250
kernel_size = 3
hidden_dims = 250
epochs = 100
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
model.add(Dropout(0.2))
# we add a Convolution1D, which will learn word
# group filters of size kernel_size:
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())
# We add a vanilla hidden layer:
model.add(Dense(hidden_dims))
model.add(Dropout(0.2))
model.add(Activation('relu'))#<======== I need output after this.
# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])
To get the intermediate representation of the penultimate layer, I used the following code.
CODE1
from keras import backend as K

get_layer_output = K.function([model.layers[0].input, K.learning_phase()],
                              [model.layers[6].output])
# output in test mode = 0
layer_output_test = get_layer_output([x_test, 0])[0]
# output in train mode = 1
layer_output_train = get_layer_output([x_train, 1])[0]
print(layer_output_train)
print(layer_output_train.shape)
CODE2
def get_activations(model, layer, X_batch):
    get_activations = K.function([model.layers[0].input, K.learning_phase()],
                                 [model.layers[layer].output])
    activations = get_activations([X_batch, 1])
    return activations
import numpy as np
X_train=np.array(get_activations(model=model,layer=6, X_batch=x_train)[0], dtype=np.float32)
print(X_train)
print(X_train.shape)
Which one is correct, given that the two snippets print different outputs? I want to multiply the correct output by weights and optimise it with a custom optimiser.

If you pass 1 to K.learning_phase() you will get different results every time, because dropout is active in training mode. Apart from that, both snippets are equivalent and give the same result.
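A minimal sketch of this behaviour, reusing get_layer_output from CODE1 in the question (the printed values are illustrative):
import numpy as np
out_a = get_layer_output([x_test, 0])[0]  # test mode: dropout disabled
out_b = get_layer_output([x_test, 0])[0]
print(np.allclose(out_a, out_b))  # True - deterministic in test mode
out_c = get_layer_output([x_test, 1])[0]  # train mode: dropout active
out_d = get_layer_output([x_test, 1])[0]
print(np.allclose(out_c, out_d))  # almost certainly False - a new dropout mask is drawn per call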

Using a higher level approach, you can do this:
from keras.models import Model
newModel = Model(model.inputs, model.layers[6].output)
Do whatever you want with newModel. You can train it (which also affects the original model) and use it to predict values.
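For example, a minimal sketch of extracting the penultimate activations with newModel (x_test as defined in the question; the shape comment assumes the settings above):
features_test = newModel.predict(x_test)
print(features_test.shape)  # (25000, 250) - (test samples, hidden_dims)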

Related

Target size (torch.Size([32, 9])) must be the same as input size (torch.Size([32, 10]))

I have 10 classes, and a model such as:
import torch
from brevitas.nn import QuantLinear, QuantReLU
import torch.nn as nn
# Setting seeds for reproducibility
torch.manual_seed(0)
model = nn.Sequential(
    QuantLinear(input_size, hidden1, bias=True, weight_bit_width=weight_bit_width),
    nn.BatchNorm1d(hidden1),
    nn.Dropout(0.5),
    QuantReLU(bit_width=act_bit_width),
    QuantLinear(hidden1, hidden2, bias=True, weight_bit_width=weight_bit_width),
    nn.BatchNorm1d(hidden2),
    nn.Dropout(0.5),
    QuantReLU(bit_width=act_bit_width),
    QuantLinear(hidden2, hidden3, bias=True, weight_bit_width=weight_bit_width),
    nn.BatchNorm1d(hidden3),
    nn.Dropout(0.5),
    QuantReLU(bit_width=act_bit_width),
    QuantLinear(hidden3, num_classes, bias=True, weight_bit_width=weight_bit_width)
)
model.to(device)
and I have defined my training phase as:
def train(model, train_loader, optimizer, criterion):
    losses = []
    # ensure model is in training mode
    model.train()
    for i, data in enumerate(train_loader, 0):
        inputs, target = data['pointcloud'].to(device).float(), data['category'].to(device)
        target = torch.nn.functional.one_hot(target)
        optimizer.zero_grad()
        # forward pass
        output = model(inputs)
        loss = criterion(output, target.float())
        # backward pass + run optimizer to update weights
        loss.backward()
        optimizer.step()
        # keep track of loss value
        losses.append(loss.data.cpu().numpy())
    return losses
As I run the training code:
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm import tqdm, trange
# Setting seeds for reproducibility
torch.manual_seed(0)
np.random.seed(0)
running_loss = []
running_test_acc = []
t = trange(num_epochs, desc="Training loss", leave=True)
for epoch in t:
    loss_epoch = train(model, train_loader, optimizer, criterion)
    test_acc = test(model, valid_loader)
    t.set_description("Training loss = %f test accuracy = %f" % (np.mean(loss_epoch), test_acc))
    t.refresh()  # to show immediately the update
    running_loss.append(loss_epoch)
    running_test_acc.append(test_acc)
I get an error as:
Target size (torch.Size([32, 9])) must be the same as input size
(torch.Size([32, 10]))
Please help me understand what the solution could be. I added one-hot encoding because I had seen similar solutions use it before.
The error is pretty straightforward: the criterion (which you didn't show in your code) expects both the input and the target arguments to be the same size, but they're not.
The problem is that you're using torch.nn.functional.one_hot(target) without telling it how many classes you need for the one-hot encoding; the number of classes is then inferred as the largest value in target plus 1 (see: https://pytorch.org/docs/stable/generated/torch.nn.functional.one_hot.html). You should change it to torch.nn.functional.one_hot(target, num_classes=10).
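A minimal sketch of the difference (the tensor values are illustrative):
import torch
import torch.nn.functional as F
target = torch.tensor([0, 3, 8])  # suppose class 9 never appears in this batch
print(F.one_hot(target).shape)  # torch.Size([3, 9]) - classes inferred as max value + 1
print(F.one_hot(target, num_classes=10).shape)  # torch.Size([3, 10]) - matches the model output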

After hyperparameter tuning accuracy remains the same

I was trying to tune hyperparameters, but after I did, the accuracy score has not changed at all. What am I doing wrong?
# Log reg
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(C=0.3326530612244898,max_iter=100,tol=0.01)
logreg.fit(X_train,y_train)
from sklearn.metrics import confusion_matrix

y_pred = logreg.predict(X_test)

print('Accuracy of log reg is: ', logreg.score(X_test, y_test))

confusion_matrix(y_test, y_pred)
# 0.9181286549707602 - accuracy before tuning
Output:
Accuracy of log reg is: 0.9181286549707602
array([[ 54,   9],
       [  5, 103]])
Here is my GridSearchCV code:
from sklearn.model_selection import GridSearchCV
params = {'tol': [0.01, 0.001, 0.0001],
          'max_iter': [100, 150, 200],
          'C': np.linspace(1, 20) / 10}
grid_model = GridSearchCV(logreg, param_grid=params, cv=5)
grid_model_result = grid_model.fit(X_train, y_train)
print(grid_model_result.best_score_,grid_model_result.best_params_)
Output:
0.8867405063291139 {'C': 0.3326530612244898, 'max_iter': 100, 'tol': 0.01}
The problem is that in the first chunk you evaluated the model's performance on the test set, while with GridSearchCV you only looked at the cross-validated performance on the training set after hyperparameter optimization.
The code below shows that both procedures, when used to predict the test set labels, perform equally well in terms of accuracy (~0.93).
Note: you might want to consider a hyperparameter grid with other solvers and a larger range of max_iter, because I obtained convergence warnings; see the sketch after the code below.
# Load packages
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
# Load the dataset and split in X and y
df = pd.read_csv('Breast_cancer_data.csv')
X = df.iloc[:, 0:5]
y = df.iloc[:, 5]
# Perform train and test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize a model
Log = LogisticRegression(n_jobs=-1)
# Initialize a parameter grid
params = [{'tol': [0.01, 0.001, 0.0001],
           'max_iter': [100, 150, 200],
           'C': np.linspace(1, 20) / 10}]
# Perform GridSearchCV and store the best parameters
grid_model = GridSearchCV(Log,param_grid=params,cv=5)
grid_model_result = grid_model.fit(X_train,y_train)
best_param = grid_model_result.best_params_
# This step is only to prove that both procedures actually result in the same accuracy score
Log2 = LogisticRegression(C=best_param['C'], max_iter=best_param['max_iter'], tol=best_param['tol'], n_jobs=-1)
Log2.fit(X_train, y_train)
# Perform two predictions one straight from the GridSearch and the other one with manually inputting the best params
y_pred1 = grid_model_result.best_estimator_.predict(X_test)
y_pred2 = Log2.predict(X_test)
# Compare the accuracy scores and see that both are the same
print("Accuracy:",metrics.accuracy_score(y_test, y_pred1))
print("Accuracy:",metrics.accuracy_score(y_test, y_pred2))

Keras Regressor giving different predictions for my input every time

I built a Keras regressor using the following code:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import numpy as ny
import pandas
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
X = ny.array([[1,2], [3,4], [5,6], [7,8], [9,10]])
sc_X=StandardScaler()
X_train = sc_X.fit_transform(X)
Y = ny.array([3, 4, 5, 6, 7])
Y=ny.reshape(Y,(-1,1))
sc_Y=StandardScaler()
Y_train = sc_Y.fit_transform(Y)
N = 5
def brain():
    # Create the brain
    br_model = Sequential()
    br_model.add(Dense(3, input_dim=2, kernel_initializer='normal', activation='relu'))
    br_model.add(Dense(2, kernel_initializer='normal', activation='relu'))
    br_model.add(Dense(1, kernel_initializer='normal'))
    # Compile the brain
    br_model.compile(loss='mean_squared_error', optimizer='adam')
    return br_model

def predict(X, sc_X, sc_Y, estimator):
    prediction = estimator.predict(sc_X.fit_transform(X))
    return sc_Y.inverse_transform(prediction)
estimator = KerasRegressor(build_fn=brain, epochs=1000, batch_size=5,verbose=0)
# print "Done"
estimator.fit(X_train,Y_train)
prediction = estimator.predict(X_train)
print predict(X,sc_X,sc_Y,estimator)
X_test = ny.array([[1.5,4.5], [7,8], [9,10]])
print predict(X_test,sc_X,sc_Y,estimator)
The issue I face is that the code does not predict the same value for the same input: for example, it predicts 6.64 for [9,10] in the first prediction (X) and 6.49 for [9,10] in the second prediction (X_test).
The full output is this:
[2.9929883 4.0016675 5.0103474 6.0190268 6.6434317]
[3.096634 5.422326 6.4955378]
Why do I get different values and how do I resolve this?
The problem lies in this line of code:
prediction = estimator.predict(sc_X.fit_transform(X))
You are fitting a new scaler every time you predict values for new data; that is where the differences come from. Try:
prediction = estimator.predict(sc_X.transform(X))
In this case, you use a pretrained scaler.
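A minimal sketch of the difference (illustrative arrays, not the question's data):
import numpy as np
from sklearn.preprocessing import StandardScaler
train = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
test = np.array([[9.0, 10.0]])
sc = StandardScaler()
sc.fit_transform(train)  # learns mean/std from the training data
print(sc.transform(test))  # reuses those statistics: approx. [[3.674 3.674]]
print(StandardScaler().fit_transform(test))  # refits on the new data alone: [[0. 0.]]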

Keras LSTM input features and incorrect dimensional data input

I'm trying to practice using LSTMs in Keras, and the (samples, timesteps, features) 3-D input is confusing me.
I have some stock data: if the next item in the list moves beyond a threshold of 5, i.e. +-2.50, the label is BUY or SELL, and if it stays within that threshold the label is HOLD. These are my labels, my Y.
For my features, my X, I have a dataframe of [500, 1, 3]: 500 samples, 1 timestep (each data point is a 1-hour increment), and 3 features. But I get this error:
ValueError: Error when checking model input: expected lstm_1_input to have 3 dimensions, but got array with shape (500, 3)
How can I fix this code and what am I doing wrong?
import json
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
"""
Sample of JSON file
{"time":"2017-01-02T01:56:14.000Z","usd":8.14},
{"time":"2017-01-02T02:56:14.000Z","usd":8.16},
{"time":"2017-01-02T03:56:15.000Z","usd":8.14},
{"time":"2017-01-02T04:56:16.000Z","usd":8.15}
"""
file = open("E.json", "r", encoding="utf8")
file = json.load(file)
"""
If the price jump to the next item is > or < +-2.50 then append 'BUY' or 'SELL'.
If it is in the range of +-2.50 then append 'HOLD'.
These are my classifier labels.
"""
data = []
for row in range(len(file['data'])):
    row2 = row + 1
    if row2 == len(file['data']):
        break
    else:
        difference = file['data'][row]['usd'] - file['data'][row2]['usd']
        if difference > 2.50:
            data.append((file['data'][row]['usd'], 'SELL'))
        elif difference < -2.50:
            data.append((file['data'][row]['usd'], 'BUY'))
        else:
            data.append((file['data'][row]['usd'], 'HOLD'))
"""
add the price, the timestep (which is 1) and the features (which is 3)
"""
frame = pd.DataFrame(data)
features = pd.DataFrame()
# train LSTM
for x in range(500):
    series = pd.Series(data=[500, 1, frame.iloc[x][0]])
    features = features.append(series, ignore_index=True)
labels = frame.iloc[16000:16500][1]
# test
#yt = frame.iloc[16500:16512][0]
#xt = pd.get_dummies(frame.iloc[16500:16512][1])
# create LSTM
model = Sequential()
model.add(LSTM(3, input_shape=features.shape, activation='relu', return_sequences=False))
model.add(Dense(2, activation='relu'))
model.add(Dense(1, activation='relu'))
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
model.fit(x=features.as_matrix(), y=labels.as_matrix())
"""
ERROR
Anaconda3\envs\Final\python.exe C:/Users/Def/PycharmProjects/Ether/Main.py
Using Theano backend.
Traceback (most recent call last):
File "C:/Users/Def/PycharmProjects/Ether/Main.py", line 62, in <module>
model.fit(x=features.as_matrix(), y=labels.as_matrix())
File "\Anaconda3\envs\Final\lib\site-packages\keras\models.py", line 845, in fit
initial_epoch=initial_epoch)
File "\Anaconda3\envs\Final\lib\site-packages\keras\engine\training.py", line 1405, in fit
batch_size=batch_size)
File "\Anaconda3\envs\Final\lib\site-packages\keras\engine\training.py", line 1295, in _standardize_user_data
exception_prefix='model input')
File "\Anaconda3\envs\Final\lib\site-packages\keras\engine\training.py", line 121, in _standardize_input_data
str(array.shape))
ValueError: Error when checking model input: expected lstm_1_input to have 3 dimensions, but got array with shape (500, 3)
"""
Thanks.
This is my first post here; I hope it can be useful, and I will do my best.
First, you need to create a 3-dimensional array to work with input_shape in Keras. You can check this in the Keras documentation or, in a better way, inspect the docstring directly:
from keras.models import Sequential
Sequential?
Linear stack of layers.
Arguments
layers: list of layers to add to the model.
# Note
The first layer passed to a Sequential model
should have a defined input shape. What that
means is that it should have received an input_shape
or batch_input_shape argument,
or for some type of layers (recurrent, Dense...)
an input_dim argument.
Example
model = Sequential()
# first layer must have a defined input shape
model.add(Dense(32, input_dim=500))
# afterwards, Keras does automatic shape inference
model.add(Dense(32))
# also possible (equivalent to the above):
model = Sequential()
model.add(Dense(32, input_shape=(500,)))
model.add(Dense(32))
# also possible (equivalent to the above):
model = Sequential()
# here the batch dimension is None,
# which means any batch size will be accepted by the model.
model.add(Dense(32, batch_input_shape=(None, 500)))
model.add(Dense(32))
After that, to transform a 2-dimensional array into a 3-dimensional one, check np.newaxis; a minimal sketch follows.
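For instance, turning the (500, 3) array from the error message into the (samples, timesteps, features) shape the LSTM expects:
import numpy as np
X = np.zeros((500, 3))  # 2-D: (samples, features), as in the error
X3 = X[:, np.newaxis, :]  # 3-D: (samples, timesteps=1, features)
print(X3.shape)  # (500, 1, 3)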
Useful commands that help you more than you expect:
- Sequential?
- Sequential??
- print(list(dir(Sequential)))
Best

How to make one layer feed two layers, and two layers feed into one layer, in Keras?

I want to make a model in Keras with some connections between layers like this:
         MaxPooling
          /        \
     pooled      poolmask     convLayer
                      \          /
                       \        /
                        upsample
This type of connection is used in SegNet, and it's easy to do in Caffe, but I don't know how to implement it with Keras.
Could anybody help me?
It's easy in Keras too, but you need to use the Keras Functional API.
You can find an example here: https://keras.io/getting-started/functional-api-guide/
And the code:
import keras
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model
# Headline input: meant to receive sequences of 100 integers, between 1 and 10000.
# Note that we can name any layer by passing it a "name" argument.
main_input = Input(shape=(100,), dtype='int32', name='main_input')
# This embedding layer will encode the input sequence
# into a sequence of dense 512-dimensional vectors.
x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)
# A LSTM will transform the vector sequence into a single vector,
# containing information about the entire sequence
lstm_out = LSTM(32)(x)
auxiliary_input = Input(shape=(5,), name='aux_input')
x = keras.layers.concatenate([lstm_out, auxiliary_input])
auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)
# We stack a deep densely-connected network on top
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
# And finally we add the main logistic regression layer
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
model.compile(optimizer='rmsprop', loss='binary_crossentropy',
              loss_weights=[1., 0.2])
# headline_data, additional_data and labels come from your own dataset
model.fit([headline_data, additional_data], [labels, labels],
          epochs=50, batch_size=32)
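As a sketch closer to the diagram in the question: reusing a layer's output tensor feeds it to several consumers, and a merge layer combines several tensors into one. Note that standard Keras pooling layers do not return the pooling indices (the poolmask), so this only illustrates the branching pattern; the layer sizes are illustrative assumptions.
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from keras.models import Model
inp = Input(shape=(64, 64, 3))
conv = Conv2D(16, (3, 3), padding='same', activation='relu')(inp)
# one layer feeding two consumers: simply reuse its output tensor
pooled = MaxPooling2D((2, 2))(conv)
side = Conv2D(16, (1, 1), padding='same')(conv)
# two layers feeding one layer: merge their tensors
up = UpSampling2D((2, 2))(pooled)
merged = concatenate([up, side])
model = Model(inputs=inp, outputs=merged)
model.summary()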
