Concatenating layers in Keras (None,512) and (18577,4) - machine-learning

How do I concatenate two layers in Keras when one layer has shape (None, 512) and the other has shape (18577, 4)? I tried using Concatenate:
concat_layer = Concatenate()([z1, agp])
But this throws the following error:
ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 512), (18577, 4)]
The model looks something like this:
a1 = Convolution2D(32, filter_dim, activation='linear', padding='same', kernel_regularizer=regularizers.l2(reg))(input_img)
b1 = BatchNormalization()(a1)
c1 = PReLU()(b1)
d1 = Convolution2D(32, filter_dim, activation='linear', kernel_regularizer=regularizers.l2(reg))(c1)
e1 = BatchNormalization()(d1)
f1 = PReLU()(e1)
g1 = MaxPooling2D(pool_size=(2, 2))(f1)
h1 = Dropout(0.2)(g1)
i1 = Convolution2D(64, filter_dim, activation='linear', padding='same', kernel_regularizer=regularizers.l2(reg))(h1)
j1 = BatchNormalization()(i1)
k1 = PReLU()(j1)
l1 = Convolution2D(64, filter_dim, activation='linear', kernel_regularizer=regularizers.l2(reg))(k1)
m1 = BatchNormalization()(k1)
n1 = PReLU()(m1)
o1 = MaxPooling2D(pool_size=(2, 2))(n1)
p1 = Dropout(0.2)(o1)
q1 = Convolution2D(128, filter_dim, activation='linear', padding='same', kernel_regularizer=regularizers.l2(reg))(p1)
r1 = q1
s1 = BatchNormalization()(r1)
t1 = PReLU()(s1)
u1 = Convolution2D(128, filter_dim, activation='linear', kernel_regularizer=regularizers.l2(reg))(t1)
v1 = BatchNormalization()(u1)
w1 = PReLU()(v1)
x1 = MaxPooling2D(pool_size=(3, 3))(w1)
y1 = Dropout(0.2)(x1)
z1 = Flatten()(y1)
agp = tf.convert_to_tensor(agp, np.float32)
z1 = Concatenate(axis=1)([z1, agp])
a2 = Dense(128, activation='linear', kernel_regularizer=regularizers.l2(reg))(z1)
b2 = BatchNormalization()(a2)
c2 = PReLU()(b2)
d2 = Dropout(0.2)(c2)
e2 = Dense(32, activation='linear', kernel_regularizer=regularizers.l2(reg))(d2)
f2 = BatchNormalization()(e2)
g2 = PReLU()(f2)
h2 = Dropout(0.3)(g2)
My input image has dimensions (32, 32, 3). I want to concatenate z1, of shape (None, 512), with agp, of shape (18577, 4).

#!/usr/bin/env python
def create_model(nb_classes, input_shape):
    """Create a NN model."""
    # from keras.layers import Dropout
    from keras.layers import Activation, Input
    from keras.layers import Dense, Concatenate
    from keras.models import Model
    input_ = Input(shape=input_shape)
    x = input_
    # Branch in two directions - this can be more
    # complex, of course
    x1 = Dense(512, activation='relu')(x)
    x2 = Dense(4, activation='relu')(x)
    # And this is how you use concatenation
    x = Concatenate(axis=-1)([x1, x2])
    # And then finish it
    x = Dense(nb_classes, activation='softmax')(x)
    model = Model(inputs=input_, outputs=x)
    return model

model = create_model(10, (512, ))
print(model.summary())
gives
Using TensorFlow backend.
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_1 (InputLayer) (None, 512) 0
____________________________________________________________________________________________________
dense_1 (Dense) (None, 512) 262656 input_1[0][0]
____________________________________________________________________________________________________
dense_2 (Dense) (None, 4) 2052 input_1[0][0]
____________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 516) 0 dense_1[0][0]
dense_2[0][0]
____________________________________________________________________________________________________
dense_3 (Dense) (None, 10) 5170 concatenate_1[0][0]
====================================================================================================
Total params: 269,878
Trainable params: 269,878
Non-trainable params: 0
____________________________________________________________________________________________________
None
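Applied back to the original question, the same pattern means feeding agp into the graph as a second Input rather than converting a (18577, 4) NumPy array into a constant tensor: assuming 18577 is the number of training samples, the per-sample shape is (4,), which lines up with z1's (None, 512) along the batch axis. A minimal sketch under that assumption (z1 and input_img are the tensors from the question; image_array is a hypothetical name for the stacked image data):

from keras.layers import Input, Dense, Concatenate
from keras.models import Model

# Second model input for the 4 extra per-sample features; its batch axis is
# None, which matches z1's (None, 512).
agp_input = Input(shape=(4,), name='agp_input')

merged = Concatenate(axis=-1)([z1, agp_input])   # shape (None, 516)
out = Dense(128, activation='linear')(merged)    # continue the head as in the question

model = Model(inputs=[input_img, agp_input], outputs=out)
# Both arrays are then passed together at training time, e.g.:
# model.fit([image_array, agp], labels, ...)     # image_array: (18577, 32, 32, 3), agp: (18577, 4)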

Related

Autoencoder to convert 2d to 3d layers ambiguity

I went through some references and research papers and, taking the idea from one of them, decided to implement the same (see the image reference).
So here we feed in a 2D input and the model outputs a 3D model of the same.
The network code I have written is as follows:
Edit
image = Input(shape=(None, None, 3))
# Encoder
l1 = Conv2D(64, (3,3), strides = (2), padding='same', activation='leaky_relu')(image)
l2 = MaxPooling2D()(l1)
l3 = Conv2D(32, (5,5), strides = (2), padding='same', activation='leaky_relu')(l2)
l4 = MaxPooling2D(padding='same')(l3)
l5 = Conv2D(16, (7,7), strides = (2), padding='same', activation='leaky_relu')(l4)
l6 = MaxPooling2D(padding='same')(l5)
l7 = Conv2D(8, (5, 5), strides = (2), padding = 'same', activation = 'leaky_relu')(l6)
l8 = MaxPooling2D(padding='same')(l7)
l9 = Conv2D(4, (3, 3), strides = (2), padding = 'same', activation = 'leaky_relu')(l8)
l10 = MaxPooling2D(padding='same')(l9)
l11 = Conv2D(2, (4, 4), strides = (2), padding = 'same', activation = 'leaky_relu')(l10)
l12 = MaxPooling2D(padding='same')(l11)
l13 = Conv2D(1, (2, 2), strides = (2), padding = 'same', activation = 'leaky_relu')(l12)
# latent variable z
l14 = Reshape((60,512))(l13)
print(l14.shape)#-->output=(None, 60, 512)
l15 = Dense((512), activation = 'leaky_relu')(l14)
print(l15.shape) #-->output=(None, 60, 512)
l16 = Dense((128), activation = 'leaky_relu')(l15)
print(l16.shape)#-->output=(60, 128)
l17 = Reshape((60,128))(l16)
print(l17.shape) #-->output=(60, 128)
#Decoder
l18 = UpSampling3D(size = (3,3,3))(l17) #-->throws error->IndexError: list index out of range
l19 = Conv3DTranspose(60, (8, 8, 8), strides = (64), padding='same', activation = 'leaky_relu') (l17)
l20 = UpSampling3D((3,3,3))(l19)
l21 = Conv3DTranspose(60, (16,16,16), strides =(32), padding='same', activation = 'leaky_relu')(l20)
l22 = UpSampling3D((3,3,3))(l21)
l23 = Conv3DTranspose(60, (32, 32, 32), strides = (32), padding='same', activation = 'leaky_relu')(l22)
l24 = UpSampling3D((3,3,3))(l23)
l25 = Conv3DTranspose(60, (64, 64, 64), strides = (24), padding='same', activation = 'leaky_relu')(l24)
l26 = UpSampling3D((3,3,3))(l25)
l27 = Conv3DTranspose(60, (64, 64, 64), strides = (1), padding='same', activation = 'leaky_relu')(l26)
model3D = Model(image, l27)
This keeps giving me errors; I solved some of them initially, but I am stuck on this one.
The error occurs where l17 is fed into UpSampling3D (the line defining l18), and says:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/tmp/ipykernel_33/907378238.py in <module>
27
28 #Decoder
---> 29 l18 = UpSampling3D(size = (3,3,3))(l17) #-->throws error->IndexError: list index out of range
30 l19 = Conv3DTranspose(60, (8, 8, 8), strides = (64), padding='same', activation = 'leaky_relu') (l17)
31 l20 = UpSampling3D((3,3,3))(l19)
/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
975 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
976 return self._functional_construction_call(inputs, args, kwargs,
--> 977 input_list)
978
979 # Maintains info about the `Layer.call` stack.
/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1113 # Check input assumptions set after layer building, e.g. input shape.
1114 outputs = self._keras_tensor_symbolic_call(
-> 1115 inputs, input_masks, args, kwargs)
1116
1117 if outputs is None:
/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
846 return tf.nest.map_structure(keras_tensor.KerasTensor, output_signature)
847 else:
--> 848 return self._infer_output_signature(inputs, args, kwargs, input_masks)
849
850 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
/opt/conda/lib/python3.7/site-packages/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
886 self._maybe_build(inputs)
887 inputs = self._maybe_cast_inputs(inputs)
--> 888 outputs = call_fn(inputs, *args, **kwargs)
889
890 self._handle_activity_regularization(inputs, outputs)
/opt/conda/lib/python3.7/site-packages/keras/layers/convolutional.py in call(self, inputs)
2720 def call(self, inputs):
2721 return backend.resize_volumes(
-> 2722 inputs, self.size[0], self.size[1], self.size[2], self.data_format)
2723
2724 def get_config(self):
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/opt/conda/lib/python3.7/site-packages/keras/backend.py in resize_volumes(x, depth_factor, height_factor, width_factor, data_format)
3215 output = repeat_elements(x, depth_factor, axis=1)
3216 output = repeat_elements(output, height_factor, axis=2)
-> 3217 output = repeat_elements(output, width_factor, axis=3)
3218 return output
3219 else:
/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/opt/conda/lib/python3.7/site-packages/keras/backend.py in repeat_elements(x, rep, axis)
3248 x_shape = x.shape.as_list()
3249 # For static axis
-> 3250 if x_shape[axis] is not None:
3251 # slices along the repeat axis
3252 splits = tf.split(value=x,
IndexError: list index out of range
At this point I seem to be directionless; any help would be really appreciated. Thanks in advance.
The shape of l16 is:
l16.shape
TensorShape([None, 60, 8192])
and now you want to change the shape [60, 8192] into a shape [4,4,4,128] with the call Reshape((4,4,4,128))(l16). But 60 * 8192 = 491520 and 4 * 4 * 4 * 128 = 8192. So those two shapes are incompatible (491520 != 8192). That's why the error message correctly states:
ValueError: total size of new array must be unchanged, input_shape = [60, 8192], output_shape = [4, 4, 4, 128]
The total number of cells must be the same before and after a reshape. E.g., you can change a (4,) tensor into a (2,2) tensor, but not e.g. into a (3,2) tensor.
The origin lies with l14, which you give the shape [60, 512]:
l14.shape
TensorShape([None, 60, 512])
Now, when you apply a Dense layer to a 2-dim shape like this, it will be applied to the last dimension, i.e. the first dimension of the shape stays the same. That is why l15 still has the shape [60, 512]:
l15.shape
TensorShape([None, 60, 512])
Similarly, l16 will have a shape [60, 128 * 4 * 4 * 4] = [60, 8192]. Then, this is the input into the line for l17 where Reshape chokes as explained above.
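To see the rule in isolation, here is a small illustrative sketch (not from the original thread; the shapes are chosen arbitrarily): Reshape only succeeds when the product of the target dimensions equals the product of the input dimensions, excluding the batch axis.

from keras.layers import Input, Reshape

x = Input(shape=(60, 512))       # 60 * 512 = 30720 elements per sample

ok = Reshape((30, 1024))(x)      # 30 * 1024 = 30720 -> compatible
print(ok.shape)                  # (None, 30, 1024)

# Reshape((4, 4, 4, 128))(x)     # 4*4*4*128 = 8192 != 30720 -> raises
#                                # "total size of new array must be unchanged"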

Working with customised Alexnet for face recognition

I am currently trying to fit my customised CNN model (AlexNet) with an input shape of (224, 224, 1), since my images are 224 x 224 and black and white.
This is where I load the data and get the dataset sizes: the number of samples, the number of features, the height and width of the images, and finally the number of classes.
lfw_people = fetch_lfw_people(min_faces_per_person = 70, resize = 2.39)
n_samples, h, w = lfw_people.images.shape
X = lfw_people.data
n_features = X.shape[1]
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
After that, I split the data using train_test_split and reshaped it using the height and width of the images, then reduced the height to the same length as the width, which makes each image 224 x 224 (a sketch of this step follows the counts below). The counts of y_train and y_test are:
y_train Count: Counter({3: 384, 1: 176, 6: 108, 2: 94, 4: 84, 0: 64, 5: 56})
y_test Count: Counter({3: 146, 1: 60, 6: 36, 2: 27, 4: 25, 5: 15, 0: 13})
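A minimal sketch of what that split-and-reshape step might look like (assumptions: h and w come from lfw_people.images.shape above, the test_size and random_state values are placeholders, and the crop to 224 x 224 follows the description rather than code from the question):

from sklearn.model_selection import train_test_split

# X holds flattened images of shape (n_samples, h * w)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

# Restore the 2-D layout and add the single grayscale channel
X_train = X_train.reshape(-1, h, w, 1)
X_test = X_test.reshape(-1, h, w, 1)

# Crop height (and width, if necessary) down to 224 so the input is (224, 224, 1)
X_train = X_train[:, :224, :224, :]
X_test = X_test[:, :224, :224, :]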
Then I convert both y_train and y_test to categorical with 7 classes:
y_train = to_categorical(
    y_train,
    num_classes = len(set(y)),
    dtype = 'uint8'
)
y_test = to_categorical(
    y_test,
    num_classes = len(set(y)),
    dtype = 'uint8'
)
Here is the code for my model, which has 8 layers in total:
model = Sequential()
# 1st Convolutional Layer
model.add(Conv2D(filters = 96, input_shape = (224, 224, 1),
                 kernel_size = (11, 11), strides = (4, 4),
                 padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(MaxPooling2D(pool_size = (2, 2),
                       strides = (2, 2), padding = 'valid'))
# Batch Normalisation
model.add(BatchNormalization())
# 2nd Convolutional Layer
model.add(Conv2D(filters = 256, kernel_size = (11, 11),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 3rd Convolutional Layer
model.add(Conv2D(filters = 384, kernel_size = (3, 3),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 4th Convolutional Layer
model.add(Conv2D(filters = 384, kernel_size = (3, 3),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Batch Normalisation
model.add(BatchNormalization())
# 5th Convolutional Layer
model.add(Conv2D(filters = 256, kernel_size = (3, 3),
                 strides = (1, 1), padding = 'valid'))
model.add(Activation('relu'))
# Max-Pooling
model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2),
                       padding = 'valid'))
# Batch Normalisation
model.add(BatchNormalization())
# Flattening
model.add(Flatten())
# 1st Dense Layer
model.add(Dense(4096, input_shape = (224*224*1, )))
model.add(Activation('relu'))
# Add Dropout to prevent overfitting
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())
# 2nd Dense Layer
model.add(Dense(4096))
model.add(Activation('relu'))
# Add Dropout
model.add(Dropout(0.4))
# Batch Normalisation
model.add(BatchNormalization())
# Output softmax layer
model.add(Dense(7))
model.add(Activation('softmax'))
This is my data augmentation setup, where I generate the images:
# Data Augmentation
datagen = ImageDataGenerator(
    featurewise_center = True,              # set input mean to 0 over the dataset
    samplewise_center = True,               # set each sample mean to 0
    featurewise_std_normalization = True,   # divide inputs by std of the dataset
    samplewise_std_normalization = True,    # divide each input by its std
    zca_whitening = False,                  # dimension reduction
    rotation_range = 20,                    # randomly rotate images by up to 20 degrees
    zoom_range = 0.1,                       # randomly zoom images by 10%
    width_shift_range = 0.2,                # randomly shift images horizontally by 20%
    height_shift_range = 0.2,               # randomly shift images vertically by 20%
    horizontal_flip = True,                 # randomly flip images horizontally
    # vertical_flip = 0.2                   # randomly flip images vertically
    vertical_flip = 0.8
)
history = model.fit(datagen.flow(X_train, y_train, batch_size = batch_size),
                    epochs = 100, validation_data = (X_test, y_test),
                    steps_per_epoch = X_train.shape[0] // batch_size,
                    verbose = 0)
After I ran the model, it gave me a validation accuracy of 0.45, and this is my confusion matrix, which shows that it keeps predicting class 3:
  |   0    1    2    3    4    5    6
--+--------------------------------
0 |   0    0    0   13    0    0    0
1 |   0    0    0   60    0    0    0
2 |   0    0    0   27    0    0    0
3 |   0    0    0  146    0    0    0
4 |   0    0    0   25    0    0    0
5 |   0    0    0   15    0    0    0
6 |   0    0    0   36    0    0    0
So how can I make it predict classes other than 3?

how to do custom keras layer matrix multiplication

Layers:
Input shape: (None, 75)
Hidden layer 1 - shape is (75, 3)
Hidden layer 2 - shape is (3, 1)
For the last layer, the output must be calculated as ((H21*w1)*(H22*w2)*(H23*w3)), where H21, H22, H23 are the outputs of hidden layer 2, and w1, w2, w3 are constant, non-trainable weights. How do I write a Lambda function for this outcome?
def product(X):
    return X[0]*X[1]

keras_model = Sequential()
keras_model.add(Dense(75, input_dim=75, activation='tanh', name="layer1"))
keras_model.add(Dense(3, activation='tanh', name="layer2"))
keras_model.add(Dense(1, name="layer3"))
cross1 = keras_model.add(Lambda(lambda x: product, output_shape=(1,1)))([layer2, layer3])
print(cross1)
NameError: name 'layer2' is not defined
Use the functional API model
inputs = Input((75,)) #shape (batch, 75)
output1 = Dense(75, activation='tanh',name="layer1" )(inputs) #shape (batch, 75)
output2 = Dense(3 ,activation='tanh',name="layer2" )(output1) #shape (batch, 3)
output3 = Dense(1,name="layer3")(output2) #shape (batch, 1)
cross1 = Lambda(lambda x: x[0] * x[1])([output2, output3]) #shape (batch, 3)
model = Model(inputs, cross1)
Please notice that the shapes are totally different from what you expect.
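As a side note on why that Lambda output is (batch, 3) rather than (1, 1): multiplying a (batch, 3) tensor by a (batch, 1) tensor broadcasts over the last axis. A quick NumPy illustration of the same broadcasting rule (illustrative only, not from the original answer):

import numpy as np

h2 = np.ones((4, 3))    # stands in for output2, shape (batch, 3)
h3 = np.ones((4, 1))    # stands in for output3, shape (batch, 1)
print((h2 * h3).shape)  # (4, 3) -- broadcast over the last axis, not (4, 1)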
I suggest doing this with a custom layer instead of a Lambda layer. Why? A custom layer gives you more freedom, and it is also more transparent when it comes to inspecting your desired weights. More precisely, if you do it through a Lambda layer, the constant weights will not be saved as part of the model, but they will be if you use a custom layer.
Here is an example
from keras import backend as K
from keras.layers import *
from keras.models import *
import numpy as np

class MyLayer(Layer):
    # see https://keras.io/layers/writing-your-own-keras-layers/
    def __init__(self, w_vec=None, allow_training=False, **kwargs):
        self._w_vec = w_vec
        assert allow_training or (w_vec is not None), \
            "ERROR: non-trainable w_vec must be initialized"
        self.allow_training = allow_training
        super().__init__(**kwargs)
        return

    def build(self, input_shape):
        batch_size, num_feats = input_shape
        self.w_vec = self.add_weight(shape=(1, num_feats),
                                     name='w_vec',
                                     initializer='uniform',  # <- use your own preferred initializer
                                     trainable=self.allow_training,)
        if self._w_vec is not None:
            # predefined w_vec
            assert self._w_vec.shape[1] == num_feats, \
                "ERROR: initial w_vec shape mismatches the input shape"
            # set it to the weight
            self.set_weights([self._w_vec])  # <- set weights to the supplied one
        super().build(input_shape)
        return

    def call(self, x):
        # Given:
        #   x = [H21, H22, H23]
        #   w_vec = [w1, w2, w3]
        # Step 1: output elem_prod
        #   elem_prod = [H21*w1, H22*w2, H23*w3]
        elem_prod = x * self.w_vec
        # Step 2: output ret
        #   ret = (H21*w1) * (H22*w2) * (H23*w3)
        ret = K.prod(elem_prod, axis=-1, keepdims=True)
        return ret

    def compute_output_shape(self, input_shape):
        return (input_shape[0], 1)

def make_test_cases(w_vec=None, allow_training=False):
    x = Input(shape=(75,))
    y = Dense(75, activation='tanh', name='fc1')(x)
    y = Dense(3, activation='tanh', name='fc2')(y)
    y = MyLayer(w_vec, allow_training, name='core')(y)
    y = Dense(1, name='fc3')(y)
    net = Model(inputs=x, outputs=y,
                name='{}-{}'.format('randomInit' if w_vec is None else 'assignInit',
                                    'trainable' if allow_training else 'nontrainable'))
    print(net.name)
    print(net.layers[-2].get_weights()[0])
    print(net.summary())
    return net
You can run the following test cases to see the differences (pay attention to the first and last lines of each printout, which give you the initial values and the number of non-trainable parameters, respectively).
a. Constant weights, non-trainable
m1 = make_test_cases(w_vec=np.arange(3).reshape([1,3]), allow_training=False)
will give you
assignInit-nontrainable [[0. 1. 2.]]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_4 (InputLayer) (None, 75) 0
_________________________________________________________________
fc1 (Dense) (None, 75) 5700
_________________________________________________________________
fc2 (Dense) (None, 3) 228
_________________________________________________________________
core (MyLayer) (None, 1) 3
_________________________________________________________________
fc3 (Dense) (None, 1) 2
=================================================================
Total params: 5,933
Trainable params: 5,930
Non-trainable params: 3
_________________________________________________________________
b. Constant weights, trainable
m2 = make_test_cases(w_vec=np.arange(3).reshape([1,3]), allow_training=True)
will give you
assignInit-trainable [[0. 1. 2.]]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_5 (InputLayer) (None, 75) 0
_________________________________________________________________
fc1 (Dense) (None, 75) 5700
_________________________________________________________________
fc2 (Dense) (None, 3) 228
_________________________________________________________________
core (MyLayer) (None, 1) 3
_________________________________________________________________
fc3 (Dense) (None, 1) 2
=================================================================
Total params: 5,933
Trainable params: 5,933
Non-trainable params: 0
_________________________________________________________________
c. Random weights, trainable
m3 = make_test_cases(w_vec=None, allow_training=True)
will give you
randomInit-trainable [[ 0.02650297 -0.02010062 -0.03771694]]
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_6 (InputLayer) (None, 75) 0
_________________________________________________________________
fc1 (Dense) (None, 75) 5700
_________________________________________________________________
fc2 (Dense) (None, 3) 228
_________________________________________________________________
core (MyLayer) (None, 1) 3
_________________________________________________________________
fc3 (Dense) (None, 1) 2
=================================================================
Total params: 5,933
Trainable params: 5,933
Non-trainable params: 0
_________________________________________________________________
Final remark
It is unclear in advance which case will work better for your problem, but trying all three sounds like a good plan.
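As a quick sanity check (illustrative only, using random data), you can train case (a) for one epoch and confirm that the non-trainable w_vec stays at its assigned constants:

import numpy as np

X = np.random.random((32, 75))
Y = np.random.random((32, 1))

m1.compile(loss='mse', optimizer='sgd')
w_before = m1.get_layer('core').get_weights()[0].copy()
m1.fit(X, Y, epochs=1, batch_size=8, verbose=0)
w_after = m1.get_layer('core').get_weights()[0]
print(np.allclose(w_before, w_after))  # expected True: w_vec was created with trainable=False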

Keras target dimensions mismatch

Attempting a single-label classification problem with num_classes = 73
Here's my simplified Keras model:
num_classes = 73
batch_size = 4
train_data_list = [training_file_names list here..]
validation_data_list = [ validation_file_names list here..]
training_generator = DataGenerator(train_data_list, batch_size, num_classes)
validation_generator = DataGenerator(validation_data_list, batch_size, num_classes)
model = Sequential()
model.add(Conv1D(32, 3, strides=1, input_shape=(15, 120), activation="relu"))
model.add(Conv1D(16, 3, strides=1, activation="relu"))
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=['accuracy'])
model.fit_generator(generator=training_generator, epochs=100,
                    validation_data=validation_generator)
Here's my DataGenerator's __get_item__ method:
def __get_item__(self):
    X = np.zeros((self.batch_size, 15, 120))
    y = np.zeros((self.batch_size, 1, self.n_classes))
    for i in range(self.batch_size):
        X_row = some_method_that_gives_X_of_15x20_dim()
        target = some_method_that_gives_target()
        one_hot = keras.utils.to_categorical(target, num_classes=self.n_classes)
        X[i] = X_row
        y[i] = one_hot
    return X, y
Since my X values are correctly returned with dimension (batch_size, 15, 120), I am not showing them here. My issue is with the y value returned.
The y returned from this generator method has shape (batch_size, 1, 73) as a one-hot encoded label for the 73 classes, which I think is the correct shape to return.
However, Keras gives the following error for the last layer:
ValueError: Error when checking target: expected dense_1 to have 2 dimensions, but got array with shape (4, 1, 73)
Since the batch size is 4, I think the target batch should also be 3-dimensional, (4, 1, 73). Why, then, does Keras expect the last layer's target to be 2-dimensional?
Your model's summary shows that the output layer should have only 2 dimensions, (None, 73):
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d_7 (Conv1D) (None, 13, 32) 11552
_________________________________________________________________
conv1d_8 (Conv1D) (None, 11, 16) 1552
_________________________________________________________________
flatten_5 (Flatten) (None, 176) 0
_________________________________________________________________
dense_4 (Dense) (None, 73) 12921
=================================================================
Total params: 26,025
Trainable params: 26,025
Non-trainable params: 0
_________________________________________________________________
Since the dimension of your target is (batch_size, 1, 73), you can simply change it to (batch_size, 73) for your model to run.
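A sketch of how that change could look inside the generator, reusing the helper names from the question (and note that, if DataGenerator subclasses keras.utils.Sequence, the method should be named __getitem__(self, index) rather than __get_item__(self)):

def __getitem__(self, index):
    X = np.zeros((self.batch_size, 15, 120))
    y = np.zeros((self.batch_size, self.n_classes))   # no extra axis of size 1
    for i in range(self.batch_size):
        X[i] = some_method_that_gives_X_of_15x20_dim()
        target = some_method_that_gives_target()
        y[i] = keras.utils.to_categorical(target, num_classes=self.n_classes)
    return X, y

# Alternatively, squeeze an existing (batch_size, 1, n_classes) array:
# y = np.squeeze(y, axis=1)   # (4, 1, 73) -> (4, 73)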

TimeDistributed(Dense) vs Dense in Keras - Same number of parameters

I'm building a model that converts a string to another string using recurrent layers (GRUs). I have tried both a Dense and a TimeDistributed(Dense) layer as the last-but-one layer, but I don't understand the difference between the two when using return_sequences=True, especially as they seem to have the same number of parameters.
My simplified model is the following:
InputSize = 15
MaxLen = 64
HiddenSize = 16
inputs = keras.layers.Input(shape=(MaxLen, InputSize))
x = keras.layers.recurrent.GRU(HiddenSize, return_sequences=True)(inputs)
x = keras.layers.TimeDistributed(keras.layers.Dense(InputSize))(x)
predictions = keras.layers.Activation('softmax')(x)
The summary of the network is:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 64, 15) 0
_________________________________________________________________
gru_1 (GRU) (None, 64, 16) 1536
_________________________________________________________________
time_distributed_1 (TimeDist (None, 64, 15) 255
_________________________________________________________________
activation_1 (Activation) (None, 64, 15) 0
=================================================================
This makes sense to me as my understanding of TimeDistributed is that it applies the same layer at all timepoints, and so the Dense layer has 16*15+15=255 parameters (weights+biases).
However, if I switch to a simple Dense layer:
inputs = keras.layers.Input(shape=(MaxLen, InputSize))
x = keras.layers.recurrent.GRU(HiddenSize, return_sequences=True)(inputs)
x = keras.layers.Dense(InputSize)(x)
predictions = keras.layers.Activation('softmax')(x)
I still only have 255 parameters:
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 64, 15) 0
_________________________________________________________________
gru_1 (GRU) (None, 64, 16) 1536
_________________________________________________________________
dense_1 (Dense) (None, 64, 15) 255
_________________________________________________________________
activation_1 (Activation) (None, 64, 15) 0
=================================================================
I wonder if this is because Dense() will only use the last dimension in the shape, and effectively treat everything else as a batch-like dimension. But then I'm no longer sure what the difference is between Dense and TimeDistributed(Dense).
Update: Looking at https://github.com/fchollet/keras/blob/master/keras/layers/core.py, it does seem that Dense only uses the last dimension to size itself:
def build(self, input_shape):
    assert len(input_shape) >= 2
    input_dim = input_shape[-1]
    self.kernel = self.add_weight(shape=(input_dim, self.units),
It also uses keras.dot to apply the weights:
def call(self, inputs):
    output = K.dot(inputs, self.kernel)
The docs of keras.dot imply that it works fine on n-dimensional tensors. I wonder if its exact behaviour means that Dense() is in effect applied at every time step. If so, the question remains: what does TimeDistributed() achieve in this case?
TimeDistributed(Dense) applies the same Dense layer to every time step during GRU/LSTM cell unrolling, so the error function is computed between the predicted label sequence and the actual label sequence (which is normally the requirement for sequence-to-sequence labelling problems).
However, with return_sequences=False, the Dense layer is applied only once, at the last cell. This is normally the case when RNNs are used for classification problems. If return_sequences=True, then the Dense layer is applied to every timestep, just like TimeDistributed(Dense).
So, as far as your models go, both are the same; but if you change your second model to return_sequences=False, the Dense layer will be applied only at the last cell. Try changing it and the model will throw an error, because then Y will be of size [Batch_size, InputSize]; it is no longer a sequence-to-sequence problem but a sequence-to-label problem.
from keras.models import Sequential
from keras.layers import Dense, Activation, TimeDistributed
from keras.layers.recurrent import GRU
import numpy as np
InputSize = 15
MaxLen = 64
HiddenSize = 16
OutputSize = 8
n_samples = 1000
model1 = Sequential()
model1.add(GRU(HiddenSize, return_sequences=True, input_shape=(MaxLen, InputSize)))
model1.add(TimeDistributed(Dense(OutputSize)))
model1.add(Activation('softmax'))
model1.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model2 = Sequential()
model2.add(GRU(HiddenSize, return_sequences=True, input_shape=(MaxLen, InputSize)))
model2.add(Dense(OutputSize))
model2.add(Activation('softmax'))
model2.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model3 = Sequential()
model3.add(GRU(HiddenSize, return_sequences=False, input_shape=(MaxLen, InputSize)))
model3.add(Dense(OutputSize))
model3.add(Activation('softmax'))
model3.compile(loss='categorical_crossentropy', optimizer='rmsprop')
X = np.random.random([n_samples,MaxLen,InputSize])
Y1 = np.random.random([n_samples,MaxLen,OutputSize])
Y2 = np.random.random([n_samples, OutputSize])
model1.fit(X, Y1, batch_size=128, nb_epoch=1)
model2.fit(X, Y1, batch_size=128, nb_epoch=1)
model3.fit(X, Y2, batch_size=128, nb_epoch=1)
print(model1.summary())
print(model2.summary())
print(model3.summary())
In the above example, the architectures of model1 and model2 are the same (sequence-to-sequence models), while model3 is a full sequence-to-label model.
Here is a piece of code that verifies that TimeDistributed(Dense(X)) is identical to Dense(X):
import numpy as np
from keras.layers import Dense, TimeDistributed
import tensorflow as tf

X = np.array([[[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9],
               [10, 11, 12]],
              [[3, 1, 7],
               [8, 2, 5],
               [11, 10, 4],
               [9, 6, 12]]]).astype(np.float32)
print(X.shape)
(2, 4, 3)
dense_weights = np.array([[0.1, 0.2, 0.3, 0.4, 0.5],
                          [0.2, 0.7, 0.9, 0.1, 0.2],
                          [0.1, 0.8, 0.6, 0.2, 0.4]])
bias = np.array([0.1, 0.3, 0.7, 0.8, 0.4])
print(dense_weights.shape)
(3, 5)
dense = Dense(input_dim=3, units=5, weights=[dense_weights, bias])
input_tensor = tf.Variable(X, name='inputX')
output_tensor1 = dense(input_tensor)
output_tensor2 = TimeDistributed(dense)(input_tensor)
print(output_tensor1.shape)
print(output_tensor2.shape)
(2, 4, 5)
(2, ?, 5)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output1 = sess.run(output_tensor1)
    output2 = sess.run(output_tensor2)
    print(output1 - output2)
And the difference is:
[[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]]
