I need to add a k-max pooling layer to a CNN model to detect fake reviews. Can you let me know how to implement it using Keras?
I searched the internet but found no good resources.
As per this paper, k-max pooling is a pooling operation that is a generalisation of the max pooling over the time dimension used in the Max-TDNN sentence model, and different from the local max pooling operations applied in a convolutional network for object recognition (LeCun et al., 1998).

The k-max pooling operation makes it possible to pool the k most active features in p that may be a number of positions apart; it preserves the order of the features, but is insensitive to their specific positions.
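To make the definition concrete, here is a small framework-free sketch of the operation on a single feature sequence (a toy example of mine, not from the paper):

# k-max pooling over a 1-D list of activations, keeping the original order
def kmax_1d(seq, k):
    # pick the positions of the k largest values, then re-sort them by position
    topk_positions = sorted(sorted(range(len(seq)), key=lambda i: seq[i], reverse=True)[:k])
    return [seq[i] for i in topk_positions]

print(kmax_1d([7, 2, 3, 9, 5], k=3))  # [7, 9, 5] -- order kept, exact positions discarded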
There are a few resources which show how to implement it in TensorFlow or Keras:
How to implement K-Max pooling in Tensorflow or Keras?
https://github.com/keras-team/keras/issues/373
New Pooling Layers For Varying-Length Convolutional Networks
Keras implementation of K-Max Pooling with TensorFlow Backend
There seems to be a solution here, as #Anubhav_Singh suggested. This response got almost five times more thumbs up (24) than thumbs down (5) on the GitHub Keras issues link. I am just quoting it as-is here; let people try it out and say whether it worked for them or not.
Original author: arbackus

from keras.engine import Layer, InputSpec
from keras.layers import Flatten
import tensorflow as tf

class KMaxPooling(Layer):
    """
    K-max pooling layer that extracts the k highest activations from a sequence (2nd dimension).
    TensorFlow backend.
    """
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2] * self.k)

    def call(self, inputs):
        # swap last two dimensions since top_k will be applied along the last dimension
        shifted_input = tf.transpose(inputs, [0, 2, 1])
        # extract top_k, returns two tensors [values, indices]
        top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=True, name=None)[0]
        # return flattened output
        return Flatten()(top_k)
Note: it was reported to be running very slow (though it worked for people).
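For reference, a minimal usage sketch with the old standalone-Keras API this snippet targets (the embedding/convolution settings are my own assumptions, not from the thread):

from keras.models import Sequential
from keras.layers import Embedding, Conv1D

model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=100))  # (batch, 100, 64)
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))       # (batch, 96, 128)
model.add(KMaxPooling(k=3))                                            # (batch, 3 * 128)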
Check this out. Not thoroughly tested, but it works fine for me. Let me know what you think. (P.S. this targets the latest TensorFlow version.)
tf.nn.top_k does not preserve the order of occurrence of values, so that is the thing that needs to be worked on:
import tensorflow as tf
from tensorflow.keras import layers

class KMaxPooling(layers.Layer):
    """
    K-max pooling layer that extracts the k highest activations from a sequence (2nd dimension).
    TensorFlow backend.
    """
    def __init__(self, k=1, axis=1, **kwargs):
        super(KMaxPooling, self).__init__(**kwargs)
        self.input_spec = layers.InputSpec(ndim=3)
        self.k = k

        assert axis in [1, 2], 'expected dimensions (samples, filters, convolved_values), ' \
                               'cannot fold along samples dimension or axis not in list [1,2]'
        self.axis = axis

        # need to swap the chosen axis with the last one,
        # since top_k works along the last axis
        self.transpose_perm = [0, 1, 2]  # default
        self.transpose_perm[self.axis] = 2
        self.transpose_perm[2] = self.axis

    def compute_output_shape(self, input_shape):
        input_shape_list = list(input_shape)
        input_shape_list[self.axis] = self.k
        return tuple(input_shape_list)

    def call(self, x):
        # swap sequence dimension to get top k elements along axis=1
        transposed_for_topk = tf.transpose(x, perm=self.transpose_perm)

        # extract top_k, returns two tensors [values, indices]
        top_k_vals, top_k_indices = tf.math.top_k(transposed_for_topk,
                                                  k=self.k, sorted=True,
                                                  name=None)
        # maintain the order of values as in the paper: sort the indices
        sorted_top_k_ind = tf.sort(top_k_indices)

        # translate the per-row indices into indices of the flattened tensor,
        # then gather the values back out in their original order
        flatten_seq = tf.reshape(transposed_for_topk, (-1,))
        shape_seq = tf.shape(transposed_for_topk)
        len_seq = tf.shape(flatten_seq)[0]
        indices_seq = tf.range(len_seq)
        indices_seq = tf.reshape(indices_seq, shape_seq)
        indices_gather = tf.gather(indices_seq, 0, axis=-1)
        indices_sum = tf.expand_dims(indices_gather, axis=-1)
        sorted_top_k_ind += indices_sum
        k_max_out = tf.gather(flatten_seq, sorted_top_k_ind)

        # transpose back so the tensor has its original layout,
        # but with k elements along the pooled axis
        transposed_back = tf.transpose(k_max_out, perm=self.transpose_perm)
        return transposed_back
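A quick smoke test of this layer (the example tensor and expected values are mine):

# k=2 along the sequence axis; the two survivors keep their original order
x = tf.constant([[[1.0], [4.0], [2.0], [3.0]]])  # shape (1, 4, 1)
print(KMaxPooling(k=2, axis=1)(x))               # [[[4.0], [3.0]]], shape (1, 2, 1)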
Here is my implementation of k-max pooling, as explained in the comment of #Anubhav Singh above (the order of the top-k values is preserved):
def test60_simple_test(a):
    # extract top_k, returns two tensors [values, indices]
    res = tf.nn.top_k(a, k=3, sorted=True, name=None)
    # sort the top-k indices so the gathered values keep their original order
    b = tf.sort(res[1], axis=0, direction='ASCENDING', name=None)
    e = tf.gather(a, b)
    return e

a = tf.constant([7, 2, 3, 9, 5], dtype=tf.float64)
print('*input:', a)
print('**output', test60_simple_test(a))
The result:
*input: tf.Tensor([7. 2. 3. 9. 5.], shape=(5,), dtype=float64)
**output tf.Tensor([7. 9. 5.], shape=(3,), dtype=float64)
Here is a PyTorch implementation of k-max pooling:

import torch

def kmax_pooling(x, dim, k):
    # indices of the k largest values, re-sorted into their original order
    index = x.topk(k, dim=dim)[1].sort(dim=dim)[0]
    return x.gather(dim, index)
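A quick check of this function (the example tensor is mine):

x = torch.tensor([[7., 2., 3., 9., 5.]])
print(kmax_pooling(x, dim=1, k=3))  # tensor([[7., 9., 5.]]) -- order preserved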
Hope this helps.
Related
I have several graphs, and I use them to train a model hoping to identify the category of each node. I have three categories. Here is the formulation of one of my graphs (using the Data object):
Data(x=[100, 64], pos=[100, 2], edge_index=[2, 546], y=[100], edge_weight=[546])
x is the embedding of every node; there are 100 nodes and each node has a 64-dimensional feature. pos is the coordinate of each node. A two-dimensional vector in edge_index represents an edge between two nodes, and edge_weight is the weight corresponding to each edge.
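For context, an object with this layout could be built roughly as follows (a sketch with random values, assuming torch_geometric is installed):

import torch
from torch_geometric.data import Data

x = torch.randn(100, 64)                      # 64-dimensional feature per node
pos = torch.randn(100, 2)                     # 2-D coordinate per node
edge_index = torch.randint(0, 100, (2, 546))  # source/target node index per edge
edge_weight = torch.rand(546)                 # one weight per edge
y = torch.randint(0, 3, (100,))               # one of three categories per node
data = Data(x=x, pos=pos, edge_index=edge_index, y=y, edge_weight=edge_weight)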
Here is my GCN model:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# GCN model with 2 layers
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(data.num_features, 16)
        self.conv2 = GCNConv(16, int(data.num_classes))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = Net().to(device)
and here is my training code:

for epoch in range(epoches):
    for img_path in img_paths:  # in order to build the graph
        data = build_graph(img_path)
        model.train()
        optimizer.zero_grad()
        output = model(data)
        F.nll_loss(output, target).backward()
        optimizer.step()
But using this code, every node in one graph gets the same output, and the outputs are all negative numbers.
Note: categories 1 and 2 in each picture are balanced; category 3 in every image is smaller. But the data should not be the main reason for this result.
This has confused me for a long time. I would be very grateful if you could help me.
Hello guys, I am new to machine learning. I am implementing federated learning with an LSTM to predict the next label in a sequence. My sequence looks like this: [2, 3, 5, 1, 4, 2, 5, 7]. For example, the intention is to predict the 7 in this sequence. I tried simple federated learning with Keras. I used this approach for another model (not an LSTM) and it worked for me, but here it always overfits on 2: it always predicts 2 for any input. I made the input data balanced, meaning there are almost equal numbers of each label at the last index (here, 7). I tested this data on a simple deep learning model and it works great, so it seems to me this data may not be suitable for an LSTM, or there is some other issue. Please help me. This is the code for my federated learning. Please let me know if more information is needed; I really need it. Thanks.
# imports assumed; the original snippet did not show them
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

def get_lstm(units):
    """LSTM (Long Short-Term Memory)
    Build LSTM Model.

    # Arguments
        units: List(int), number of input, output and hidden units.
    # Returns
        model: Model, nn model.
    """
    inp = layers.Input((units[0], 1))
    x = layers.LSTM(units[1], return_sequences=True)(inp)
    x = layers.LSTM(units[2])(x)
    x = layers.Dropout(0.2)(x)
    out = layers.Dense(units[3], activation='softmax')(x)
    model = Model(inp, out)
    return model

optimizer = keras.optimizers.Adam(lr=0.01)
seqLen = 8 - 1
global_model = Mymodel.get_lstm([seqLen, 64, 64, 15])  # we have 14 categories; the array starts from 0 but class zero can never be predicted
global_model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
                     metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1)])
def main(argv):
    for comm_round in range(comms_round):
        print("round_%d" % comm_round)
        scaled_local_weight_list = list()
        global_weights = global_model.get_weights()
        np.random.shuffle(train)
        temp_data = train[:]

        # data divided among ten users and shuffled
        for user in range(10):
            user_data = temp_data[user * userDataSize: (user + 1) * userDataSize]
            X_train = user_data[:, 0:seqLen]
            X_train = np.asarray(X_train).astype(np.float32)
            Y_train = user_data[:, seqLen]
            Y_train = np.asarray(Y_train).astype(np.float32)

            local_model = Mymodel.get_lstm([seqLen, 64, 64, 15])
            X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
            local_model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
                                metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy(k=1)])
            local_model.set_weights(global_weights)
            local_model.fit(X_train, Y_train)

            scaling_factor = 1 / 10  # 10 is the number of users
            scaled_weights = scale_model_weights(local_model.get_weights(), scaling_factor)
            scaled_local_weight_list.append(scaled_weights)
            K.clear_session()

        average_weights = sum_scaled_weights(scaled_local_weight_list)
        global_model.set_weights(average_weights)

    predictions = global_model.predict(X_test)
    for i in range(len(X_test)):
        print('%d,%d' % (np.argmax(predictions[i]), Y_test[i]), file=f2)
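(The helpers scale_model_weights and sum_scaled_weights are not shown in the question; a plain FedAvg reading of them would be something like this sketch.)

def scale_model_weights(weights, scaling_factor):
    # multiply every weight tensor of one client by its averaging coefficient
    return [scaling_factor * w for w in weights]

def sum_scaled_weights(scaled_weight_list):
    # element-wise sum over clients; the 1/num_users factors are already applied
    return [np.sum(layer_weights, axis=0) for layer_weights in zip(*scaled_weight_list)]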
I found some reasons for my problem, so I thought I would share them with you:
1- The proportions of different items in the sequences are not balanced. For example, I have 1000 occurrences of "2" and only 100 of other numbers, so after a few rounds the model fits on 2 because there is much more data for that specific number.
2- I changed my sequences so that no two items in a sequence have the same value. This let me remove some repetitive data from the sequences and make them more balanced. Maybe this is not the whole presentation of the activities, but in my case it makes sense.
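A quick way to verify point 1 is to print the label distribution before training (a sketch, assuming the target label sits in the last column as in the code above):

labels, counts = np.unique(train[:, seqLen], return_counts=True)
for label, count in zip(labels, counts):
    print(label, count / len(train))  # proportion of each target label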
I have a series of vectors representing a signal over time. I'd like to classify parts of the signal into two categories: 1 or 0. The reason for using an LSTM is that I believe the network will need knowledge of the entire signal to classify.
My problem is developing the PyTorch model. Below is the class I've come up with.
class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, label_size, batch_size):
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        self.hidden2label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        return (torch.zeros(1, self.batch_size, self.hidden_dim),
                torch.zeros(1, self.batch_size, self.hidden_dim))

    def forward(self, x):
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y = self.hidden2label(lstm_out[-1])
        log_probs = F.log_softmax(y)
        return log_probs
However, this model is giving a bunch of shape errors, and I'm having trouble understanding everything that is going on. I looked at this SO question first.
You should always follow the PyTorch documentation, especially the inputs and outputs parts.
This is how the classifier should look:
import torch
import torch.nn as nn

class LSTMClassifier(nn.Module):
    def __init__(self, input_dim, hidden_dim, label_size):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.hidden2label = nn.Linear(hidden_dim, label_size)

    def forward(self, x):
        _, (h_n, _) = self.lstm(x)
        return self.hidden2label(h_n.reshape(x.shape[0], -1))

clf = LSTMClassifier(100, 200, 1)
inputs = torch.randn(64, 10, 100)
clf(inputs)
Points to consider:
Always use super().__init__(), as it registers modules in your neural network and allows for hooks, etc.
Use batch_first=True so you can pass inputs of shape (batch, timesteps, n_features).
There is no need to init_hidden with zeros; that is the default value if it is left uninitialized.
There is no need to pass self.hidden to the LSTM each time. Moreover, you should not do that: it implies that elements from each batch of data are somehow subsequent steps, whereas batch elements should be disjoint, and you probably do not want that.
_, (h_n, _) returns the last hidden cell from the last timestep, with shape (num_layers * num_directions, batch, hidden_size). In our case num_layers and num_directions are both 1, so we get a (1, batch, hidden_size) tensor as output.
Reshape to (batch, hidden_size) so it can be passed through the linear layer.
Return logits without an activation; only one output is needed if it is a binary case. Use torch.nn.BCEWithLogitsLoss as the loss for the binary case and torch.nn.CrossEntropyLoss for the multiclass case. Also, sigmoid is the proper activation for the binary case, while softmax or log_softmax is appropriate for multiclass.
For the binary case only one output is needed. Any value below 0 (when returning unnormalized probabilities, as in this case) is considered negative, and anything above 0 positive. A training-step sketch follows below.
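A minimal training-step sketch for the binary case described above (random targets, purely illustrative, reusing clf and inputs from the snippet):

criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(clf.parameters(), lr=1e-3)

targets = torch.randint(0, 2, (64, 1)).float()  # one 0/1 label per sequence
logits = clf(inputs)                            # (64, 1) unnormalized scores
loss = criterion(logits, targets)
optimizer.zero_grad()
loss.backward()
optimizer.step()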
I'm trying to create an unpooling layer using Keras with the TensorFlow backend. The unpooling operation that I am trying to implement is described in this paper. This is the same unpooling operation that SegNet uses.
Unpooling: In the convnet, the max pooling operation is non-invertible, however we can obtain an approximate inverse by recording the locations of the maxima within each pooling region in a set of switch variables. In the deconvnet, the unpooling operation uses these switches to place the reconstructions from the layer above into appropriate locations, preserving the structure of the stimulus.
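In TensorFlow, the "switch variables" can be recorded with tf.nn.max_pool_with_argmax; here is a minimal sketch (shapes are mine, written against a recent TF version rather than the old Keras backend used below):

import tensorflow as tf

x = tf.random.normal((1, 4, 4, 1))
pooled, argmax = tf.nn.max_pool_with_argmax(x, ksize=2, strides=2, padding='SAME')
# argmax holds the flattened position of each maximum (the switches);
# an unpooling layer can use it to scatter `pooled` back into a 4x4 map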
Most of my code is an adaptation of this implementation from an older version of Keras.
So far I have written a custom layer, and it is performing the unpooling operation correctly. The problem I am running into is that when Keras tries to compute the gradient during the back-prop stage, I get the error:
raise ValueError('An operation has `None` for gradient. '
ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.
As far as I understand, this error is caused because I must be using an operation that Keras doesn't know the derivative of.
Here is my code:
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import UpSampling2D
import numpy as np
import operator

class MyUnpooler(Layer):
    # Initialisations
    def __init__(self, pool_layer, pool_input, size=(2, 2), **kwargs):
        self.pool_layer = pool_layer
        self.pool_input = pool_input
        self.size = size
        super(MyUnpooler, self).__init__(**kwargs)

    # This method would be used to create weights
    # I don't have any trainable weights but this must be implemented
    def build(self, input_shape):
        super(MyUnpooler, self).build(input_shape)

    # This method holds the layer's logic
    # x is always the input to the layer
    def call(self, x):
        # Zeros for later
        zeros = K.zeros_like(self.pool_input)
        # Mask template
        upsampled = UpSampling2D(size=self.size)(self.pool_layer)
        upsampled_shape = upsampled.get_shape().as_list()[1:]
        input_shape = self.pool_input.get_shape().as_list()[1:]
        size_diff = map(operator.sub, input_shape, upsampled_shape)
        unfiltered_mask = K.spatial_2d_padding(upsampled, padding=((0, size_diff[1]), (0, size_diff[2])))
        # Create the mask
        self.mask = K.tf.equal(self.pool_input, unfiltered_mask)
        assert self.mask.get_shape().as_list() == self.pool_input.get_shape().as_list()
        self.unpooled = K.tf.where(self.mask, self.pool_input, zeros)
        return K.tf.to_float(self.unpooled)

    def compute_output_shape(self, input_shape):
        # input_shape is not actually the input shape we need...
        # We need to be able to UpSample the layer to calculate the dimensions
        upsampled = UpSampling2D(size=self.size)(self.pool_layer)
        upsampled_shape = upsampled.get_shape().as_list()[1:]
        inp_shape = self.pool_input.get_shape().as_list()[1:]
        size_diff = map(operator.sub, inp_shape, upsampled_shape)
        unf_mask = K.spatial_2d_padding(upsampled, padding=((0, size_diff[1]), (0, size_diff[2])))
        return tuple(unf_mask.get_shape().as_list())
If there is a better way of doing the unpooling operation I am also open to completely disregarding my attempt so far.
In the code below for a single-hidden-layer neural network, I'm attempting to parallelize the gradient-descent operation across two GPUs. I'm just trying to think about this conceptually at the moment. There does not appear to be much literature on how to perform this; reading Training Multi-GPU on Tensorflow: a simpler way? does not provide a concrete answer. In the code below I've added two functions, runOnGPU1() and runOnGPU2(), which are a conceptual idea of how to split the training of the network across two GPUs. Can these two loops be split in order to share the computation across multiple GPUs?
import numpy as np
import tensorflow as tf
sess = tf.InteractiveSession()
# a batch of inputs of 2 value each
inputs = tf.placeholder(tf.float32, shape=[None, 2])
# a batch of output of 1 value each
desired_outputs = tf.placeholder(tf.float32, shape=[None, 1])
# [!] define the number of hidden units in the first layer
HIDDEN_UNITS = 4
# connect 2 inputs to HIDDEN_UNITS hidden units
# [!] Initialize weights with random numbers, to make the network learn
weights_1 = tf.Variable(tf.truncated_normal([2, HIDDEN_UNITS]))
# [!] The biases are single values per hidden unit
biases_1 = tf.Variable(tf.zeros([HIDDEN_UNITS]))
# connect 2 inputs to every hidden unit. Add bias
layer_1_outputs = tf.nn.sigmoid(tf.matmul(inputs, weights_1) + biases_1)
# [!] The XOR problem is that the function is not linearly separable
# [!] An MLP (multi-layer perceptron) can learn to separate non-linearly separable points (you can
# think of it as learning hypercurves, not only hyperplanes)
# [!] Let's add a new layer and change layer 2 to output more than 1 value
# connect the first hidden units to 2 hidden units in the second hidden layer
weights_2 = tf.Variable(tf.truncated_normal([HIDDEN_UNITS, 2]))
# [!] The same as above
biases_2 = tf.Variable(tf.zeros([2]))
# connect the hidden units to the second hidden layer
layer_2_outputs = tf.nn.sigmoid(
    tf.matmul(layer_1_outputs, weights_2) + biases_2)
# [!] create the new layer
weights_3 = tf.Variable(tf.truncated_normal([2, 1]))
biases_3 = tf.Variable(tf.zeros([1]))
logits = tf.nn.sigmoid(tf.matmul(layer_2_outputs, weights_3) + biases_3)
# [!] The error function chosen is good for a multiclass classification task, not for XOR.
error_function = 0.5 * tf.reduce_sum(tf.subtract(logits, desired_outputs) * tf.subtract(logits, desired_outputs))
train_step = tf.train.GradientDescentOptimizer(0.05).minimize(error_function)
sess.run(tf.global_variables_initializer())
training_inputs = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
training_outputs = [[0.0], [1.0], [1.0], [0.0]]
def runOnGPU1():
    for i in range(5):
        _, loss = sess.run([train_step, error_function],
                           feed_dict={inputs: np.array(training_inputs),
                                      desired_outputs: np.array(training_outputs)})
        print(loss)

def runOnGPU2():
    for i in range(5):
        _, loss = sess.run([train_step, error_function],
                           feed_dict={inputs: np.array(training_inputs),
                                      desired_outputs: np.array(training_outputs)})
        print(loss)
runOnGPU1()
runOnGPU2()
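For what it's worth, explicit placement in graph-mode TensorFlow is done with tf.device; a conceptual sketch of pinning one forward pass per GPU (not a complete data-parallel trainer, which would also average the per-tower gradients) could look like this:

def build_layer1(inputs_):
    # forward pass of the first layer for one replica; the variables are shared
    return tf.nn.sigmoid(tf.matmul(inputs_, weights_1) + biases_1)

with tf.device('/gpu:0'):
    tower_0 = build_layer1(inputs)
with tf.device('/gpu:1'):
    tower_1 = build_layer1(inputs)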