I am new to Keras. I am trying to merge the output layers of three pretrained models in Keras. Each model has two separate inputs, of different dimensions across the models, and a Dense layer as output.
model1 = MyModel1()  # returns keras.engine.training.Model
model2 = MyModel2()  # returns keras.engine.training.Model
model3 = MyModel3()  # returns keras.engine.training.Model
x = merge([model1(model1.input),
           model2(model2.input),
           model3(model3.input)],
          mode='concat', concat_axis=1)
# add some trainable layers here...
# and a final softmax layer
x = Dense(2, activation='softmax')(x)
return Model(input=[model1.input,
                    model2.input,
                    model3.input],
             output=x)
Since model?.input returns a list of tensors, this does not work. I have tried different things and nothing seems to work. Is there an easy solution to this problem?
EDIT:
Adapted indraforyou's working solution below to handle multiple inputs in each model.
from keras.models import Model
from keras.layers import Input, Dense, merge
def MyModel1():
    inp1 = Input(batch_shape=(None, 32,))
    inp2 = Input(batch_shape=(None, 32,))
    x = Dense(8)(inp1)
    y = Dense(8)(inp2)
    merged = merge([x, y], mode='concat', concat_axis=-1)
    out = Dense(8)(merged)
    return Model(input=[inp1, inp2], output=out)

def MyModel2():
    inp1 = Input(batch_shape=(None, 10,))
    inp2 = Input(batch_shape=(None, 10,))
    x = Dense(4)(inp1)
    y = Dense(4)(inp2)
    merged = merge([x, y], mode='concat', concat_axis=-1)
    out = Dense(4)(merged)
    return Model(input=[inp1, inp2], output=out)

def MyModel3():
    inp1 = Input(batch_shape=(None, 12,))
    inp2 = Input(batch_shape=(None, 12,))
    x = Dense(6)(inp1)
    y = Dense(6)(inp2)  # fixed: was Dense(6)(inp1), which left inp2 disconnected
    merged = merge([x, y], mode='concat', concat_axis=-1)
    out = Dense(6)(merged)
    return Model(input=[inp1, inp2], output=out)
model1 = MyModel1()
model2 = MyModel2()
model3 = MyModel3()

x = merge([model1.output,
           model2.output,
           model3.output],
          mode='concat', concat_axis=-1)
x = Dense(2, activation='softmax')(x)

merged = Model(input=[model1.input[0], model1.input[1],
                      model2.input[0], model2.input[1],
                      model3.input[0], model3.input[1]],
               output=x)
merged.summary()
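A quick smoke test that the six-input model wires up end to end; the batch size of 16 and the random data below are my assumptions, not part of the original solution:

import numpy as np

merged.compile(optimizer='adam', loss='categorical_crossentropy')
dummy_inputs = [np.random.rand(16, d) for d in (32, 32, 10, 10, 12, 12)]
dummy_labels = np.random.rand(16, 2)
merged.train_on_batch(dummy_inputs, dummy_labels)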
The model objects are not to be called like functions on their own inputs here; use their output tensors instead. This should fix the problem:
x = merge([model1.output,
           model2.output,
           model3.output],
          mode='concat', concat_axis=1)
Updated working code:
from keras.models import Model
from keras.layers import Input, Dense, merge

def MyModel1():
    inp = Input(batch_shape=(None, 32,))
    out = Dense(8)(inp)
    return Model(input=inp, output=out)

def MyModel2():
    inp = Input(batch_shape=(None, 10,))
    out = Dense(4)(inp)
    return Model(input=inp, output=out)

def MyModel3():
    inp = Input(batch_shape=(None, 12,))
    out = Dense(6)(inp)
    return Model(input=inp, output=out)

model1 = MyModel1()
model2 = MyModel2()
model3 = MyModel3()

x = merge([model1.output,
           model2.output,
           model3.output],
          mode='concat', concat_axis=1)
x = Dense(2, activation='softmax')(x)

merged = Model(input=[model1.input,
                      model2.input,
                      model3.input],
               output=x)
merged.summary()
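Note that merge(mode='concat') is the Keras 1 functional API; in Keras 2 it was removed in favour of concatenate. A sketch of the same graph under Keras 2 (assuming Keras 2.x):

from keras.layers import concatenate

x = concatenate([model1.output, model2.output, model3.output], axis=1)
x = Dense(2, activation='softmax')(x)
merged = Model(inputs=[model1.input, model2.input, model3.input], outputs=x)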
Related
I would like to re-create the following Keras model in PyTorch.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

vocab_size = 22
maxlen = 200
embed_dim = 256
num_heads = 2
feed_forward_dim = 256
batch_size = 128
decoders = 5

def create_model():
    inputs = layers.Input(shape=(maxlen,), dtype=tf.int32)
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    decoder_blocks = []
    for i in range(decoders):
        decoder_blocks.append(DecoderBlock(embed_dim, num_heads, feed_forward_dim))
    for i in range(len(decoder_blocks)):
        x = decoder_blocks[i](x)
    outputs = layers.Dense(vocab_size)(x)
    model = keras.Model(inputs=inputs, outputs=[outputs, x])
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),  # lr_schedule must be defined elsewhere (not shown)
        loss=[loss_fn, None],
    )
    return model

model = create_model()
Here are the DecoderBlock and TokenAndPositionEmbedding layers, along with the causal attention mask:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    i = tf.range(n_dest)[:, None]
    j = tf.range(n_src)
    m = i >= j - n_src + n_dest
    mask = tf.cast(m, dtype)
    mask = tf.reshape(mask, [1, n_dest, n_src])
    mult = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(mask, mult)

class DecoderBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(DecoderBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, tf.bool)
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions
For reference, this code is copied directly from: https://keras.io/examples/generative/text_generation_with_miniature_gpt/
I have tried to create an equivalent architecture in PyTorch using nn.TransformerDecoderLayer. Apologies for not including my own code, but I have been completely unsuccessful.
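For what it's worth: a GPT-style block has no encoder memory, so nn.TransformerEncoderLayer driven with a causal mask is often a closer match than nn.TransformerDecoderLayer, whose forward expects a cross-attention memory tensor. Below is a minimal sketch of one possible PyTorch equivalent, not a verified port; the class names and the choice to return only the logits (the Keras model also returns the last hidden state x) are my assumptions:

import torch
import torch.nn as nn

class TokenAndPositionEmbedding(nn.Module):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, embed_dim)
        self.pos_emb = nn.Embedding(maxlen, embed_dim)

    def forward(self, x):  # x: (batch, seq_len) of token ids
        positions = torch.arange(x.size(1), device=x.device)
        return self.token_emb(x) + self.pos_emb(positions)

class MiniGPT(nn.Module):
    def __init__(self, vocab_size=22, maxlen=200, embed_dim=256,
                 num_heads=2, ff_dim=256, num_layers=5):
        super().__init__()
        self.embed = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
        block = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=num_heads, dim_feedforward=ff_dim,
            dropout=0.1, batch_first=True)
        self.blocks = nn.TransformerEncoder(block, num_layers=num_layers)
        self.lm_head = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        seq_len = x.size(1)
        # Boolean mask: True marks positions a query may NOT attend to,
        # i.e. everything strictly above the diagonal (the future).
        causal_mask = torch.triu(
            torch.ones(seq_len, seq_len, dtype=torch.bool, device=x.device),
            diagonal=1)
        h = self.blocks(self.embed(x), mask=causal_mask)
        return self.lm_head(h)  # logits over the vocabulary

logits = MiniGPT()(torch.randint(0, 22, (2, 200)))  # smoke test: (2, 200, 22)

Note that nn.TransformerEncoderLayer is post-norm by default and places dropout slightly differently from the DecoderBlock above, so exact numerical parity would require a hand-written block.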
I am a beginner learning to use torch_geometric (PyG) to build my GNN models. I referred to the PyG example of node classification and built my own dataset. However, when I tried to run the code on my GPU, it told me it ran out of memory; maybe my dataset is too large to fit in GPU memory? I don't know. I share a machine with 8 A100s with my classmates. Could you please give me some suggestions? Thank you!
from torch_geometric.nn import GATConv, GCNConv
from torch_geometric.data import Dataset, DataLoader, HeteroData, Data
import torch.nn as nn
from torch_geometric.nn import DataParallel
from torch_geometric.loader import DataListLoader
import torch.nn.functional as F
import torch
import pandas as pd
from transformers import BertTokenizer, BertModel
import pickle
import time
from tqdm import tqdm
from numba import jit
import json
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

plt.grid(True)
plt.grid(color='gray', linestyle='--', linewidth=1, alpha=0.3)

begin = time.time()

punctuation = "!#$%&'\(\)-*+,-./:;<=>?#\\\[\]^_`{|}~():;,。【】·、“”‘’《》\"%……——·"

def dataCleanifier(s):
    for i in punctuation:
        s = s.replace(i, " ")  # fixed: str.replace returns a new string, it does not mutate s
    s = s.replace(" ", " ")
    s = s.replace("\n", "")
    return s
class BertClassifier(nn.Module):
    def __init__(self, bertType: str, max_length, tag_size):
        super(BertClassifier, self).__init__()
        self.bertType = bertType
        self.tokenizer = BertTokenizer.from_pretrained(self.bertType)
        self.encoder = BertModel.from_pretrained(self.bertType)
        self.outputDim = self.encoder.pooler.dense.out_features
        self.max_length = max_length
        self.tag_size = tag_size
        self.dropout = nn.Dropout(0.1)
        self.activation = nn.LeakyReLU(0.1)
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, 512, (k, self.outputDim)) for k in (2, 3, 4)])
        self.fc_cnn = nn.Linear(512 * len((2, 3, 4)), self.tag_size)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        x = self.tokenizer.batch_encode_plus(x, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        attention = x["attention_mask"]
        x = x["input_ids"]
        x = x.cuda(2)
        x = self.encoder(x, attention_mask=attention.cuda(2))['last_hidden_state'][:]
        x = x.unsqueeze(1)
        encoded = torch.cat([self.conv_and_pool(x, conv) for conv in self.convs], 1)
        x = self.fc_cnn(encoded)
        x = self.activation(x)
        # x = F.softmax(x, dim=1)
        return x, encoded
class ContrastiveLoss(nn.Module):
    def __init__(self):
        super(ContrastiveLoss, self).__init__()

    def forward(self, representations, label, y_hat):
        n = label.shape[0]
        T = 0.5
        similarity_matrix = F.cosine_similarity(representations.unsqueeze(1), representations.unsqueeze(0), dim=2)
        mask = torch.ones_like(similarity_matrix) * (label.expand(n, n).eq(label.expand(n, n).t()))
        mask_no_sim = torch.ones_like(mask) - mask
        mask_dui_jiao_0 = torch.ones(n, n) - torch.eye(n, n)  # zero out the diagonal
        similarity_matrix = torch.exp(similarity_matrix / T)
        similarity_matrix = similarity_matrix * mask_dui_jiao_0
        sim = mask * similarity_matrix
        no_sim = similarity_matrix - sim
        no_sim_sum = torch.sum(no_sim, dim=1)
        no_sim_sum_expend = no_sim_sum.repeat(n, 1).T
        sim_sum = sim + no_sim_sum_expend
        loss = torch.div(sim, sim_sum)
        loss = mask_no_sim + loss + torch.eye(n, n)
        # now compute the loss over the batch
        loss = -torch.log(loss)  # take -log
        loss = torch.sum(torch.sum(loss, dim=1)) / (2 * n) + nn.CrossEntropyLoss()(y_hat, label)
        return loss
class GAT(nn.Module):
    def __init__(self, hidden_channels) -> None:
        super().__init__()
        self.conv1 = GATConv(data.num_features, hidden_channels)
        self.conv2 = GATConv(hidden_channels, 9)
        self.activation = nn.ReLU()

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.activation(x)
        # print(x)
        # x = F.dropout(x, p=0.2)
        x = self.conv2(x, edge_index)
        return x
x = None
y = None
edge_index = None
train_mask = None

with open("X.pkl", "rb") as f1:
    x = pickle.load(f1)
with open("Y.pkl", "rb") as f2:
    y = pickle.load(f2)
    y = y.long()
with open("edge_index.pkl", "rb") as f3:
    edge_index = pickle.load(f3)
    # print(edge_index.shape)
with open("train_mask.pkl", "rb") as f4:
    train_mask = pickle.load(f4)

data = Data(x=x, y=y, edge_index=edge_index)
data.train_mask = train_mask

model = GAT(hidden_channels=32)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, 100, 0.8)
criterion = ContrastiveLoss()

def train():
    model.train()
    optimizer.zero_grad()  # Clear gradients.
    out = model(data.x, data.edge_index)  # Perform a single forward pass.
    loss = criterion(data.x[data.train_mask], data.y[data.train_mask], out[data.train_mask])  # Compute the loss solely based on the training nodes.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    return loss

def test():
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    test_correct = pred[data.train_mask] == data.y[data.train_mask]  # Check against ground-truth labels.
    test_acc = int(test_correct.sum()) / int(data.train_mask.sum())  # Derive ratio of correct predictions.
    return test_acc

accs = []
for epoch in range(1, 1025):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}', end=" ")
    acc = test()
    print("acc:", acc)
    accs.append(acc)
    scheduler.step()

plt.plot(range(len(accs)), accs)
print(time.time() - begin)
with open("./accs_gat_GCL.pkl", "wb") as f1:
    pickle.dump(accs, f1)
plt.savefig("./res_GAT_GCL.png", dpi=600)
I have tried to use DataParallel to spread my model and dataset across multiple GPUs, but failed.
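If a single full-graph forward pass does not fit on one GPU, the usual remedy is neighbour-sampled mini-batching rather than DataParallel (which in PyG is meant for batches of many small graphs fed through DataListLoader, not one large graph). A minimal sketch using NeighborLoader from PyG 2.x; the fan-out, batch size, and function names here are assumptions, and evaluation is wrapped in torch.no_grad() so no autograd buffers are kept:

from torch_geometric.loader import NeighborLoader

# Sample a two-hop neighbourhood around each seed node instead of
# running the GAT over the entire graph at once.
loader = NeighborLoader(
    data,
    num_neighbors=[10, 10],   # neighbours sampled per layer (assumed fan-out)
    batch_size=256,           # seed nodes per mini-batch (assumed)
    input_nodes=data.train_mask,
)

def train_minibatched():
    model.train()
    total_loss = 0.0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        # The first batch.batch_size nodes of each batch are the seed nodes.
        bs = batch.batch_size
        loss = criterion(batch.x[:bs], batch.y[:bs], out[:bs])
        loss.backward()
        optimizer.step()
        total_loss += float(loss)
    return total_loss / len(loader)

@torch.no_grad()  # evaluation needs no gradient buffers
def test_minibatched():
    model.eval()
    out = model(data.x.to(device), data.edge_index.to(device))
    pred = out.argmax(dim=1).cpu()
    correct = pred[data.train_mask] == data.y[data.train_mask]
    return int(correct.sum()) / int(data.train_mask.sum())

If the full-graph inference in test_minibatched still runs out of memory, evaluate through a NeighborLoader as well, or on CPU.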
I am writing an autoencoder model for an image encoding-decoding problem.
I want to understand how the number of nodes in each layer should be chosen for a model that works on images.
For the code below I am using 10 images of shape 21×28×3, i.e. 1764 values per image.
import chainer.functions as F
import chainer.links as L
from chainer import Chain, optimizers, iterators, training

class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1764, 882)
            self.l2 = L.Linear(882, 441)
            # decoder part
            self.l3 = L.Linear(441, 882)
            self.l4 = L.Linear(882, 1764)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        loss = F.mean_squared_error(h, x)
        return loss

    def encode(self, x):
        h = F.dropout(self.activation(self.l1(x)))
        return self.activation(self.l2(x))

    def decode(self, h, train=True):
        h = self.activation(self.l3(h))
        return self.l4(x)

gpu_id = 0
n_epoch = 5
batch_size = 2

model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs, batch_size)
valid_iter = iterators.SerialIterator(xs, batch_size)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (n_epoch, "epoch"), out="result")

from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
While running trainer.run(), I get:
InvalidType:
Invalid operation is performed in: LinearFunction (Forward)
Expect: x.shape[1] == W.shape[1]
Actual: 1764 != 882
I want to understand how node distribution works layer-wise in a model; please suggest any resources. Also, how should I assign nodes to layers when the number of training images is small?
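On the traceback itself: the mismatch 1764 != 882 comes from encode() feeding the raw 1764-dim input x into self.l2 (which expects 882 features) instead of the hidden vector h; decode() and __call__ contain the same variable mix-up. A sketch of the corrected methods, behaviour otherwise unchanged:

def encode(self, x):
    h = F.dropout(self.activation(self.l1(x)))
    return self.activation(self.l2(h))        # was self.l2(x): 1764-dim into an 882-dim layer

def decode(self, h, train=True):
    h = self.activation(self.l3(h))
    return self.l4(h)                          # was self.l4(x); x is undefined here

def __call__(self, x):
    x_recon = self.forward(x)
    return F.mean_squared_error(x_recon, x)    # was (h, x); h is undefined here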
I built a two-layer LSTM model (a Keras model) for a movie review dataset from Kaggle: Dataset.
While training the model, every epoch gave the same accuracy of 0.5098.
Then I thought it might not be learning the long-distance dependencies, so instead of LSTM I used a bidirectional LSTM. But the model's training accuracy was still 0.5098 for every epoch. I trained the model for 8 hours / 35 epochs on a CPU, then stopped training.
Code:
import pandas as pd
from sentiment_utils import *
import keras
import keras.backend as k
import numpy as np

def read_glove_vectors(path):
    with open(path, encoding='utf8') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            cur_word = line[0]
            words.add(cur_word)
            word_to_vec_map[cur_word] = np.array(line[1:], dtype=np.float64)
    i = 1
    words_to_index = {}
    index_to_words = {}
    for w in sorted(words):
        words_to_index[w] = i
        index_to_words[i] = w
        i = i + 1
    return words_to_index, index_to_words, word_to_vec_map

train_data = pd.read_table('train.tsv')
X_train = train_data.iloc[:, 2]
Y_train = train_data.iloc[:, 3]

from sklearn.preprocessing import OneHotEncoder
Y_train = Y_train.values.reshape(Y_train.shape[0], 1)  # .values: a pandas Series has no reshape
ohe = OneHotEncoder(categorical_features=[0])
Y_train = ohe.fit_transform(Y_train).toarray()

maxLen = len(max(X_train, key=len).split())
words_to_index, index_to_words, word_to_vec_map = read_glove_vectors("glove/glove.6B.50d.txt")
m = X_train.shape[0]
def sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys):
    m = X_train.shape[0]
    X_indices = np.zeros((m, maxLen))
    for i in range(m):
        if i in dash_index_list:
            continue
        sentance_words = X_train[i].lower().strip().split()
        j = 0
        for word in sentance_words:
            if word in keys:
                X_indices[i, j] = words_to_index[word]
                j += 1
    return X_indices
def pretrained_embedding_layer(word_to_vec_map, words_to_index):
    emb_dim = word_to_vec_map['pen'].shape[0]
    vocab_size = len(words_to_index) + 1
    emb_matrix = np.zeros((vocab_size, emb_dim))
    for word, index in words_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]
    emb_layer = keras.layers.embeddings.Embedding(vocab_size, emb_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb_matrix])
    return emb_layer
def get_model(input_shape, word_to_vec_map, words_to_index):
    sentance_indices = keras.layers.Input(shape=input_shape, dtype='int32')
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, words_to_index)
    embeddings = embedding_layer(sentance_indices)
    X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(embeddings)
    X = keras.layers.Dropout(0.5)(X)
    X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(X)
    X = keras.layers.Dropout(0.5)(X)
    X = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=False))(X)
    X = keras.layers.Dropout(0.5)(X)
    X = keras.layers.Dense(5)(X)
    X = keras.layers.Activation('softmax')(X)
    model = keras.models.Model(sentance_indices, X)
    return model
model = get_model((maxLen,), word_to_vec_map, words_to_index)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

dash_index_list = []
for i in range(m):
    if '-' in X_train[i]:
        dash_index_list.append(i)

keys = []
for key in word_to_vec_map.keys():
    keys.append(key)

X_train_indices = sentance_to_indices(X_train, words_to_index, maxLen, dash_index_list, keys)
model.fit(X_train_indices, Y_train, epochs=50, batch_size=32, shuffle=True)
I think the way you defined the model architecture doesn't make sense. Try looking at this example on IMDB movie reviews with LSTM in the Keras GitHub repo: Trains an LSTM model on the IMDB sentiment classification task.
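For comparison, a minimal sketch in the spirit of that IMDB example, adapted to the five sentiment classes used here; max_features is an assumed vocabulary cap, not from the original:

import keras

max_features = 20000  # assumed vocabulary size

model = keras.models.Sequential([
    keras.layers.Embedding(max_features, 128),
    keras.layers.LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    keras.layers.Dense(5, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])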
I have trained a named entity recognition model. After saving it and loading it back, it gives correct predictions within the same IPython session, but whenever I close the session and open it again, the loaded model predicts randomly. Can you help me with that?
I have saved the model in HDF5 format using:
model.save("filename")
And I am loading it using:
model = load_model("filename")
Here is my full code:
import pandas as pd
import numpy as np
import os
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.models import Model, Input, load_model
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
from nltk import pos_tag, word_tokenize, sent_tokenize

data = pd.read_csv(r"E:\ml tut\entity recognition\exdataset.csv", encoding="latin1")
data = data.fillna(method="ffill")

words = list(set(data["Word"].values))
words.append("ENDPAD")
n_words = len(words); n_words
tags = list(set(data["Tag"].values))
n_tags = len(tags); n_tags

class SentenceGetter(object):
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [((w, p), t) for w, p, t in
                              zip(s["Word"].values.tolist(),
                                  s["POS"].values.tolist(),
                                  s["Tag"].values.tolist())]
        self.grouped = self.data.groupby("Sentence #").apply(agg_func)
        self.sentences = [s for s in self.grouped]

    def get_next(self):
        try:
            s = self.grouped["Sentence: {}".format(self.n_sent)]
            self.n_sent += 1
            return s
        except:
            return None

getter = SentenceGetter(data)
sent = getter.get_next()
print(sent)
sentences = getter.sentences

max_len = 50
word2idx = {w: i for i, w in enumerate(words)}
tag2idx = {t: i for i, t in enumerate(tags)}

input = Input(shape=(max_len,))
model = Embedding(input_dim=n_words, output_dim=50, input_length=max_len)(input)
model = Dropout(0.1)(model)
model = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)

if os.path.exists('my_model.h5'):
    print("loading model")
    model = load_model('my_model.h5')
else:
    print("training model")
    X = [[word2idx[w[0][0]] for w in s] for s in sentences]
    X = pad_sequences(maxlen=max_len, sequences=X, padding="post", value=n_words - 1)
    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len, sequences=y, padding="post", value=tag2idx["O"])
    y = [to_categorical(i, num_classes=n_tags) for i in y]
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    model = Model(input, out)
    model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
    model.fit(X_tr, np.array(y_tr), batch_size=32, epochs=5, validation_split=0.1, verbose=1)
    model.save('my_model.h5')

my_input = "Albert Einstein is a great guy,he lives in berlin, Germany."
print("--------------")
test_sentence = word_tokenize(my_input)
x_test_sent = pad_sequences(sequences=[[word2idx.get(w, 0) for w in test_sentence]],
                            padding="post", value=0, maxlen=max_len)
i = 0
p = model.predict(np.array([x_test_sent[i]]))
p = np.argmax(p, axis=-1)
print("{:15}||{}".format("Word", "Prediction"))
print(30 * "=")
for w, pred in zip(test_sentence, p[0]):
    if w != 0:
        print("{:15}: {}".format(w, tags[pred]))
Please save your tags (tags = list(set(data["Tag"].values))) with pickle while generating your model; this will solve your problem. The root cause is that set() ordering is not stable across Python sessions, so the tags list and word2idx mapping rebuilt in a new session do not match the ones the model was trained with, which makes the loaded model's predictions look random.
Therefore you need to save the following (a minimal sketch follows below):
1. tags
2. model
3. word2idx
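A minimal sketch of the save and reload steps; the file names are assumptions:

import pickle

# While generating the model: persist the mappings next to my_model.h5.
with open("tags.pkl", "wb") as f:
    pickle.dump(tags, f)
with open("word2idx.pkl", "wb") as f:
    pickle.dump(word2idx, f)

# In a fresh session: load the saved mappings instead of rebuilding them,
# because list(set(...)) does not produce the same order across sessions.
with open("tags.pkl", "rb") as f:
    tags = pickle.load(f)
with open("word2idx.pkl", "rb") as f:
    word2idx = pickle.load(f)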
import pandas as pd
import numpy as np
import os
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.models import Model, Input, load_model
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
from nltk import pos_tag, word_tokenize, sent_tokenize

data = pd.read_csv(r"E:\ml tut\entity recognition\exdataset.csv", encoding="latin1")
data = data.fillna(method="ffill")

words = list(set(data["Word"].values))
words.append("ENDPAD")
n_words = len(words); n_words

tags = list(set(data["Tag"].values))
# save your tags in pickle or any other format here
n_tags = len(tags); n_tags

class SentenceGetter(object):
    def __init__(self, data):
        self.n_sent = 1
        self.data = data
        self.empty = False
        agg_func = lambda s: [((w, p), t) for w, p, t in
                              zip(s["Word"].values.tolist(),
                                  s["POS"].values.tolist(),
                                  s["Tag"].values.tolist())]
        self.grouped = self.data.groupby("Sentence #").apply(agg_func)
        self.sentences = [s for s in self.grouped]

    def get_next(self):
        try:
            s = self.grouped["Sentence: {}".format(self.n_sent)]
            self.n_sent += 1
            return s
        except:
            return None

getter = SentenceGetter(data)
sent = getter.get_next()
print(sent)
sentences = getter.sentences

max_len = 50
word2idx = {w: i for i, w in enumerate(words)}
# save your word2idx in pickle or any other format here
tag2idx = {t: i for i, t in enumerate(tags)}

input = Input(shape=(max_len,))
model = Embedding(input_dim=n_words, output_dim=50, input_length=max_len)(input)
model = Dropout(0.1)(model)
model = Bidirectional(LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(model)
out = TimeDistributed(Dense(n_tags, activation="softmax"))(model)

if os.path.exists('my_model.h5'):
    print("loading model")
    model = load_model('my_model.h5')
else:
    print("training model")
    X = [[word2idx[w[0][0]] for w in s] for s in sentences]
    X = pad_sequences(maxlen=max_len, sequences=X, padding="post", value=n_words - 1)
    y = [[tag2idx[w[1]] for w in s] for s in sentences]
    y = pad_sequences(maxlen=max_len, sequences=y, padding="post", value=tag2idx["O"])
    y = [to_categorical(i, num_classes=n_tags) for i in y]
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.1)
    model = Model(input, out)
    model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
    model.fit(X_tr, np.array(y_tr), batch_size=32, epochs=5, validation_split=0.1, verbose=1)
    model.save('my_model.h5')

my_input = "Albert Einstein is a great guy,he lives in berlin, Germany."
print("--------------")
test_sentence = word_tokenize(my_input)
x_test_sent = pad_sequences(sequences=[[word2idx.get(w, 0) for w in test_sentence]],
                            padding="post", value=0, maxlen=max_len)
i = 0
p = model.predict(np.array([x_test_sent[i]]))
p = np.argmax(p, axis=-1)
print("{:15}||{}".format("Word", "Prediction"))
print(30 * "=")
for w, pred in zip(test_sentence, p[0]):
    if w != 0:
        print("{:15}: {}".format(w, tags[pred]))