I am building a chatbot using this seq2seq model, but there seems to be a problem before I can even train it, since I ran into the error below. Does anyone know how to solve it?
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input
enc_input = Input(shape=(80,))
dec_input = Input(shape=(80,))
VOCAB_SIZE = len(vocab)
embed = Embedding(VOCAB_SIZE+1, output_dim=50,input_length = 80,trainable = True)
enc_embed = embed(enc_input)
enc_lstm = LSTM(473,return_sequences=True,return_state=True)
enc_op, h, c = enc_lstm(enc_embed)
enc_states = [h,c]
dec_embed = embed(dec_input)
dec_lstm = LSTM(473,return_sequences=True,return_state=True)
dec_op,_,_ = dec_lstm(dec_embed,initial_state=enc_states)
dense = Dense(VOCAB_SIZE+1, activation='softmax')
dense_op = dense(dec_op)
model = Model([enc_input,dec_input],dense_op)
model.compile(loss = 'categorical_crossentropy',metrics=['acc'],optimizer=['adam'])
model.summary()
model.fit([encoder_input,decoder_input],decoder_final_output, epochs=40)
ERROR:
AttributeError: in user code:
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:791 train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
AttributeError: 'ListWrapper' object has no attribute 'minimize'
Error occurred on this line:
model.fit([encoder_input,decoder_input],decoder_final_output, epochs=40)
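One likely cause, noted here as an assumption rather than a confirmed fix: optimizer=['adam'] passes the optimizer wrapped in a list, which Keras tracks as a ListWrapper that has no minimize method. A minimal sketch of the compile and fit calls with a single optimizer:
# Hedged sketch: pass the optimizer as a string (or an optimizer instance), not a list
model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer='adam')
model.fit([encoder_input, decoder_input], decoder_final_output, epochs=40)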
I have a language model:
from transformers import RobertaTokenizer
from transformers import RobertaModel
import torch.nn as nn
import torch
checkpoint = 'roberta-base'
test_question = ['this is a string', 'this is another string but longer']
tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
I'm trying to change the head of the model to have 4 linear layers with 512 neurons each:
class QModel(nn.Module):
    def __init__(self):
        super(QModel, self).__init__()
        self.base_model = RobertaModel.from_pretrained(checkpoint)
        self.dropout = nn.Dropout(0.5)
        self.linear1 = nn.Linear(12288, 512)
        self.linear2 = nn.Linear(512, 512)
        self.linear3 = nn.Linear(512, 512)
        self.linear4 = nn.Linear(512, 512)

    def forward(self, x):
        input_ids, attn_mask = torch.tensor(x['input_ids']), torch.tensor(x['attention_mask'])
        outputs = self.base_model(input_ids, attention_mask=attn_mask)
        # new head
        outputs = self.dropout(outputs[0])
        outputs = outputs.view(-1, 12288)
        outputs = self.linear1(outputs)
        outputs = self.dropout(outputs)
        outputs = self.linear2(outputs)
        outputs = self.dropout(outputs)
        outputs = self.linear3(outputs)
        outputs = self.dropout(outputs)
        outputs = self.linear4(outputs)
        return outputs
model = QModel()
model(tokenizer(test_question, padding=True))
But if I change the input to three strings instead of two:
test_question = ['this is a string', 'this is another string but longer', 'another input']
I get the error:
RuntimeError: shape '[-1, 12288]' is invalid for input of size 18432
I understand that it arises from the 12288 value in linear1, but I'm not sure how to flatten the output in an appropriate way so the head accepts a variable number of inputs.
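One common way to make the head independent of batch size and padding length, sketched here as an assumption rather than the author's intended design: pool the encoder output to one 768-dimensional vector per example (768 is roberta-base's hidden size), for instance the first-token embedding, and size linear1 accordingly:
class QModel(nn.Module):
    def __init__(self):
        super(QModel, self).__init__()
        self.base_model = RobertaModel.from_pretrained(checkpoint)
        self.dropout = nn.Dropout(0.5)
        self.linear1 = nn.Linear(768, 512)   # 768 = roberta-base hidden size, not 12288
        self.linear2 = nn.Linear(512, 512)
        self.linear3 = nn.Linear(512, 512)
        self.linear4 = nn.Linear(512, 512)

    def forward(self, x):
        input_ids = torch.tensor(x['input_ids'])
        attn_mask = torch.tensor(x['attention_mask'])
        outputs = self.base_model(input_ids, attention_mask=attn_mask)
        # outputs[0] has shape (batch, seq_len, 768); keep one vector per example
        pooled = outputs[0][:, 0, :]          # first-token embedding, shape (batch, 768)
        out = self.dropout(pooled)
        out = self.dropout(self.linear1(out))
        out = self.dropout(self.linear2(out))
        out = self.dropout(self.linear3(out))
        out = self.linear4(out)
        return out

model = QModel()
model(tokenizer(test_question, padding=True))   # works for two, three, or more strings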
This is my first attempt at solving a task on Kaggle. This is the page of the task: https://www.kaggle.com/c/bike-sharing-demand.
I wrote this code (there are some extra lines, because I am not really sure what I need yet):
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import keras
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import matplotlib.pyplot as plt
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
train_data = pd.read_csv('/kaggle/input/bike-sharing-demand/train.csv')
train_targets = train_data[['casual', 'registered', 'count']]
train_datetime_helper = train_data[['datetime']]
dt = pd.DatetimeIndex(train_data['datetime'])
train_data['day'] = dt.day
train_data['month'] = dt.month
train_data['year'] = dt.year
train_data['hour'] = dt.hour
train_data['dow'] = dt.dayofweek
train_data['woy'] = dt.weekofyear
train_data = train_data.drop(['casual', 'registered', 'count', 'datetime'], axis=1)
test_data = pd.read_csv('/kaggle/input/bike-sharing-demand/test.csv')
test_datetime_helper = test_data[['datetime']]
dt = pd.DatetimeIndex(test_data['datetime'])
test_data['day'] = dt.day
test_data['month'] = dt.month
test_data['year'] = dt.year
test_data['hour'] = dt.hour
test_data['dow'] = dt.dayofweek
test_data['woy'] = dt.weekofyear
test_data = test_data.drop(['datetime'], axis=1)
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std
test_data -= mean
test_data /= std
from keras import models
from keras import layers
from keras.layers import Dense, Conv2D, Flatten
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1], train_targets.shape[1])))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model
k = 4
num_val_samples = len(train_data) // k
num_epochs = 100
all_scores = []
for i in range(k):
    print('Processing fold #', i)
    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate(
        [train_data[:i * num_val_samples],
         train_data[(i + 1) * num_val_samples:]], axis=0)
    partial_train_targets = np.concatenate(
        [train_targets[:i * num_val_samples],
         train_targets[(i + 1) * num_val_samples:]], axis=0)
    model = build_model()
    model.fit(partial_train_data, partial_train_targets,
              epochs=num_epochs, batch_size=1, verbose=0)
    val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
    all_scores.append(val_mae)
I got this error (posted as a screenshot). Can you explain how I can solve it?
You specified the input dimension of your model incorrectly. Try defining your first layer this way:
model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
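For context, a sketch of build_model with that change, assuming train_data stays a 2-D table of shape (samples, features):
def build_model():
    model = models.Sequential()
    # each sample is a 1-D feature vector, so only the feature count goes
    # into input_shape; the targets' width does not belong there
    model.add(layers.Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return model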
This error seems to go away if I remove the Flatten() layer.
I am trying to use my model, but it gives this runtime error:
You must compile your model before using it.
I don't understand what is wrong; I've tried using fewer Dense layers, but that doesn't help.
Can someone help me with this? Please explain with code.
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Flatten
train_directory = 'D:\D_data\Rock_Paper_Scissors\Train'
training_datagen = ImageDataGenerator(rescale = 1./255)
training_generator = training_datagen.flow_from_directory(
    train_directory,
    target_size = (150,150),
    class_mode = 'categorical')
validation_directory = 'D:\D_data\Rock_Paper_Scissors\Test'
validation_datagen = ImageDataGenerator(rescale= 1./255)
validation_generator = validation_datagen.flow_from_directory(
    validation_directory,
    target_size = (150,150),
    class_mode = 'categorical'
)
model = Sequential()
model.add(Flatten())
model.add(Dense(128, input_shape = (22500,)))
model.add(Dense(64, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(3, activation = 'softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',metrics = ['accuracy'])
model.fit_generator(training_generator,epochs = 15,validation_data = validation_generator,verbose =1)
Error:
File "C:\Users\Ankit\.spyder-py3\temp.py", line 33, in <module>
model.fit_generator(training_generator,epochs = 15,validation_data = validation_generator,verbose =1)
File "C:\Users\Ankit\anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\Ankit\anaconda3\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
initial_epoch=initial_epoch)
File "C:\Users\Ankit\anaconda3\lib\site-packages\keras\engine\training_generator.py", line 42, in fit_generator
model._make_train_function()
File "C:\Users\Ankit\anaconda3\lib\site-packages\keras\engine\training.py", line 303, in _make_train_function
raise RuntimeError('You must compile your model before using it.')
RuntimeError: You must compile your model before using it.
This error happens because your network is inconsistent: the input_shape parameter should be given to the first layer in your network, but you are giving it to the second layer.
The solution is simple: pass the right input_shape to your Flatten layer.
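A minimal sketch of that fix, assuming the generators keep flow_from_directory's default color_mode='rgb', so each image arrives as 150 x 150 x 3:
model = Sequential()
model.add(Flatten(input_shape=(150, 150, 3)))  # the first layer carries the input_shape
model.add(Dense(128))
model.add(Dense(64, activation = 'relu'))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(16, activation = 'relu'))
model.add(Dense(3, activation = 'softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])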
The following code is throwing an error:
TypeError: can't pickle _thread.lock objects
I can see that it likely has to do with passing the previous method in as a function in def fit(self, c_m), but I think this is correct according to the documentation: https://keras.io/scikit-learn-api/
I may be making a rookie mistake; if anyone sees the error in my code, I would appreciate the help.
# imports inferred from the code below
import pdb

import numpy as np
import pandas as pd
from scipy import stats
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

np.random.seed(7)
y_dic = []

class NN:
    def __init__(self):
        self.X = None
        self.y = None
        self.model = None

    def clean_data(self):
        seed = 7
        np.random.seed(seed)
        dataset = pd.read_csv('/Users/isaac/pca_rfe_tsne_comparisons/Vital_intrusions.csv', delimiter=',', skiprows=0)
        dataset = dataset.iloc[:, 1:6]
        self.X = dataset.iloc[:, 1:5]
        Y = dataset.iloc[:, 0]
        for y in Y:
            if y >= 8:
                y_dic.append(1)
            else:
                y_dic.append(0)
        self.y = y_dic
        self.X = np.asmatrix(stats.zscore(self.X, axis=0, ddof=1))
        self.y = to_categorical(self.y)

    def create_model(self):
        self.model = Sequential()
        self.model.add(Dense(4, input_dim=4, activation='relu'))
        self.model.add(Dense(4, activation='relu'))
        self.model.add(Dense(2, activation='sigmoid'))
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        pass

    def fit(self, c_m):
        model = KerasClassifier(build_fn=c_m, verbose=0)
        batch_size = [10, 20, 40, 60, 80, 100]
        epochs = [10, 50, 100]
        param_grid = dict(batch_size=batch_size, epochs=epochs)
        grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
        pdb.set_trace()
        grid_result = grid.fit(self.X, self.y)
        return (grid_result)

    def results(self, grid_result):
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

def main():
    nn = NN()
    nn.clean_data()
    nn.create_model()
    grid_results = nn.fit(nn.create_model)
    nn.results(grid_results)

if __name__ == "__main__":
    main()
OK, a follow-up to this. Thanks for your comments, @MarcinMożejko; you were right about this. There were more errors I should mention: in def fit(), I wrote model = KerasClassifier, not self.model = KerasClassifier. I wanted to mention that in case anyone is looking at the code. I'm now getting a new error on the same line:
AttributeError: 'NoneType' object has no attribute 'loss'.
I can track this back to scikit_learn.py:
loss_name = self.model.loss
if hasattr(loss_name, '__name__'):
    loss_name = loss_name.__name__
if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
    y = to_categorical(y)
I'm not sure how to solve this, as I set the loss term in self.model.compile. I tried changing it to binary_crossentropy, but that had no effect. Any further thoughts?
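One likely cause of the new error, offered as an assumption rather than a confirmed fix: KerasClassifier keeps whatever build_fn returns as its model, and create_model as written returns nothing, so the wrapper ends up holding None when it later reads self.model.loss. A minimal sketch of the change:
def create_model(self):
    self.model = Sequential()
    self.model.add(Dense(4, input_dim=4, activation='relu'))
    self.model.add(Dense(4, activation='relu'))
    self.model.add(Dense(2, activation='sigmoid'))
    self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return self.model  # build_fn must return the compiled model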
The problem lies in this line of code:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
Unfortunately, for now Keras does not support pickling your model, which is what sklearn needs in order to use multiprocessing (you may read the discussion on this here). In order to make this code work, you should set:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
This is a variational autoencoder network. I have to define a sampling method to generate the latent z, and I think something might be wrong with it. This .py file does the training; another .py file does the online prediction, so I need to save the Keras model. There is nothing wrong with saving the model, but when I load the model from the .h5 file, it shows an error:
NameError: name 'latent_dim' is not defined
The following is the code:
df_test = df[df['label']==cluster_num].iloc[:,:data_num.shape[1]]
data_scale_ = preprocessing.StandardScaler().fit(df_test.values)
data_num_ = data_scale.transform(df_test.values)
models_deep_learning_scaler.append(data_scale_)
batch_size = data_num_.shape[0]//10
original_dim = data_num_.shape[1]
latent_dim = data_num_.shape[1]*2
intermediate_dim = data_num_.shape[1]*10
nb_epoch = 1
epsilon_std = 0.001
x = Input(shape=(original_dim,))
init_drop = Dropout(0.2, input_shape=(original_dim,))(x)
h = Dense(intermediate_dim, activation='relu')(init_drop)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(latent_dim,), mean=0.,
                              std=epsilon_std)
    return z_mean + K.exp(z_log_var / 2) * epsilon
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='linear')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.mae(x, x_decoded_mean)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss
vae = Model(x, x_decoded_mean)
vae.compile(optimizer=Adam(lr=0.01), loss=vae_loss)
train_ratio = 0.95
train_num = int(data_num_.shape[0]*train_ratio)
x_train = data_num_[:train_num,:]
x_test = data_num_[train_num:,:]
vae.fit(x_train, x_train,
        shuffle=True,
        nb_epoch=nb_epoch,
        batch_size=batch_size,
        validation_data=(x_test, x_test))
vae.save('./models/deep_learning_'+str(cluster_num)+'.h5')
del vae
from keras.models import load_model
vae = load_model('./models/deep_learning_'+str(cluster_num)+'.h5')
It shows the error:
NameError: name 'latent_dim' is not defined
For the variational loss you are using several variables that the Keras module does not know about. You need to pass them through the custom_objects param of the load_model function.
In your case:
vae.save('./vae_'+str(cluster_num)+'.h5')
vae.summary()
del vae
from keras.models import load_model
vae = load_model('./vae_'+str(cluster_num)+'.h5', custom_objects={'latent_dim': latent_dim, 'epsilon_std': epsilon_std, 'vae_loss': vae_loss})
vae.summary()
If you load the model (.h5) file in your new .py file, you can use load_model('/.h5', compile = False), because you do not need any custom objects (i.e. the loss function, latent_dim, etc.) in the prediction step.
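A minimal sketch of that route, assuming the second script only calls predict (so vae_loss never has to be rebuilt); cluster_num and the file path follow the naming used above, and new_data stands in for whatever scaled input the prediction script prepares:
from keras.models import load_model

# compile=False skips restoring the optimizer and custom loss,
# so no custom_objects are needed for inference
vae = load_model('./models/deep_learning_' + str(cluster_num) + '.h5', compile=False)
reconstructions = vae.predict(new_data)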