I have a language model:
from transformers import RobertaTokenizer
from transformers import RobertaModel
import torch.nn as nn
import torch
checkpoint = 'roberta-base'
test_question = ['this is a string', 'this is another string but longer']
tokenizer = RobertaTokenizer.from_pretrained(checkpoint)
I'm trying to change the head of the model to have 4 linear layers with 512 neurons each:
class QModel(nn.Module):
    def __init__(self):
        super(QModel, self).__init__()
        self.base_model = RobertaModel.from_pretrained(checkpoint)
        self.dropout = nn.Dropout(0.5)
        self.linear1 = nn.Linear(12288, 512)
        self.linear2 = nn.Linear(512, 512)
        self.linear3 = nn.Linear(512, 512)
        self.linear4 = nn.Linear(512, 512)

    def forward(self, x):
        input_ids, attn_mask = torch.tensor(x['input_ids']), torch.tensor(x['attention_mask'])
        outputs = self.base_model(input_ids, attention_mask=attn_mask)
        # new head
        outputs = self.dropout(outputs[0])
        outputs = outputs.view(-1, 12288)
        outputs = self.linear1(outputs)
        outputs = self.dropout(outputs)
        outputs = self.linear2(outputs)
        outputs = self.dropout(outputs)
        outputs = self.linear3(outputs)
        outputs = self.dropout(outputs)
        outputs = self.linear4(outputs)
        return outputs
model = QModel()
model(tokenizer(test_question, padding=True))
But if I change the input size:
test_question = ['this is a string', 'this is another string but longer', 'another input']
I get the error:
RuntimeError: shape '[-1, 12288]' is invalid for input of size 18432
I understand that the error arises from the 12288 value in linear1, but I'm not sure how to flatten the output in the appropriate way to accept a variable number of inputs.
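For what it's worth, 12288 is 2 samples × 8 padded tokens × 768 hidden units, so that number bakes both the batch size and the padded sequence length into the head; with three inputs the batch holds 3 × 8 × 768 = 18432 values, which no longer fits. One common way to make the head independent of both is to feed it only the first token's hidden state instead of the whole flattened batch; a minimal sketch (note this shrinks the head input to a single 768-dimensional vector per sample, which may or may not be what you want):

class QModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.base_model = RobertaModel.from_pretrained(checkpoint)
        self.dropout = nn.Dropout(0.5)
        self.linear1 = nn.Linear(768, 512)  # 768 = hidden size of roberta-base
        self.linear2 = nn.Linear(512, 512)
        self.linear3 = nn.Linear(512, 512)
        self.linear4 = nn.Linear(512, 512)

    def forward(self, x):
        input_ids = torch.tensor(x['input_ids'])
        attn_mask = torch.tensor(x['attention_mask'])
        outputs = self.base_model(input_ids, attention_mask=attn_mask)
        # hidden state of the first (<s>) token: shape (batch, 768),
        # independent of sequence length and batch size
        pooled = outputs.last_hidden_state[:, 0, :]
        h = self.dropout(pooled)
        h = self.dropout(self.linear1(h))
        h = self.dropout(self.linear2(h))
        h = self.dropout(self.linear3(h))
        return self.linear4(h)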
I have trouble making my neural network train. I have defined the neural network as follows:
shared = embedding_layer
inputA = keras.Input(shape=(40, ), name="anchor") # Variable-length sequence of ints
inputP = keras.Input(shape=(40, ), name="positive") # Variable-length sequence of ints
inputN = keras.Input(shape=(40, ), name="negative") # Binary vectors of size num_tags
concatenated = layers.concatenate([inputA, inputP, inputN])
embedded_A = shared(concatenated)
encoded = Dense(900, activation = "relu")(embedded_A)
encoded = Dense(600, activation = "relu")(encoded)
encoded = Dense(300, activation = "relu")(encoded)
encoded = Dense(100, activation = "relu")(encoded)
decoded = Dense(100, activation = "relu")(encoded)
decoded = Dense(300, activation = "relu")(decoded)
decoded = Dense(600, activation = "relu")(decoded)
decoded = Dense(900, activation = "relu")(decoded)
predictionsA = Dense(40, activation="sigmoid", name ='outA')(decoded)
predictionsP = Dense(40, activation="sigmoid", name ='outB')(decoded)
predictionsN = Dense(40, activation="sigmoid", name ='outC')(decoded)
ml_model = keras.Model(
    inputs=[inputA, inputP, inputN],
    outputs=[predictionsA, predictionsP, predictionsN]
)
ml_model.compile(
    optimizer='adam',
    loss='mse'
)
ml_model.fit(
    {"anchor": anchor, "positive": positive, "negative": negative},
    {"outA": anchor, "outB": positive, 'outC': negative},
    epochs=2)
Schematically, the three inputs are concatenated, passed through the shared embedding layer, encoded down through Dense layers to 100 units, decoded back up, and split into three 40-unit outputs.
The embedding layer is defined as follows:
embedding_m = model.syn0
embedding_layer = Embedding(len(vocab),
                            300,
                            weights=[embedding_m],
                            input_length=40,
                            trainable=True)
What I feed into the network are three NumPy arrays of shape (120000, 40), which look like this:
array([[ 2334, 23764, 7590, ..., 3000001, 3000001, 3000001],
[3000000, 1245, 1124, ..., 3000001, 3000001, 3000001],
[ 481, 491, 5202, ..., 3000001, 3000001, 3000001],
...,
[3000000, 125, 20755, ..., 3000001, 3000001, 3000001],
[1217971, 168575, 239, ..., 9383, 1039, 87315],
[ 12990, 91, 258231, ..., 3000001, 3000001, 3000001]])
And the input is the same as the output, since I am building an autoencoder.
The error I get is:
Dimensions must be equal, but are 120 and 32 for '{{node mean_squared_error/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model_3/outA/Sigmoid, mean_squared_error/Cast)' with input shapes: [32,120,40], [32,40].
But I can't seem to find out why, or how to fix it. Any ideas? I can provide more examples if needed. I suspect there is some dimension error, as I would ideally like my output to be of shape (120000, 40), exactly like my input.
Fixed version of the problematic encoder-decoder:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense
#shared = embedding_layer
#Simulate that...
shared=Dense(1,activation="relu")
inputA = keras.Input(shape=(40, ), name="anchor") # Variable-length sequence of ints
inputP = keras.Input(shape=(40, ), name="positive") # Variable-length sequence of ints
inputN = keras.Input(shape=(40, ), name="negative") # Binary vectors of size num_tags
concatenated = layers.concatenate([inputA, inputP, inputN])
embedded_A = shared(concatenated)
encoded = Dense(900, activation = "relu")(embedded_A)
encoded = Dense(600, activation = "relu")(encoded)
encoded = Dense(300, activation = "relu")(encoded)
encoded = Dense(100, activation = "relu")(encoded)
#decoded = Dense(100, activation = "relu")(encoded)
decoded = Dense(300, activation = "relu")(encoded)
decoded = Dense(600, activation = "relu")(decoded)
decoded = Dense(900, activation = "relu")(decoded)
predictionsA = Dense(40, activation="sigmoid", name ='outA')(decoded)
predictionsP = Dense(40, activation="sigmoid", name ='outB')(decoded)
predictionsN = Dense(40, activation="sigmoid", name ='outC')(decoded)
ml_model = keras.Model(
    inputs=[inputA, inputP, inputN],
    outputs=[predictionsA, predictionsP, predictionsN]
)
ml_model.compile(
    optimizer='adam',
    loss='mse'
)
# Simulate...
anchor = tf.random.uniform((100, 40))
positive = tf.random.uniform((100, 40))
negative = tf.random.uniform((100, 40))
ml_model.fit(
    {"anchor": anchor, "positive": positive, "negative": negative},
    {"outA": anchor, "outB": positive, 'outC': negative},
    epochs=2)
Remove that one extra "decoded" row (the duplicated 100-unit Dense layer) to fix your network structure.
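For completeness, the [32, 120, 40] vs [32, 40] mismatch in the error comes from the Embedding layer: it maps the concatenated (batch, 120) input to (batch, 120, 300), and Dense only acts on the last axis, so the length-120 axis is carried all the way to the outputs. A quick shape check (small placeholder vocabulary, the shape logic is the same):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inp = keras.Input(shape=(120,))
emb = layers.Embedding(1000, 300)(inp)   # placeholder vocab size
print(emb.shape)                          # (None, 120, 300)
print(layers.Dense(40)(emb).shape)        # (None, 120, 40), not (None, 40)

So if you keep the real embedding layer, you will also need to flatten or pool that axis away before the Dense stack.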
max_length = 50
tokenizer = RobertaTokenizer.from_pretrained('roberta-large', do_lower_case=True)
encodings = tokenizer.batch_encode_plus(comments,max_length=max_length,pad_to_max_length=True, truncation=True) # tokenizer's encoding method
train_inputs = encodings['input_ids']
train_masks = encodings['attention_mask']
train_inputs = torch.tensor(train_inputs)
train_labels = torch.tensor(train_labels)
train_masks = torch.tensor(train_masks)
batch_size = 48
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
model = RobertaForSequenceClassification.from_pretrained('roberta-large', num_labels=num_labels)
model.cuda()
Hi, I'm using the HuggingFace library for classification, and I want to concatenate two types of BERT together. This is not the entire code; I just want you to see how I've used the tokenizer and encodings. Now I have two questions:
1. How can I see the created vectors, i.e. their dimensions and the vectors themselves?
2. At which step should I concatenate the two BERTs: their vectors, or their outputs (logits)?
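Not a complete answer, but a sketch of both points. The encodings are plain Python lists, so their dimensions are easy to inspect once turned into tensors, and one common place to concatenate two BERT-style models is their pooled outputs, just before a shared classification head. Everything below other than your own encodings and num_labels (the second model name, the helper class) is an assumption for illustration; each encoder would need ids from its own tokenizer:

import torch
import torch.nn as nn
from transformers import RobertaModel, BertModel

# 1) inspecting the created vectors: shapes of the ids / masks
print(torch.tensor(encodings['input_ids']).shape)       # (num_comments, max_length)
print(torch.tensor(encodings['attention_mask']).shape)  # (num_comments, max_length)

# 2) one possible place to concatenate two encoders: their pooled outputs,
#    before a shared classification head
class TwoEncoderClassifier(nn.Module):
    def __init__(self, num_labels):
        super().__init__()
        self.enc1 = RobertaModel.from_pretrained('roberta-large')
        self.enc2 = BertModel.from_pretrained('bert-base-uncased')
        hidden = self.enc1.config.hidden_size + self.enc2.config.hidden_size
        self.classifier = nn.Linear(hidden, num_labels)

    def forward(self, ids1, mask1, ids2, mask2):
        h1 = self.enc1(ids1, attention_mask=mask1).pooler_output  # (batch, 1024)
        h2 = self.enc2(ids2, attention_mask=mask2).pooler_output  # (batch, 768)
        return self.classifier(torch.cat([h1, h2], dim=-1))       # logits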
I'm planning to have the following design: three separate inputs, each passed through its own Dense layer, then concatenated and fed into a single output. However, my code doesn't seem to work:
import numpy as np
from keras.models import Model
from keras.layers import Dense, Input, Concatenate
from keras import optimizers
trainX1 = np.array([[1,2],[3,4],[5,6],[7,8]]) # fake training data
trainY1 = np.array([[1],[2],[3],[4]]) # fake label
trainX2 = np.array([[2,3],[4,5],[6,7]])
trainY2 = np.array([[1],[2],[3]])
trainX3 = np.array([[0,1],[2,3]])
trainY3 = np.array([[1],[2]])
numFeatures = 2
trainXList = [trainX1, trainX2, trainX3]
trainYStack = np.vstack((trainY1,trainY2,trainY3))
inputList = []
modelList = []
for i, _ in enumerate(trainXList):
    tempInput = Input(shape=(numFeatures,))
    m = Dense(10, activation='tanh')(tempInput)
    inputList.append(tempInput)
    modelList.append(m)
mAll = Concatenate()(modelList)
out = Dense(1, activation='tanh')(mAll)
model = Model(inputs=inputList, outputs=out)
rmsp = optimizers.rmsprop(lr=0.00001)
model.compile(optimizer=rmsp,loss='mse', dropout = 0.1)
model.fit(trainXList, trainYStack, epochs = 1, verbose=0)
The error message says that my input data sets do not have the same shape, but after I padded my training sets so that all three have 4 samples, I still get an error saying the dimensions are not right. How can I design this network properly? Thanks!
P.S. Here is the error message before padding:
ValueError: All input arrays (x) should have the same number of samples. Got array shapes: [(4, 2), (3, 2), (2, 2)]
Here is the error message after padding (happens on the last line of code):
ValueError: Input arrays should have the same number of samples as target arrays. Found 4 input samples and 12 target samples.
Your input shape is wrong for the given input.
You assign the input a size of numFeatures, but you actually have 2-dimensional arrays, and they differ in shape: (4, 2), (3, 2), (2, 2). I am not sure about your problem, but the number of samples and the number of features seem to be reversed.
tempInput = Input(shape=(numFeatures,))
Furthermore, your y is also odd. Usually you have X of shape (number_of_samples, num_features) and y of shape (number_of_samples, labels).
Use model.summary() to see what your network looks like.
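To make this concrete, here is a minimal sketch that trains, under the assumption that the three sources can be aligned so every branch has the same number of samples and there is exactly one target row per sample (the padding rows are placeholders, and note that dropout is not a compile() argument, so it is dropped here):

import numpy as np
from keras.models import Model
from keras.layers import Dense, Input, Concatenate

numFeatures = 2

# every input branch has 4 samples, and there is one target row per sample
trainX1 = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
trainX2 = np.array([[2, 3], [4, 5], [6, 7], [0, 0]])   # padded to 4 samples
trainX3 = np.array([[0, 1], [2, 3], [0, 0], [0, 0]])   # padded to 4 samples
trainY  = np.array([[1], [2], [3], [4]])

inputList, branchList = [], []
for _ in range(3):
    inp = Input(shape=(numFeatures,))
    branchList.append(Dense(10, activation='tanh')(inp))
    inputList.append(inp)

out = Dense(1, activation='tanh')(Concatenate()(branchList))
model = Model(inputs=inputList, outputs=out)
model.compile(optimizer='rmsprop', loss='mse')
model.fit([trainX1, trainX2, trainX3], trainY, epochs=1, verbose=0)
model.summary()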
I am using an encoder-decoder seq2seq architecture in Keras. I'm passing a one-hot array of shape (num_samples, max_sentence_length, max_words) for training, and using teacher forcing.
from keras.models import Model
from keras.layers import Input, LSTM, Dense

# Encoder
latent_dim = 256
encoder_inputs = Input(shape=(None, max_words))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None, max_words))
decoder_lstm = LSTM(latent_dim, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(max_words, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
For inference model:
# Inference model
encoder_model = Model(encoder_inputs, encoder_states)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)
I tried printing out the encoder_model states, but it always returns the same states for any input. Any help would be appreciated!
This is pretty much just from the Keras example, correct?
https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
Did you train the model? Everything you have posted is the same as in the Keras documentation, so I don't think that could be the problem.
Here is an example that works, which is also based on the Keras documentation and looks pretty similar to what you have. Maybe try running through it and see where yours differs:
https://github.com/JEddy92/TimeSeries_Seq2Seq/blob/master/notebooks/TS_Seq2Seq_Intro.ipynb
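One quick sanity check, in case the model was trained but the states still look identical: run the inference encoder on two clearly different one-hot inputs and compare the returned states directly. A sketch, with random one-hot batches as stand-ins for real data and an assumed sequence length of 10:

import numpy as np

# two different fake one-hot sequences of shape (1, timesteps, max_words)
a = np.eye(max_words)[np.random.randint(0, max_words, size=(1, 10))]
b = np.eye(max_words)[np.random.randint(0, max_words, size=(1, 10))]

state_h_a, state_c_a = encoder_model.predict(a)
state_h_b, state_c_b = encoder_model.predict(b)
print(np.allclose(state_h_a, state_h_b))  # should be False after real training

If this still prints True with the trained weights, the usual suspects are that training never actually updated the weights that encoder_model shares, or that the inputs fed at inference time are effectively identical (for example, all-zero padding).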