Could not get batch from DataLoader

I'm trying to implement an LSTM model in PyTorch to generate sonnets. When I tested the DataLoader, it took a long time and never returned data as expected. I have reviewed the code but can't find where the error is. Please help me; here is the code from the 10th cell (more detail at https://paste.ubuntu.com/p/vspS3msNVW/):
class SonnetDataset(Dataset):
    def __init__(self, sonnet_in_ids: list, vocab: list, max_seq_length: int):
        super().__init__()
        self.data = sonnet_in_ids
        self.vocab = vocab
        self.vocab_size = len(vocab)
        self.pad_id = self.vocab.index('<PAD>')
        self.start_id = self.vocab.index('<START>')
        self.end_id = self.vocab.index('<END>')
        self.max_seq_length = max_seq_length + 2  # +2 for the <START> and <END> tokens
        print('init successfully')

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index) -> torch.LongTensor:
        print('get item')
        x = self.data[index]
        x = [self.start_id] + x + [self.end_id]
        # padding
        x += [self.pad_id] * (self.max_seq_length - len(x))
        x = torch.LongTensor(x)
        print(x)
        return x
batch_size = 4
train_set = SonnetDataset(sonnet_in_ids=sonnets_in_ids, vocab=vocab, max_seq_length=max_length)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4, drop_last=True)
next(iter(train_loader))
The call hangs and no data ever comes back.
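A hedged debugging sketch (mine, not from the post): with num_workers=4 the DataLoader spawns worker processes, which can hang silently in a notebook or on Windows when the loading code is not guarded by if __name__ == '__main__':. Fetching one batch with num_workers=0 keeps everything in the main process and surfaces the real error, if there is one.

# Minimal sketch: bypass multiprocessing to check whether the dataset itself works.
debug_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                          num_workers=0, drop_last=True)
print(next(iter(debug_loader)))  # should print one padded LongTensor batch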

Related

Converting generative transformer model from keras to PyTorch

I would like to re-create the following Keras model in PyTorch.
# imports assumed from the original Keras example
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

vocab_size = 22
maxlen = 200
embed_dim = 256
num_heads = 2
feed_forward_dim = 256
batch_size = 128
decoders = 5

def create_model():
    inputs = layers.Input(shape=(maxlen,), dtype=tf.int32)
    embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
    x = embedding_layer(inputs)
    decoder_blocks = []
    for i in range(decoders):
        decoder_blocks.append(DecoderBlock(embed_dim, num_heads, feed_forward_dim))
    for i in range(len(decoder_blocks)):
        x = decoder_blocks[i](x)
    outputs = layers.Dense(vocab_size)(x)
    model = keras.Model(inputs=inputs, outputs=[outputs, x])
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),  # lr_schedule is defined elsewhere in the asker's code
        loss=[loss_fn, None],
    )
    return model

model = create_model()
Here are the DecoderBlock and TokenAndPositionEmbedding layers, along with the causal attention mask:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    i = tf.range(n_dest)[:, None]
    j = tf.range(n_src)
    m = i >= j - n_src + n_dest
    mask = tf.cast(m, dtype)
    mask = tf.reshape(mask, [1, n_dest, n_src])
    mult = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
    )
    return tf.tile(mask, mult)

class DecoderBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(DecoderBlock, self).__init__()
        self.att = layers.MultiHeadAttention(num_heads, embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = causal_attention_mask(batch_size, seq_len, seq_len, tf.bool)
        attention_output = self.att(inputs, inputs, attention_mask=causal_mask)
        attention_output = self.dropout1(attention_output)
        out1 = self.layernorm1(inputs + attention_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions
For reference, this code is copied directly from: https://keras.io/examples/generative/text_generation_with_miniature_gpt/
I have tried to create an equivalent architecture in PyTorch using nn.TransformerDecoderLayer. Apologies for not including my own code, but I have been completely unsuccessful.
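As a starting point (my sketch, not code from the post), the Keras causal_attention_mask above corresponds to the boolean look-ahead mask that nn.TransformerDecoderLayer accepts as tgt_mask; PyTorch broadcasts it over the batch, so there is no need for the tf.tile step:

import torch

def causal_mask(seq_len: int) -> torch.Tensor:
    # True marks positions a query may NOT attend to (strictly future tokens);
    # note this is the inverse of the Keras convention, where 1 means "allowed".
    return torch.triu(torch.ones(seq_len, seq_len), diagonal=1).bool()

# usage sketch: decoder_layer(tgt, memory, tgt_mask=causal_mask(tgt.size(1)))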

How to deal with a big dataset when using pyG?

I am a beginner learning to use torch_geometric to build my GNN models. I referred to the pyG node-classification example and built my own dataset; however, when I tried to run the code on my GPU, it told me it ran out of memory. Maybe my dataset is too large to fit in GPU memory? I don't know. I share a machine with 8 A100s with my classmates. Could you please give me some suggestions? Thank you!
from torch_geometric.nn import GATConv,GCNConv
from torch_geometric.data import Dataset,DataLoader,HeteroData,Data
import torch.nn as nn
from torch_geometric.nn import DataParallel
from torch_geometric.loader import DataListLoader
import torch.nn.functional as F
import torch
import pandas as pd
from transformers import BertTokenizer,BertModel
import pickle
import time
from tqdm import tqdm
from numba import jit
import json
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
plt.grid(True)
plt.grid(color='gray',
         linestyle='--',
         linewidth=1,
         alpha=0.3)
begin = time.time()
punctuation = "!#$%&'\(\)-*+,-./:;<=>?#\\\[\]^_`{|}~():;,。【】·、“”‘’《》\"%……——·"
def dataCleanifier(s):
    for i in punctuation:
        s = s.replace(i, " ")  # replace() returns a new string; the result must be assigned
    s = s.replace("  ", " ")
    s = s.replace("\n", "")
    return s
class BertClassifier(nn.Module):
    def __init__(self, bertType: str, max_length, tag_size):
        super(BertClassifier, self).__init__()
        self.bertType = bertType
        self.tokenizer = BertTokenizer.from_pretrained(self.bertType)
        self.encoder = BertModel.from_pretrained(self.bertType)
        self.outputDim = self.encoder.pooler.dense.out_features
        self.max_length = max_length
        self.tag_size = tag_size
        self.dropout = nn.Dropout(0.1)
        self.activation = nn.LeakyReLU(0.1)
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, 512, (k, self.outputDim)) for k in (2, 3, 4)])
        self.fc_cnn = nn.Linear(512 * len((2, 3, 4)), self.tag_size)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        x = self.tokenizer.batch_encode_plus(x, return_tensors="pt", max_length=self.max_length, truncation=True, padding="max_length")
        attention = x["attention_mask"]
        x = x["input_ids"]
        x = x.cuda(2)
        x = self.encoder(x, attention_mask=attention.cuda(2))['last_hidden_state'][:]
        x = x.unsqueeze(1)
        encoded = torch.cat([self.conv_and_pool(x, conv) for conv in self.convs], 1)
        x = self.fc_cnn(encoded)
        x = self.activation(x)
        # x = F.softmax(x, dim=1)
        return x, encoded
class ContrastiveLoss(nn.Module):
    def __init__(self):
        super(ContrastiveLoss, self).__init__()

    def forward(self, representations, label, y_hat):
        n = label.shape[0]
        T = 0.5
        similarity_matrix = F.cosine_similarity(representations.unsqueeze(1), representations.unsqueeze(0), dim=2)
        mask = torch.ones_like(similarity_matrix) * (label.expand(n, n).eq(label.expand(n, n).t()))
        mask_no_sim = torch.ones_like(mask) - mask
        mask_dui_jiao_0 = torch.ones(n, n) - torch.eye(n, n)  # zero out the diagonal
        similarity_matrix = torch.exp(similarity_matrix / T)
        similarity_matrix = similarity_matrix * mask_dui_jiao_0
        sim = mask * similarity_matrix
        no_sim = similarity_matrix - sim
        no_sim_sum = torch.sum(no_sim, dim=1)
        no_sim_sum_expend = no_sim_sum.repeat(n, 1).T
        sim_sum = sim + no_sim_sum_expend
        loss = torch.div(sim, sim_sum)
        loss = mask_no_sim + loss + torch.eye(n, n)
        # next, compute the loss over the batch
        loss = -torch.log(loss)  # take -log
        loss = torch.sum(torch.sum(loss, dim=1)) / (2 * n) + nn.CrossEntropyLoss()(y_hat, label)
        return loss
class GAT(nn.Module):
    def __init__(self, hidden_channels) -> None:
        super().__init__()
        self.conv1 = GATConv(data.num_features, hidden_channels)
        self.conv2 = GATConv(hidden_channels, 9)
        self.activation = nn.ReLU()

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.activation(x)
        # print(x)
        # x = F.dropout(x, p=0.2)
        x = self.conv2(x, edge_index)
        return x
x = None
y = None
edge_index = None
train_mask = None
with open("X.pkl", "rb") as f1:
    x = pickle.load(f1)
with open("Y.pkl", "rb") as f2:
    y = pickle.load(f2)
y = y.long()
with open("edge_index.pkl", "rb") as f3:
    edge_index = pickle.load(f3)
# print(edge_index.shape)
with open("train_mask.pkl", "rb") as f4:
    train_mask = pickle.load(f4)
data = Data(x=x, y=y, edge_index=edge_index)
data.train_mask = train_mask
model = GAT(hidden_channels=32)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, 100, 0.8)
criterion = ContrastiveLoss()

def train():
    model.train()
    optimizer.zero_grad()  # Clear gradients.
    out = model(data.x, data.edge_index)  # Perform a single forward pass.
    loss = criterion(data.x[data.train_mask], data.y[data.train_mask], out[data.train_mask])  # Compute the loss solely based on the training nodes.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    return loss

def test():
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with the highest probability.
    test_correct = pred[data.train_mask] == data.y[data.train_mask]  # Check against ground-truth labels.
    test_acc = int(test_correct.sum()) / int(data.train_mask.sum())  # Derive the ratio of correct predictions.
    return test_acc

accs = []
for epoch in range(1, 1025):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}', end=" ")
    acc = test()
    print("acc:", acc)
    accs.append(acc)
    scheduler.step()
plt.plot(range(len(accs)), accs)
print(time.time() - begin)
with open("./accs_gat_GCL.pkl", "wb") as f1:
    pickle.dump(accs, f1)
plt.savefig("./res_GAT_GCL.png", dpi=600)
I have tried to use DataParallel to spread the model and dataset across multiple GPUs, but failed.
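One common remedy, sketched here as an assumption about the setup rather than a tested fix: instead of training on the full graph at once, sample mini-batch subgraphs with PyG's NeighborLoader, so only a small neighbourhood is resident on the GPU at a time.

from torch_geometric.loader import NeighborLoader

# Hedged sketch: sample 2-hop neighbourhoods around the training nodes so each
# batch is a small subgraph that fits in GPU memory.
loader = NeighborLoader(
    data,
    num_neighbors=[15, 10],       # neighbours sampled per hop (assumed values)
    batch_size=128,
    input_nodes=data.train_mask,  # seed nodes for each mini-batch
)

for batch in loader:
    batch = batch.to(device)
    out = model(batch.x, batch.edge_index)
    # the seed nodes come first in each batch: out[:batch.batch_size]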

dropout(): argument 'input' (position 1) must be Tensor, not tuple when using XLNet with Hugging Face

I get an error saying that the input should be of type Tensor, not tuple. I do not know how to work around this problem, as I am already setting return_dict=False as stated in the migration plan.
My model is as follows:
class XLNetClassifier(torch.nn.Module):
    def __init__(self, dropout_rate=0.1):
        super(XLNetClassifier, self).__init__()
        self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased', return_dict=False)
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, 3)

    def forward(self, input_ids, attention_mask):
        x = self.XLNet(input_ids=input_ids, attention_mask=attention_mask)
        x = self.d1(x)
        x = self.l1(x)
        x = self.bn1(x)
        x = torch.nn.Tanh()(x)
        x = self.d2(x)
        x = self.l2(x)
        return x
The error occurs when calling the dropout.
The XLNetModel returns two output values:
- last_hidden_state
- mems
That means you get a tuple, not a single tensor, as the error message says. Your class definition should therefore be:
from transformers import XLNetModel, XLNetTokenizerFast
import torch

class XLNetClassifier(torch.nn.Module):
    def __init__(self, dropout_rate=0.1):
        super(XLNetClassifier, self).__init__()
        self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased', return_dict=False)
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, 3)

    def forward(self, input_ids, attention_mask):
        x = self.XLNet(input_ids=input_ids, attention_mask=attention_mask)
        x = self.d1(x[0])  # x[0] is last_hidden_state
        x = self.l1(x)
        x = self.bn1(x)
        x = torch.nn.Tanh()(x)
        x = self.d2(x)
        x = self.l2(x)
        return x

tokenizer = XLNetTokenizerFast.from_pretrained('xlnet-base-cased')
model = XLNetClassifier()
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt", return_token_type_ids=False)
outputs = model(**inputs)
Or even better, without return_dict=False:
class XLNetClassifier(torch.nn.Module):
    def __init__(self, dropout_rate=0.1):
        super(XLNetClassifier, self).__init__()
        self.XLNet = XLNetModel.from_pretrained('xlnet-base-cased')
        self.d1 = torch.nn.Dropout(dropout_rate)
        self.l1 = torch.nn.Linear(768, 64)
        self.bn1 = torch.nn.LayerNorm(64)
        self.d2 = torch.nn.Dropout(dropout_rate)
        self.l2 = torch.nn.Linear(64, 3)

    def forward(self, input_ids, attention_mask):
        x = self.XLNet(input_ids=input_ids, attention_mask=attention_mask)
        x = self.d1(x.last_hidden_state)
        x = self.l1(x)
        x = self.bn1(x)
        x = torch.nn.Tanh()(x)
        x = self.d2(x)
        x = self.l2(x)
        return x
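The tokenizer and forward call from the first snippet work unchanged here. Accessing x.last_hidden_state by name is also more robust than indexing the tuple, since it keeps working if the model's return signature ever changes.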

Chainer Autoencoder

I am trying to write a vanilla autoencoder for compressing 13 images. However, I am getting the following error:
ValueError: train argument is not supported anymore. Use chainer.using_config
The shape of each image is (21, 28, 3).
filelist = ['ex1.png', 'ex2.png', ...]  # plus 11 other images
x = np.array([np.array(Image.open(fname)) for fname in filelist])
xs = x.astype('float32') / 255.

class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1764, 800)
            self.l2 = L.Linear(800, 300)
            # decoder part
            self.l3 = L.Linear(300, 800)
            self.l4 = L.Linear(800, 1764)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        loss = F.mean_squared_error(h, x)
        return loss

    def encode(self, x, train=True):
        h = F.dropout(self.activation(self.l1(x)), train=train)
        return self.activation(self.l2(x))

    def decode(self, h, train=True):
        h = self.activation(self.l3(h))
        return self.l4(x)

n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs, batch_size)
valid_iter = iterators.SerialIterator(xs, batch_size)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (n_epoch, "epoch"), out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
trainer.run()
Is the issue because of the number of nodes in the model or otherwise?
You need to write the "decoder" part.
When you take the mean_squared_error loss, the shapes of h and x must be the same.
An autoencoder encodes the original x into a small space (e.g. 100-dim) h, but after that we need to reconstruct x' from this h by adding a decoder part.
The loss can then be calculated on this reconstructed x'.
For example, as follows (sorry, I have not tested that it runs).
For Chainer v2+:
The train argument is handled by the global config, so you do not need a train argument in the dropout function.
class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1308608, 500)
            self.l2 = L.Linear(500, 100)
            # decoder part
            self.l3 = L.Linear(100, 500)
            self.l4 = L.Linear(500, 1308608)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        # compare the reconstruction with the input, so the shapes match
        loss = F.mean_squared_error(x_recon, x)
        return loss

    def encode(self, x):
        h = F.dropout(self.activation(self.l1(x)))
        return self.activation(self.l2(h))

    def decode(self, h):
        h = self.activation(self.l3(h))
        return self.l4(h)
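At evaluation time, dropout is then switched off through the same global config (a usage sketch, assuming model and x are defined as above):

import chainer

with chainer.using_config('train', False):
    x_recon = model.forward(x)  # dropout is disabled inside this block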
For Chainer v1:

class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1308608, 500)
            self.l2 = L.Linear(500, 100)
            # decoder part
            self.l3 = L.Linear(100, 500)
            self.l4 = L.Linear(500, 1308608)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        loss = F.mean_squared_error(x_recon, x)
        return loss

    def encode(self, x, train=True):
        h = F.dropout(self.activation(self.l1(x)), train=train)
        return self.activation(self.l2(h))

    def decode(self, h, train=True):
        h = self.activation(self.l3(h))
        return self.l4(h)
You can also refer to the official Variational Autoencoder example for the next step:
https://github.com/chainer/chainer/tree/master/examples/vae

Building a layer-wise model for encoding-decoding images

I am writing an autoencoder model for an image encoding-decoding problem.
I want to understand the node distribution in each layer of the model that is suitable for images.
For the code below, I am using 10 images of shape (21, 28, 3), i.e. 1764 values per image when flattened.
class Autoencoder(Chain):
    def __init__(self, activation=F.relu):
        super().__init__()
        with self.init_scope():
            # encoder part
            self.l1 = L.Linear(1764, 882)
            self.l2 = L.Linear(882, 441)
            # decoder part
            self.l3 = L.Linear(441, 882)
            self.l4 = L.Linear(882, 1764)
        self.activation = activation

    def forward(self, x):
        h = self.encode(x)
        x_recon = self.decode(h)
        return x_recon

    def __call__(self, x):
        x_recon = self.forward(x)
        loss = F.mean_squared_error(h, x)
        return loss

    def encode(self, x):
        h = F.dropout(self.activation(self.l1(x)))
        return self.activation(self.l2(x))

    def decode(self, h, train=True):
        h = self.activation(self.l3(h))
        return self.l4(x)

gpu_id = 0
n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs, batch_size)
valid_iter = iterators.SerialIterator(xs, batch_size)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (n_epoch, "epoch"), out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
While running trainer.run(), I get:
InvalidType:
Invalid operation is performed in: LinearFunction (Forward)
Expect: x.shape[1] == W.shape[1]
Actual: 1764 != 882
I want to understand how node counts are distributed layer-wise in a model; please suggest any resources. Also, how should I choose the number of nodes per layer when the number of training images is small?
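For what it's worth, the traceback itself points at a wiring bug rather than the layer sizes: encode passes the 1764-dim x into l2, which expects 882 inputs. A hedged sketch of how the dimensions should chain (my reading of the error, not a confirmed answer):

def encode(self, x):                             # x: (batch, 1764)
    h = F.dropout(self.activation(self.l1(x)))   # h: (batch, 882)
    return self.activation(self.l2(h))           # feed h, not x -> (batch, 441)

def decode(self, h):                             # h: (batch, 441)
    h = self.activation(self.l3(h))              # (batch, 882)
    return self.l4(h)                            # back to (batch, 1764)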
