How does a transformer predict future data with multiple features? - machine-learning
I want to design a time-series predictor to forecast stock prices. I followed the paper to construct a transformer model in which I replaced the decoder with a linear layer. This raises a question: how do I know the future feature data? If I only use the date for training (like Prophet) without other features, I think it may not be precise enough.
My model looks like this:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence-first tensor.

    The sin/cos table is computed once for ``max_len`` positions and stored
    in the non-trainable buffer ``pe`` with shape ``[max_len, 1, d_model]``,
    so it broadcasts over the batch dimension in ``forward``.

    Args:
        d_model: Size of the last (feature) dimension of the inputs.
        dropout: Dropout probability applied after the encoding is added.
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns; trim the surplus frequency instead of failing on a shape
        # mismatch. For an even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        self.register_buffer('pe', pe.unsqueeze(1))  # [max_len, 1, d_model]

    def forward(self, x):
        """Return ``dropout(x + pe[:seq_len])``.

        Args:
            x: Tensor laid out as ``[seq_len, batch_size, d_model]``; the
                position index is taken from dimension 0.
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
def get_attn_pad_mask(seq_q, seq_k):
    """Build the padding mask consumed by the attention layers.

    The attention code fills every position where the mask is non-zero with
    -1e9 before the softmax, so a non-zero entry means "ignore this key".
    The inputs here are continuous feature vectors with no padding token, so
    nothing must be masked and the mask is all zeros. (An all-ones mask would
    blank out every score and reduce attention to a uniform average.)

    Args:
        seq_q: Query-side input, ``[batch_size, len_q, n_features]``.
        seq_k: Key-side input, ``[batch_size, len_k, n_features]``.

    Returns:
        A zero tensor of shape ``[batch_size, len_q, len_k]``.
    """
    _, len_q, _ = seq_q.size()
    batch_size, len_k, _ = seq_k.size()
    return torch.zeros(batch_size, len_q, len_k)
class ScaledDotProductAttention(nn.Module):
    """Compute ``softmax(Q K^T / sqrt(d_k)) V`` with a boolean mask."""

    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()

    def forward(self, Q, K, V, attn_mask):
        """Apply masked scaled dot-product attention.

        Args:
            Q: ``[batch_size, n_heads, len_q, d_k]``
            K: ``[batch_size, n_heads, len_k, d_k]``
            V: ``[batch_size, n_heads, len_v(=len_k), d_v]``
            attn_mask: Boolean tensor ``[batch_size, n_heads, len_q, len_k]``;
                ``True`` marks a position whose score is replaced by -1e9.

        Returns:
            ``(context, attn)`` where ``context`` is
            ``[batch_size, n_heads, len_q, d_v]`` and ``attn`` is
            ``[batch_size, n_heads, len_q, len_k]``.
        """
        # Take the scale from the tensor itself so the layer stays correct
        # for any head width instead of depending on a module-level constant.
        scale = K.size(-1) ** 0.5
        scores = torch.matmul(Q, K.transpose(-1, -2)) / scale
        # Masked positions get a large negative score so softmax sends
        # their weight to (effectively) zero.
        scores = scores.masked_fill(attn_mask, -1e9)
        attn = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn, V)
        return context, attn
class MultiHeadAttention(nn.Module):
    """Multi-head attention with a residual connection and layer norm.

    Widths come from the module-level hyper-parameters ``d_model``, ``d_k``,
    ``d_v`` and ``n_heads``. Sub-modules are placed on CUDA when it is
    available and on the CPU otherwise.
    """

    def __init__(self):
        super(MultiHeadAttention, self).__init__()
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.W_Q = nn.Linear(d_model, d_k * n_heads, bias=False).to(device)
        self.W_K = nn.Linear(d_model, d_k * n_heads, bias=False).to(device)
        self.W_V = nn.Linear(d_model, d_v * n_heads, bias=False).to(device)
        self.fc = nn.Linear(n_heads * d_v, d_model, bias=False).to(device)
        self.attention = ScaledDotProductAttention()
        # The norm must be a registered sub-module. Building it inside
        # forward() produced a fresh, untrained LayerNorm on every call whose
        # parameters the optimizer never saw.
        # NOTE: this adds layer_norm.weight / layer_norm.bias to the state dict.
        self.layer_norm = nn.LayerNorm(d_model).to(device)

    def forward(self, input_Q, input_K, input_V, attn_mask):
        """Attend from ``input_Q`` over ``input_K`` / ``input_V``.

        Args:
            input_Q: ``[batch_size, len_q, d_model]``
            input_K: ``[batch_size, len_k, d_model]``
            input_V: ``[batch_size, len_v(=len_k), d_model]``
            attn_mask: ``[batch_size, len_q, len_k]``; non-zero entries are
                masked out.

        Returns:
            ``(output, attn)`` with ``output`` of shape
            ``[batch_size, len_q, d_model]`` and ``attn`` of shape
            ``[batch_size, n_heads, len_q, len_k]``.
        """
        residual, batch_size = input_Q, input_Q.size(0)

        # Project, then split the last dimension into heads:
        # [batch_size, len, n_heads * d] -> [batch_size, n_heads, len, d]
        Q = self.W_Q(input_Q).view(batch_size, -1, n_heads, d_k).transpose(1, 2)
        K = self.W_K(input_K).view(batch_size, -1, n_heads, d_k).transpose(1, 2)
        V = self.W_V(input_V).view(batch_size, -1, n_heads, d_v).transpose(1, 2)

        # One copy of the mask per head, on the same device as the activations.
        attn_mask = (
            attn_mask.unsqueeze(1)
            .repeat(1, n_heads, 1, 1)
            .to(input_Q.device)
            .bool()
        )  # [batch_size, n_heads, len_q, len_k]

        context, attn = self.attention(Q, K, V, attn_mask)
        # Merge the heads back: [batch_size, len_q, n_heads * d_v]
        context = context.transpose(1, 2).reshape(batch_size, -1, n_heads * d_v)
        output = self.fc(context)  # [batch_size, len_q, d_model]
        return self.layer_norm(output + residual), attn
class PoswiseFeedForwardNet(nn.Module):
    """Position-wise feed-forward block with a residual connection and layer norm.

    Uses the module-level hyper-parameters ``d_model`` and ``d_ff``:
    ``Linear(d_model, d_ff) -> ReLU -> Linear(d_ff, d_model)``.
    """

    def __init__(self):
        super(PoswiseFeedForwardNet, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(d_model, d_ff, bias=False),
            nn.ReLU(),
            nn.Linear(d_ff, d_model, bias=False),
        )
        # Registered once so its affine parameters are trained and follow the
        # module across devices. Constructing it inside forward() created a
        # new, untrained LayerNorm on every call.
        # NOTE: this adds layer_norm.weight / layer_norm.bias to the state dict.
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, inputs):
        """Return ``LayerNorm(fc(inputs) + inputs)``.

        Args:
            inputs: ``[batch_size, seq_len, d_model]``

        Returns:
            Tensor of the same shape as ``inputs``.
        """
        residual = inputs
        output = self.fc(inputs)
        return self.layer_norm(output + residual)
class EncoderLayer(nn.Module):
    """One encoder layer: multi-head self-attention followed by a feed-forward block."""

    def __init__(self):
        super(EncoderLayer, self).__init__()
        # Use the GPU when one is present, otherwise stay on the CPU; an
        # unconditional .cuda() raises on CPU-only hosts.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.enc_self_attn = MultiHeadAttention().to(device)  # multi-head attention
        self.pos_ffn = PoswiseFeedForwardNet().to(device)  # feed-forward network

    def forward(self, enc_inputs, enc_self_attn_mask):
        """Run self-attention and the feed-forward block.

        Args:
            enc_inputs: ``[batch_size, src_len, d_model]``
            enc_self_attn_mask: ``[batch_size, src_len, src_len]``

        Returns:
            ``(enc_outputs, attn)`` with ``enc_outputs`` of shape
            ``[batch_size, src_len, d_model]`` and ``attn`` of shape
            ``[batch_size, n_heads, src_len, src_len]``.
        """
        # Self-attention: the same tensor serves as query, key and value.
        enc_outputs, attn = self.enc_self_attn(
            enc_inputs, enc_inputs, enc_inputs, enc_self_attn_mask
        )
        enc_outputs = self.pos_ffn(enc_outputs)
        return enc_outputs, attn
class Encoder(nn.Module):
    """Embed multivariate inputs, add positional encodings, run the encoder stack.

    Args:
        n_features: Number of input features per time step (defaults to 15,
            the value previously hard-coded).
    """

    def __init__(self, n_features=15):
        super(Encoder, self).__init__()
        # Use the GPU when one is present, otherwise stay on the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.src_emb = nn.Linear(n_features, d_model).to(device)
        self.pos_emb = PositionalEncoding(d_model).to(device)  # adds position information
        self.layers = nn.ModuleList(
            [EncoderLayer() for _ in range(n_layers)]
        ).to(device)

    def forward(self, enc_inputs):
        """Encode a batch of sequences.

        Args:
            enc_inputs: ``[batch_size, src_len, n_features]``

        Returns:
            ``(enc_outputs, enc_self_attns)`` where ``enc_outputs`` is
            ``[batch_size, src_len, d_model]`` and ``enc_self_attns`` is a
            list with one ``[batch_size, n_heads, src_len, src_len]`` tensor
            per layer.
        """
        enc_outputs = self.src_emb(enc_inputs)  # [batch_size, src_len, d_model]
        # PositionalEncoding takes the position index from dimension 0
        # ([seq_len, batch_size, d_model]). Feeding it the batch-first tensor
        # directly would encode the batch index instead of the time step, so
        # swap to sequence-first for the call and back afterwards.
        enc_outputs = (
            self.pos_emb(enc_outputs.transpose(0, 1)).transpose(0, 1).contiguous()
        )
        enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs)  # [batch_size, src_len, src_len]
        enc_self_attns = []
        for layer in self.layers:
            enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask)
            enc_self_attns.append(enc_self_attn)
        return enc_outputs, enc_self_attns
class Transformer(nn.Module):
    """Encoder-only transformer with a linear "decoder" and a scalar projection.

    The encoder output is passed through ``Decoder`` (``d_model`` -> 512) and
    then ``projection`` (512 -> 1), giving one value per time step.
    """

    def __init__(self):
        super(Transformer, self).__init__()
        # Use the GPU when one is present, otherwise stay on the CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        decoder_dim = 512
        self.Encoder = Encoder().to(device)
        self.Decoder = nn.Linear(d_model, decoder_dim).to(device)
        # projection consumes the Decoder output, so its input width must be
        # the Decoder's output width. Sizing it with d_model only worked when
        # d_model happened to equal 512.
        self.projection = nn.Linear(decoder_dim, 1, bias=False).to(device)
        self.init_weights()

    def init_weights(self):
        """Zero the Decoder bias and draw its weights from U(-0.1, 0.1)."""
        initrange = 0.1
        self.Decoder.bias.data.zero_()
        self.Decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, enc_inputs):
        """Predict one value per input time step.

        Args:
            enc_inputs: ``[batch_size, src_len, n_features]``

        Returns:
            ``(logits, enc_self_attns)`` where ``logits`` is flattened to
            ``[batch_size * src_len, 1]`` and ``enc_self_attns`` is the list
            of per-layer attention maps returned by the encoder.
        """
        enc_outputs, enc_self_attns = self.Encoder(enc_inputs)  # [batch_size, src_len, d_model]
        dec_outputs = self.Decoder(enc_outputs)  # [batch_size, src_len, 512]
        dec_logits = self.projection(dec_outputs)  # [batch_size, src_len, 1]
        return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns
I tried to generate some future data with Prophet, but I think this method is tricky. I want to know how to use historical data (up to today) to predict the results for the next 5 days.
Related
Pytorch Batch Size issue when comparing outputs from model and labels
Im having issues with the input and output size being halfed from 16 to 8 when running through my model.I've tried tweaking the input/output size between the maxpool and linear layer, that doesn't work. I was wondering if it has something to do with my loss criterion inputs or if the model should be outputting 16 instead of 8. import torch import torchvision.transforms as transforms from PIL import Image from torch.utils.data import Dataset, DataLoader from os import listdir import os from os.path import isdir from torchsummary import summary # Define the preprocessing steps transform = transforms.Compose([ transforms.Resize((32, 32)), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) # Define the custom dataset class VideoDataset(Dataset): def __init__(self, data_dir, transform=None): self.data_dir = data_dir self.transform = transform def __len__(self): return len(self.data_dir) def __getitem__(self, idx): video_dir = self.data_dir[idx] print(video_dir) video = [] for i in range(10): # For example, each video has 10 frames img = Image.open(f"{video_dir}/frame_{i}.jpg") if self.transform: img = self.transform(img) video.append(img) video = torch.stack(video) if(video_dir.find("squat")): label = 1 if(video_dir.find("pull")): label = 0 else: label = 0 # label = str(video_dir.split("/")[-2]) # Assume the class label is included in the video directory name sample = {'video': video, 'label': label} #print(sample) return sample # Load the data path = "videos/squat/" path_pullups = "videos/pull ups/" path_situp = "videos/situp/" data_dir = list() for file in os.scandir(path): if file.is_dir(): data_dir.append(path + file.name) for file in os.scandir(path_pullups): if file.is_dir(): data_dir.append(path_pullups + file.name) for file in os.scandir(path_situp): if file.is_dir(): data_dir.append(path_situp + file.name) print(len(data_dir)/2) # Split the data into training and validation sets train_data = VideoDataset(data_dir[:243], 
transform=transform) # Use first two classes for training #print("train" + str(train_data.data_dir)) #valid_data = VideoDataset(data_dir[165:], transform=transform) # Use last class for validation # Define the data loaders train_loader = DataLoader(train_data, batch_size=16, shuffle=True) #valid_loader = DataLoader(valid_data, batch_size=16, shuffle=False) # Define the CNN model class Net(torch.nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = torch.nn.Conv3d(10, 16, kernel_size=(3, 3, 3), stride=1, padding=1) self.pool = torch.nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2, padding=0) self.fc1 = torch.nn.Linear(16 * 8 * 8 * 8, 32) #16*16*2 self.fc2 = torch.nn.Linear(32, 3) self.fc3 = torch.nn.Linear(3, 1) def forward(self, x): x = self.pool(torch.nn.functional.relu(self.conv1(x))) x = x.view(-1, 16 * 8 * 8 * 8) x = torch.nn.functional.relu(self.fc1(x)) x = self.fc2(x) x = self.fc3(x) x = torch.sigmoid(x) return x # Initialize the model, loss function, and optimizer model = Net() criterion = torch.nn.BCELoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) # Train the model for epoch in range(10): # Train for 10 epochs running_loss = 0.0 for i, data in enumerate(train_loader, 0): inputs, labels = data['video'], data['label'] # .view(-1,1) outputs = model(inputs) #if labels.shape[0] != outputs.shape[0]: # labels = labels.view(-1, outputs.shape[0]).t() summary(model, (10, 3, 32, 32), device='cpu') print("Labels size:" + str(labels.shape)) print("Outputs size:" + str(outputs.shape)) print(outputs, labels) ##################################################################### loss = criterion(outputs, labels) #### error here loss.backward() optimizer.step() running_loss += loss.item() print(f"Epoch {epoch + 1} loss: {running_loss / (i + 1)}") # Evaluate the model # correct = 0 # total = 0 # with torch.no_grad(): # for data in valid_loader: # inputs, labels = data['video'], data['label'] # outputs = model(inputs) # _, 
predicted = torch.max(outputs.data, 1) # total += labels.size(0) # correct += (predicted == labels).sum().item() # print(f"Accuracy of the model on the validation set: {100 * correct / total}%") Sample inputs are frames from video clips like this: described by their exercise category such as squats, situps, pullups, etc. Desired outputs for this model would be a binary representation of either 1 or 0 that each exercise given is a squat or not as labeled and indicated in the dataset customization function.
PyTorch Siamese Network Oscillating / Fluctuating Loss Function
I have implemented a siamese NN for regression using the resnet18 for transfer learning. The goal is to calculate the correlation coefficient between two images, since we do not have raw data but only images for a specific signal. We want to measure similarity between images. However the loss function of my nn is always oscillating up and down. Code below: Model itself class firstNN(nn.Module): def __init__(self): # wofür ist das da? super(firstNN, self).__init__() self.resnet = models.resnet18(pretrained=True) for param in self.resnet.parameters(): param.requires_grad = False # over-write the first conv layer to be able to read images # as resnet18 reads (3,x,x) where 3 is RGB channels # whereas MNIST has (1,x,x) where 1 is a gray-scale channel self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) self.fc_in_features = self.resnet.fc.in_features # remove the last layer of resnet18 (linear layer which is before avgpool layer) self.resnet = torch.nn.Sequential(*(list(self.resnet.children())[:-1])) # add linear layers to compare between the features of the two images self.fc = nn.Sequential( nn.Linear(self.fc_in_features, hidden_dim), torch.nn.ReLU(), nn.Linear(hidden_dim, hidden_dim2), torch.nn.ReLU(), nn.Linear(hidden_dim2,hidden_dim3), torch.nn.ReLU(), nn.Linear(hidden_dim3,1), # nn.ReLU(), # nn.Linear(input_dim, third_dim), ) # Distance function self.binary = False # Get params and register optimizer info, params = self.get_model_params() #self.optimizer = optim.Adam(params, lr=learning_rate, # weight_decay=weight_decay) # self.optimizer = optim.SGD(params, lr=learning_rate, # momentum=0.5) #self.criterion = nn.BCELoss() #self.criterion = nn.MSELoss() LOGGER.info(info) # Initialisiert die weights mit "random" Werten def init_layers(self): nn.init.xavier_normal(self.fc[0].weight.data).to(device) nn.init.xavier_normal(self.fc[2].weight.data).to(device) # Erstellt NN mit dem input, inputs ist unser batch def 
siamese_basic(self, inputs): output = self.resnet(inputs) output = output.view(output.size()[0], -1) output = self.fc(output) return output def distance_layer(self, vec1, vec2, distance='cos'): if distance == 'cos': similarity = F.cosine_similarity( vec1 + 1e-16, vec2 + 1e-16, dim=-1) elif distance == 'l1': similarity = self.dist_fc(torch.abs(vec1 - vec2)) similarity = similarity.squeeze(1) elif distance == 'l2': similarity = self.dist_fc(torch.abs(vec1 - vec2) ** 2) similarity = similarity.squeeze(1) ic() #if self.binary: # similarity = F.sigmoid(similarity) return similarity def forward(self, template, img): embed1 = self.siamese_basic(template) embed2 = self.siamese_basic(img) # print(f"Before reshape embed2 {embed2.shape}") # print(f"Befor reshape embed1 {embed1.shape}") embed1 = embed1.reshape(template.shape[0],-1).float() embed2 = embed2.reshape(img.shape[0],-1).float() similarity = self.distance_layer(embed1, embed2) # print(f"embed2 {embed2.shape}") # print(f"embed1 {embed1.shape}") # print(f"similarity {similarity.shape}") ic() return similarity#, embed1, embed2 def get_loss(self, outputs, targets): loss = self.criterion(outputs, targets) ic() return loss def get_model_params(self): params = [] total_size = 0 def multiply_iter(p_list): out = 1 for p in p_list: out *= p return out for p in self.parameters(): if p.requires_grad: params.append(p) total_size += multiply_iter(p.size()) return '{}\nparam size: {:,}\n'.format(self, total_size), params def save_checkpoint(self, state, checkpoint_dir, filename): filename = checkpoint_dir + filename LOGGER.info('Save checkpoint %s' % filename) torch.save(state, filename) def load_checkpoint(self, checkpoint_dir, filename): filename = checkpoint_dir + filename LOGGER.info('Load checkpoint %s' % filename) checkpoint = torch.load(filename) self.load_state_dict(checkpoint['state_dict']) self.optimizer.load_state_dict(checkpoint['optimizer']) Choice of criterion etc model = firstNN() criterion = nn.MSELoss() #optimizer = 
optim.Adam(model.parameters(), lr=learning_rate, # weight_decay=weight_decay) optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum) training_data = CustomImageDataset("") # Create data loaders. train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True) hidden_dim = 128 hidden_dim2 = 64 hidden_dim3 = 32 do_learn = True save_frequency = 2 batch_size = 40 if torch.cuda.is_available() else 64 learning_rate = 0.0001 num_epochs = 15 weight_decay = 0.1 momentum = 0.9 loss_history = [] r2_history = [] loss_history2 = [] r2_history2 = [] LOGGER = logging.getLogger(__name__) torch.cuda.empty_cache() model = firstNN().to(device) model.train() for epoch in range (num_epochs): running_r2 = 0.0 running_loss = 0.0 for batch_idx, (templates, images, targets) in enumerate(train_loader): templates = templates.unsqueeze(1).float().to(device) images = images.unsqueeze(1).float().to(device) targets = targets.float().to(device) optimizer.zero_grad() outputs = model(templates, images) loss = criterion(outputs, targets) loss.backward() optimizer.step() r2score = torchmetrics.R2Score().to(device) rscore = r2score(outputs, torch.tensor(targets).squeeze()) running_loss += loss.item() running_r2 += rscore.item() loss_history2.append(loss.item()) r2_history2.append(rscore.item()) print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tR2Score: {}'.format( epoch, batch_idx * len(templates), len(train_loader.dataset), 100. * batch_idx / len(train_loader), loss.item(), rscore )) running_loss = running_loss / len(train_loader) running_r2 = running_r2 / len(train_loader) loss_history.append(running_loss) r2_history.append(running_r2) Example of images with spearman correlation of 0.45 Example of Oscillating loss and r2 I have tried using several different learning rates and experimented with weight decay and change of optimizer / nn setup but I dont understant exactly how to combat the issue.
Dimension error in neural network model for classification
Below is the code for Hierarchical Attention Networks, taken from https://github.com/arunarn2/HierarchicalAttentionNetworks. The only difference in the code on the link and mine is that I have 3 classes for classification, whereas they are using 2 maxlen = 100 max_sentences = 15 max_words = 20000 embedding_dim = 100 validation_split = 0.2 #class defining the custom attention layer class HierarchicalAttentionNetwork(Layer): def __init__(self, attention_dim): self.init = initializers.get('normal') self.supports_masking = True self.attention_dim = attention_dim super(HierarchicalAttentionNetwork, self).__init__() def build(self, input_shape): assert len(input_shape) == 3 self.W = K.variable(self.init((input_shape[-1], self.attention_dim))) self.b = K.variable(self.init((self.attention_dim,))) self.u = K.variable(self.init((self.attention_dim, 1))) self.trainable_weightss = [self.W, self.b, self.u] super(HierarchicalAttentionNetwork, self).build(input_shape) def compute_mask(self, inputs, mask=None): return mask def call(self, x, mask=None): # size of x :[batch_size, sel_len, attention_dim] # size of u :[batch_size, attention_dim] # uit = tanh(xW+b) uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b)) ait = K.exp(K.squeeze(K.dot(uit, self.u), -1)) if mask is not None: # Cast the mask to floatX to avoid float64 upcasting ait *= K.cast(mask, K.floatx()) ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx()) weighted_input = x * K.expand_dims(ait) output = K.sum(weighted_input, axis=1) return output def compute_output_shape(self, input_shape): return input_shape[0], input_shape[-1] # building Hierachical Attention network embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim)) for word, i in word_index.items(): embedding_vector = embeddings_index.get(word) if embedding_vector is not None: # words not found in embedding index will be all-zeros. 
embedding_matrix[i] = embedding_vector embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], input_length=maxlen, trainable=True, mask_zero=True) sentence_input = Input(shape=(maxlen,), dtype='int32') embedded_sequences = embedding_layer(sentence_input) lstm_word = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences) attn_word = HierarchicalAttentionNetwork(100)(lstm_word) sentenceEncoder = Model(sentence_input, attn_word) review_input = Input(shape=(max_sentences, maxlen), dtype='int32') review_encoder = TimeDistributed(sentenceEncoder)(review_input) lstm_sentence = Bidirectional(GRU(100, return_sequences=True))(review_encoder) attn_sentence = HierarchicalAttentionNetwork(100)(lstm_sentence) preds = Dense(3, activation='softmax')(attn_sentence) model = Model(review_input, preds) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) print("model fitting - Hierachical attention network") Following is the error I get. Please help me understand what the error means and how I can possibly resolve it.
Variational Autoencoder's sampling problem
My vae class looks like this: class Encoder(nn.Module): def __init__(self): super(Encoder, self).__init__() c = capacity self.conv1 = nn.Conv2d(in_channels=1, out_channels=c, kernel_size=4, stride=2, padding=1) # out: c x 14 x 14 self.conv2 = nn.Conv2d(in_channels=c, out_channels=c*2, kernel_size=4, stride=2, padding=1) # out: c x 7 x 7 self.fc_mu = nn.Linear(in_features=c*2*7*7, out_features=latent_dims) self.fc_logvar = nn.Linear(in_features=c*2*7*7, out_features=latent_dims) def forward(self, x): x = F.relu(self.conv1(x)) x = F.relu(self.conv2(x)) x = x.view(x.size(0), -1) # flatten batch of multi-channel feature maps to a batch of feature vectors x_mu = self.fc_mu(x) x_logvar = self.fc_logvar(x) return x_mu, x_logvar class Decoder(nn.Module): def __init__(self): super(Decoder, self).__init__() c = capacity self.fc = nn.Linear(in_features=latent_dims, out_features=c*2*7*7) self.conv2 = nn.ConvTranspose2d(in_channels=c*2, out_channels=c, kernel_size=4, stride=2, padding=1) self.conv1 = nn.ConvTranspose2d(in_channels=c, out_channels=1, kernel_size=4, stride=2, padding=1) def forward(self, x): x = self.fc(x) x = x.view(x.size(0), capacity*2, 7, 7) # unflatten batch of feature vectors to a batch of multi-channel feature maps x = F.relu(self.conv2(x)) x = torch.sigmoid(self.conv1(x)) # last layer before output is sigmoid, since we are using BCE as reconstruction loss return x class VariationalAutoencoder(nn.Module): def __init__(self): super(VariationalAutoencoder, self).__init__() self.encoder = Encoder() self.decoder = Decoder() def forward(self, x): latent_mu, latent_logvar = self.encoder(x) latent = self.latent_sample(latent_mu, latent_logvar) x_recon = self.decoder(latent) return x_recon, latent_mu, latent_logvar def latent_sample(self, mu, logvar): if self.training: # the reparameterization trick std = logvar.mul(0.5).exp_() eps = torch.empty_like(std).normal_() return eps.mul(std).add_(mu) else: return mu def vae_loss(recon_x, x, mu, logvar): # recon_x is the 
probability of a multivariate Bernoulli distribution p. # -log(p(x)) is then the pixel-wise binary cross-entropy. # Averaging or not averaging the binary cross-entropy over all pixels here # is a subtle detail with big effect on training, since it changes the weight # we need to pick for the other loss term by several orders of magnitude. # Not averaging is the direct implementation of the negative log likelihood, # but averaging makes the weight of the other loss term independent of the image resolution. recon_loss = F.binary_cross_entropy(recon_x.view(-1, 784), x.view(-1, 784), reduction='sum') kldivergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) return recon_loss + variational_beta * kldivergence I train it on MNIST dataset. I want to sample it, or generate an array and give it to the decoder and see what the output will be. The problem is that I don't really understand, what my z array should look like and what shape should it need. Here is the code for sampling: z = ... input = torch.FloatTensor(z).to(device) vae.eval() output = vae.decoder(input) plot_gallery(output.data.cpu().numpy(), 24, 24, n_row=5, n_col=5)
Chainer Autoencoder
I am trying to write a vanilla autoencoder for compressing 13 images. However I am getting the following error: ValueError: train argument is not supported anymore. Use chainer.using_config The shape of images is (21,28,3). filelist = 'ex1.png', 'ex2.png',...11 other images x = np.array([np.array(Image.open(fname)) for fname in filelist]) xs = x.astype('float32')/255. class Autoencoder(Chain): def __init__(self, activation=F.relu): super().__init__() with self.init_scope(): # encoder part self.l1 = L.Linear(1764,800) self.l2 = L.Linear(800,300) # decoder part self.l3 = L.Linear(300,800) self.l4 = L.Linear(800,1764) self.activation = activation def forward(self,x): h = self.encode(x) x_recon = self.decode(h) return x_recon def __call__(self,x): x_recon = self.forward(x) loss = F.mean_squared_error(h, x) return loss def encode(self, x, train=True): h = F.dropout(self.activation(self.l1(x)), train=train) return self.activation(self.l2(x)) def decode(self, h, train=True): h = self.activation(self.l3(h)) return self.l4(x) n_epoch = 5 batch_size = 2 model = Autoencoder() optimizer = optimizers.SGD(lr=0.05).setup(model) train_iter = iterators.SerialIterator(xs,batch_size) valid_iter = iterators.SerialIterator(xs,batch_size) updater = training.StandardUpdater(train_iter,optimizer) trainer = training.Trainer(updater,(n_epoch,"epoch"),out="result") from chainer.training import extensions trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id)) trainer.run() Is the issue because of the number of nodes in the model or otherwise?
You need to wirte "decoder" part. When you take mean_squared_error loss, the shape of h and x must be same. AutoEncoder will encode original x to small space (100-dim) h, but after that we need to reconstruct x' from this h by adding decoder part. Then loss can be calculated on this reconstructed x'. For example, as follows (sorry i have not test it to run) For Chainer v2~ train argument is handled by global configs, so you do not need train argument in dropout function. class Autoencoder(Chain): def __init__(self, activation=F.relu): super().__init__() with self.init_scope(): # encoder part self.l1 = L.Linear(1308608,500) self.l2 = L.Linear(500,100) # decoder part self.l3 = L.Linear(100,500) self.l4 = L.Linear(500,1308608) self.activation = activation def forward(self,x): h = self.encode(x) x_recon = self.decode(h) return x_recon def __call__(self,x): x_recon = self.forward(x) loss = F.mean_squared_error(h, x) return loss def encode(self, x): h = F.dropout(self.activation(self.l1(x))) return self.activation(self.l2(x)) def decode(self, h, train=True): h = self.activation(self.l3(h)) return self.l4(x) For Chainer v1 class Autoencoder(Chain): def __init__(self, activation=F.relu): super().__init__() with self.init_scope(): # encoder part self.l1 = L.Linear(1308608,500) self.l2 = L.Linear(500,100) # decoder part self.l3 = L.Linear(100,500) self.l4 = L.Linear(500,1308608) self.activation = activation def forward(self,x): h = self.encode(x) x_recon = self.decode(h) return x_recon def __call__(self,x): x_recon = self.forward(x) loss = F.mean_squared_error(h, x) return loss def encode(self, x, train=True): h = F.dropout(self.activation(self.l1(x)), train=train) return self.activation(self.l2(x)) def decode(self, h, train=True): h = self.activation(self.l3(h)) return self.l4(x) You can also refer official Variational Auto Encoder example for the next step: https://github.com/chainer/chainer/tree/master/examples/vae