How transformer predict future data with multiple features? - machine-learning

I want to design a time-series predicter to forecast stock price. And I follow the paper to construct a transformer model which I switch decoder to linear. So I met a question how do I know the future features data? If I only use date to training(like prophet) without other features, I think it may be not precisely enough.
My model be like:
class PositionalEncoding(nn.Module):
def __init__(self, d_model, dropout=0.1, max_len=5000):
super(PositionalEncoding, self).__init__()
self.dropout = nn.Dropout(p=dropout)
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer('pe', pe)
def forward(self, x):
'''
x: [seq_len, batch_size, d_model]
'''
x = x + self.pe[:x.size(0), :]
return self.dropout(x)
def get_attn_pad_mask(seq_q, seq_k): # seq_q: [batch_size, seq_len] ,seq_k: [batch_size, seq_len]
batch_size, len_q ,_= seq_q.size() #1*64*5
batch_size, len_k ,_= seq_k.size()
# pad_attn_mask = seq_k.data.eq(0).unsqueeze(1) # 判断 输入那些含有P(=0),用1标记 ,[batch_size, 1, len_k]
pad_attn_mask = torch.ones(batch_size,len_q,len_k) # 判断 输入那些含有P(=0),用1标记 ,[batch_size, 1, len_k]
return pad_attn_mask # 扩展成多维度
class ScaledDotProductAttention(nn.Module):
def __init__(self):
super(ScaledDotProductAttention, self).__init__()
def forward(self, Q, K, V, attn_mask): # Q: [batch_size, n_heads, len_q, d_k]
# K: [batch_size, n_heads, len_k, d_k]
# V: [batch_size, n_heads, len_v(=len_k), d_v]
# attn_mask: [batch_size, n_heads, seq_len, seq_len]
scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(d_k) # scores : [batch_size, n_heads, len_q, len_k]
scores.masked_fill_(attn_mask, -1e9) # 如果时停用词P就等于 0
attn = nn.Softmax(dim=-1)(scores)
context = torch.matmul(attn, V) # [batch_size, n_heads, len_q, d_v]
return context, attn
class MultiHeadAttention(nn.Module):
def __init__(self):
super(MultiHeadAttention, self).__init__()
self.W_Q = nn.Linear(d_model, d_k * n_heads, bias=False).cuda()
self.W_K = nn.Linear(d_model, d_k * n_heads, bias=False).cuda()
self.W_V = nn.Linear(d_model, d_v * n_heads, bias=False).cuda()
self.fc = nn.Linear(n_heads * d_v, d_model, bias=False).cuda()
def forward(self, input_Q, input_K, input_V, attn_mask): # input_Q: [batch_size, len_q, d_model]
# input_K: [batch_size, len_k, d_model]
# input_V: [batch_size, len_v(=len_k), d_model]
# attn_mask: [batch_size, seq_len, seq_len]
residual, batch_size = input_Q, input_Q.size(0)
Q = self.W_Q(input_Q).view(batch_size, -1, n_heads, d_k).transpose(1, 2) # Q: [batch_size, n_heads, len_q, d_k]
K = self.W_K(input_K).view(batch_size, -1, n_heads, d_k).transpose(1, 2) # K: [batch_size, n_heads, len_k, d_k]
V = self.W_V(input_V).view(batch_size, -1, n_heads, d_v).transpose(1,2) # V: [batch_size, n_heads, len_v(=len_k), d_v]
attn_mask = attn_mask.unsqueeze(1).repeat(1, n_heads, 1, 1).cuda().bool() # attn_mask : [batch_size, n_heads, seq_len, seq_len]
# print('data_type\n', attn_mask.bool())
# pdb.set_trace()
context, attn = ScaledDotProductAttention()(Q, K, V, attn_mask) # context: [batch_size, n_heads, len_q, d_v]
context = context.transpose(1, 2).reshape(batch_size, -1,
n_heads * d_v) # context: [batch_size, len_q, n_heads * d_v]
output = self.fc(context) # [batch_size, len_q, d_model]
return nn.LayerNorm(d_model).cuda()(output + residual), attn
class PoswiseFeedForwardNet(nn.Module):
def __init__(self):
super(PoswiseFeedForwardNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(d_model, d_ff, bias=False),
nn.ReLU(),
nn.Linear(d_ff, d_model, bias=False))
def forward(self, inputs): # inputs: [batch_size, seq_len, d_model]
residual = inputs
output = self.fc(inputs)
return nn.LayerNorm(d_model).cuda()(output + residual) # [batch_size, seq_len, d_model]
class EncoderLayer(nn.Module):
def __init__(self):
super(EncoderLayer, self).__init__()
self.enc_self_attn = MultiHeadAttention().cuda() # 多头注意力机制
self.pos_ffn = PoswiseFeedForwardNet().cuda() # 前馈神经网络
def forward(self, enc_inputs, enc_self_attn_mask): # enc_inputs: [batch_size, src_len, d_model]
#输入3个enc_inputs分别与W_q、W_k、W_v相乘得到Q、K、V # enc_self_attn_mask: [batch_size, src_len, src_len]
enc_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs, # enc_outputs: [batch_size, src_len, d_model],
enc_self_attn_mask) # attn: [batch_size, n_heads, src_len, src_len]
enc_outputs = self.pos_ffn(enc_outputs) # enc_outputs: [batch_size, src_len, d_model]
return enc_outputs, attn
class Encoder(nn.Module):
def __init__(self):
super(Encoder, self).__init__()
self.src_emb = nn.Linear(15, d_model).cuda()
self.pos_emb = PositionalEncoding(d_model).cuda() # 加入位置信息
self.layers = nn.ModuleList([EncoderLayer() for _ in range(n_layers)]).cuda()
def forward(self, enc_inputs): # enc_inputs: [batch_size, src_len]
enc_outputs = self.src_emb(enc_inputs) # enc_outputs: [batch_size, src_len, d_model]
# print('1',enc_outputs)
enc_outputs = self.pos_emb(enc_outputs) # enc_outputs: [batch_size, src_len, d_model]
enc_self_attn_mask = get_attn_pad_mask(enc_inputs, enc_inputs) # enc_self_attn_mask: [batch_size, src_len, src_len]
enc_self_attns = []
for layer in self.layers:
enc_outputs, enc_self_attn = layer(enc_outputs, enc_self_attn_mask) # enc_outputs : [batch_size, src_len, d_model], # enc_self_attn : [batch_size, n_heads, src_len, src_len]
enc_self_attns.append(enc_self_attn)
return enc_outputs, enc_self_attns
class Transformer(nn.Module):
def __init__(self):
super(Transformer, self).__init__()
self.Encoder = Encoder().cuda()
self.Decoder = nn.Linear(d_model, 512).cuda()
self.projection = nn.Linear(d_model, 1, bias=False).cuda()
self.init_weights()
# self.projection1 = nn.Linear(128, 1, bias=False)
def init_weights(self):
initrange = 0.1
self.Decoder.bias.data.zero_()
self.Decoder.weight.data.uniform_(-initrange, initrange)
def forward(self, enc_inputs): # enc_inputs: [batch_size, src_len] # dec_inputs: [batch_size, tgt_len]
enc_outputs, enc_self_attns = self.Encoder(enc_inputs) # enc_outputs: [batch_size, src_len, d_model],
dec_outputs = self.Decoder(enc_outputs) # enc_self_attns: [n_layers, batch_size, n_heads, src_len, src_len]
dec_logits = self.projection(dec_outputs) # dec_logits: [batch_size, tgt_len, 1]
# dec_logits = self.projection1(dec_logits) # dec_logits: [batch_size, tgt_len, 1]
return dec_logits.view(-1, dec_logits.size(-1)), enc_self_attns
I tied to make some future data by prophet, but I think this method is tricky. I want to know to use history data (up to today) to predict 5 more days result.

Related

Pytorch Batch Size issue when comparing outputs form model and labels

Im having issues with the input and output size being halfed from 16 to 8 when running through my model.I've tried tweaking the input/output size between the maxpool and linear layer, that doesn't work. I was wondering if it has something to do with my loss criterion inputs or if the model should be outputting 16 instead of 8.
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from os import listdir
import os
from os.path import isdir
from torchsummary import summary
# Define the preprocessing steps
transform = transforms.Compose([
transforms.Resize((32, 32)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Define the custom dataset
class VideoDataset(Dataset):
def __init__(self, data_dir, transform=None):
self.data_dir = data_dir
self.transform = transform
def __len__(self):
return len(self.data_dir)
def __getitem__(self, idx):
video_dir = self.data_dir[idx]
print(video_dir)
video = []
for i in range(10): # For example, each video has 10 frames
img = Image.open(f"{video_dir}/frame_{i}.jpg")
if self.transform:
img = self.transform(img)
video.append(img)
video = torch.stack(video)
if(video_dir.find("squat")):
label = 1
if(video_dir.find("pull")):
label = 0
else:
label = 0
# label = str(video_dir.split("/")[-2]) # Assume the class label is included in the video directory name
sample = {'video': video, 'label': label}
#print(sample)
return sample
# Load the data
path = "videos/squat/"
path_pullups = "videos/pull ups/"
path_situp = "videos/situp/"
data_dir = list()
for file in os.scandir(path):
if file.is_dir():
data_dir.append(path + file.name)
for file in os.scandir(path_pullups):
if file.is_dir():
data_dir.append(path_pullups + file.name)
for file in os.scandir(path_situp):
if file.is_dir():
data_dir.append(path_situp + file.name)
print(len(data_dir)/2)
# Split the data into training and validation sets
train_data = VideoDataset(data_dir[:243], transform=transform) # Use first two classes for training
#print("train" + str(train_data.data_dir))
#valid_data = VideoDataset(data_dir[165:], transform=transform) # Use last class for validation
# Define the data loaders
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
#valid_loader = DataLoader(valid_data, batch_size=16, shuffle=False)
# Define the CNN model
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = torch.nn.Conv3d(10, 16, kernel_size=(3, 3, 3), stride=1, padding=1)
self.pool = torch.nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2, padding=0)
self.fc1 = torch.nn.Linear(16 * 8 * 8 * 8, 32) #16*16*2
self.fc2 = torch.nn.Linear(32, 3)
self.fc3 = torch.nn.Linear(3, 1)
def forward(self, x):
x = self.pool(torch.nn.functional.relu(self.conv1(x)))
x = x.view(-1, 16 * 8 * 8 * 8)
x = torch.nn.functional.relu(self.fc1(x))
x = self.fc2(x)
x = self.fc3(x)
x = torch.sigmoid(x)
return x
# Initialize the model, loss function, and optimizer
model = Net()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Train the model
for epoch in range(10): # Train for 10 epochs
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
inputs, labels = data['video'], data['label']
# .view(-1,1)
outputs = model(inputs)
#if labels.shape[0] != outputs.shape[0]:
# labels = labels.view(-1, outputs.shape[0]).t()
summary(model, (10, 3, 32, 32), device='cpu')
print("Labels size:" + str(labels.shape))
print("Outputs size:" + str(outputs.shape))
print(outputs, labels)
#####################################################################
loss = criterion(outputs, labels) #### error here
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f"Epoch {epoch + 1} loss: {running_loss / (i + 1)}")
# Evaluate the model
# correct = 0
# total = 0
# with torch.no_grad():
# for data in valid_loader:
# inputs, labels = data['video'], data['label']
# outputs = model(inputs)
# _, predicted = torch.max(outputs.data, 1)
# total += labels.size(0)
# correct += (predicted == labels).sum().item()
# print(f"Accuracy of the model on the validation set: {100 * correct / total}%")
Sample inputs are frames from video clips like this:
described by their exercise category such as squats, situps, pullups, etc.
Desired outputs for this model would be a binary representation of either 1 or 0 that each exercise given is a squat or not as labeled and indicated in the dataset customization function.

PyTorch Siamese Network Oscillating / Fluctuating Loss Function

I have implemented a siamese NN for regression using the resnet18 for transfer learning. The goal is to calculate the correlation coefficient between two images, since we do not have raw data but only images for a specific signal. We want to measure similarity between images. However the loss function of my nn is always oscillating up and down.
Code below:
Model itself
class firstNN(nn.Module):
def __init__(self):
# wofür ist das da?
super(firstNN, self).__init__()
self.resnet = models.resnet18(pretrained=True)
for param in self.resnet.parameters():
param.requires_grad = False
# over-write the first conv layer to be able to read images
# as resnet18 reads (3,x,x) where 3 is RGB channels
# whereas MNIST has (1,x,x) where 1 is a gray-scale channel
self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
self.fc_in_features = self.resnet.fc.in_features
# remove the last layer of resnet18 (linear layer which is before avgpool layer)
self.resnet = torch.nn.Sequential(*(list(self.resnet.children())[:-1]))
# add linear layers to compare between the features of the two images
self.fc = nn.Sequential(
nn.Linear(self.fc_in_features, hidden_dim),
torch.nn.ReLU(),
nn.Linear(hidden_dim, hidden_dim2),
torch.nn.ReLU(),
nn.Linear(hidden_dim2,hidden_dim3),
torch.nn.ReLU(),
nn.Linear(hidden_dim3,1),
# nn.ReLU(),
# nn.Linear(input_dim, third_dim),
)
# Distance function
self.binary = False
# Get params and register optimizer
info, params = self.get_model_params()
#self.optimizer = optim.Adam(params, lr=learning_rate,
# weight_decay=weight_decay)
# self.optimizer = optim.SGD(params, lr=learning_rate,
# momentum=0.5)
#self.criterion = nn.BCELoss()
#self.criterion = nn.MSELoss()
LOGGER.info(info)
# Initialisiert die weights mit "random" Werten
def init_layers(self):
nn.init.xavier_normal(self.fc[0].weight.data).to(device)
nn.init.xavier_normal(self.fc[2].weight.data).to(device)
# Erstellt NN mit dem input, inputs ist unser batch
def siamese_basic(self, inputs):
output = self.resnet(inputs)
output = output.view(output.size()[0], -1)
output = self.fc(output)
return output
def distance_layer(self, vec1, vec2, distance='cos'):
if distance == 'cos':
similarity = F.cosine_similarity(
vec1 + 1e-16, vec2 + 1e-16, dim=-1)
elif distance == 'l1':
similarity = self.dist_fc(torch.abs(vec1 - vec2))
similarity = similarity.squeeze(1)
elif distance == 'l2':
similarity = self.dist_fc(torch.abs(vec1 - vec2) ** 2)
similarity = similarity.squeeze(1)
ic()
#if self.binary:
# similarity = F.sigmoid(similarity)
return similarity
def forward(self, template, img):
embed1 = self.siamese_basic(template)
embed2 = self.siamese_basic(img)
# print(f"Before reshape embed2 {embed2.shape}")
# print(f"Befor reshape embed1 {embed1.shape}")
embed1 = embed1.reshape(template.shape[0],-1).float()
embed2 = embed2.reshape(img.shape[0],-1).float()
similarity = self.distance_layer(embed1, embed2)
# print(f"embed2 {embed2.shape}")
# print(f"embed1 {embed1.shape}")
# print(f"similarity {similarity.shape}")
ic()
return similarity#, embed1, embed2
def get_loss(self, outputs, targets):
loss = self.criterion(outputs, targets)
ic()
return loss
def get_model_params(self):
params = []
total_size = 0
def multiply_iter(p_list):
out = 1
for p in p_list:
out *= p
return out
for p in self.parameters():
if p.requires_grad:
params.append(p)
total_size += multiply_iter(p.size())
return '{}\nparam size: {:,}\n'.format(self, total_size), params
def save_checkpoint(self, state, checkpoint_dir, filename):
filename = checkpoint_dir + filename
LOGGER.info('Save checkpoint %s' % filename)
torch.save(state, filename)
def load_checkpoint(self, checkpoint_dir, filename):
filename = checkpoint_dir + filename
LOGGER.info('Load checkpoint %s' % filename)
checkpoint = torch.load(filename)
self.load_state_dict(checkpoint['state_dict'])
self.optimizer.load_state_dict(checkpoint['optimizer'])
Choice of criterion etc
model = firstNN()
criterion = nn.MSELoss()
#optimizer = optim.Adam(model.parameters(), lr=learning_rate,
# weight_decay=weight_decay)
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
training_data = CustomImageDataset("")
# Create data loaders.
train_loader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
hidden_dim = 128
hidden_dim2 = 64
hidden_dim3 = 32
do_learn = True
save_frequency = 2
batch_size = 40 if torch.cuda.is_available() else 64
learning_rate = 0.0001
num_epochs = 15
weight_decay = 0.1
momentum = 0.9
loss_history = []
r2_history = []
loss_history2 = []
r2_history2 = []
LOGGER = logging.getLogger(__name__)
torch.cuda.empty_cache()
model = firstNN().to(device)
model.train()
for epoch in range (num_epochs):
running_r2 = 0.0
running_loss = 0.0
for batch_idx, (templates, images, targets) in enumerate(train_loader):
templates = templates.unsqueeze(1).float().to(device)
images = images.unsqueeze(1).float().to(device)
targets = targets.float().to(device)
optimizer.zero_grad()
outputs = model(templates, images)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
r2score = torchmetrics.R2Score().to(device)
rscore = r2score(outputs, torch.tensor(targets).squeeze())
running_loss += loss.item()
running_r2 += rscore.item()
loss_history2.append(loss.item())
r2_history2.append(rscore.item())
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tR2Score: {}'.format(
epoch, batch_idx * len(templates), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item(), rscore ))
running_loss = running_loss / len(train_loader)
running_r2 = running_r2 / len(train_loader)
loss_history.append(running_loss)
r2_history.append(running_r2)
Example of images with spearman correlation of 0.45
Example of Oscillating loss and r2
I have tried using several different learning rates and experimented with weight decay and change of optimizer / nn setup but I dont understant exactly how to combat the issue.

Dimension error in neural network model for classification

Below is the code for Hierarchical Attention Networks, taken from https://github.com/arunarn2/HierarchicalAttentionNetworks. The only difference in the code on the link and mine is that I have 3 classes for classification, whereas they are using 2
maxlen = 100
max_sentences = 15
max_words = 20000
embedding_dim = 100
validation_split = 0.2
#class defining the custom attention layer
class HierarchicalAttentionNetwork(Layer):
def __init__(self, attention_dim):
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = attention_dim
super(HierarchicalAttentionNetwork, self).__init__()
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim,)))
self.u = K.variable(self.init((self.attention_dim, 1)))
self.trainable_weightss = [self.W, self.b, self.u]
super(HierarchicalAttentionNetwork, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
ait = K.exp(K.squeeze(K.dot(uit, self.u), -1))
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting
ait *= K.cast(mask, K.floatx())
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
weighted_input = x * K.expand_dims(ait)
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return input_shape[0], input_shape[-1]
# building Hierachical Attention network
embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
# words not found in embedding index will be all-zeros.
embedding_matrix[i] = embedding_vector
embedding_layer = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix],
input_length=maxlen, trainable=True, mask_zero=True)
sentence_input = Input(shape=(maxlen,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
lstm_word = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
attn_word = HierarchicalAttentionNetwork(100)(lstm_word)
sentenceEncoder = Model(sentence_input, attn_word)
review_input = Input(shape=(max_sentences, maxlen), dtype='int32')
review_encoder = TimeDistributed(sentenceEncoder)(review_input)
lstm_sentence = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
attn_sentence = HierarchicalAttentionNetwork(100)(lstm_sentence)
preds = Dense(3, activation='softmax')(attn_sentence)
model = Model(review_input, preds)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print("model fitting - Hierachical attention network")
Following is the error I get. Please help me understand what the error means and how I can possibly resolve it.

Variational Autoencoder's sampling problem

My vae class looks like this:
class Encoder(nn.Module):
def __init__(self):
super(Encoder, self).__init__()
c = capacity
self.conv1 = nn.Conv2d(in_channels=1, out_channels=c, kernel_size=4, stride=2, padding=1) # out: c x 14 x 14
self.conv2 = nn.Conv2d(in_channels=c, out_channels=c*2, kernel_size=4, stride=2, padding=1) # out: c x 7 x 7
self.fc_mu = nn.Linear(in_features=c*2*7*7, out_features=latent_dims)
self.fc_logvar = nn.Linear(in_features=c*2*7*7, out_features=latent_dims)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = x.view(x.size(0), -1) # flatten batch of multi-channel feature maps to a batch of feature vectors
x_mu = self.fc_mu(x)
x_logvar = self.fc_logvar(x)
return x_mu, x_logvar
class Decoder(nn.Module):
def __init__(self):
super(Decoder, self).__init__()
c = capacity
self.fc = nn.Linear(in_features=latent_dims, out_features=c*2*7*7)
self.conv2 = nn.ConvTranspose2d(in_channels=c*2, out_channels=c, kernel_size=4, stride=2, padding=1)
self.conv1 = nn.ConvTranspose2d(in_channels=c, out_channels=1, kernel_size=4, stride=2, padding=1)
def forward(self, x):
x = self.fc(x)
x = x.view(x.size(0), capacity*2, 7, 7) # unflatten batch of feature vectors to a batch of multi-channel feature maps
x = F.relu(self.conv2(x))
x = torch.sigmoid(self.conv1(x)) # last layer before output is sigmoid, since we are using BCE as reconstruction loss
return x
class VariationalAutoencoder(nn.Module):
def __init__(self):
super(VariationalAutoencoder, self).__init__()
self.encoder = Encoder()
self.decoder = Decoder()
def forward(self, x):
latent_mu, latent_logvar = self.encoder(x)
latent = self.latent_sample(latent_mu, latent_logvar)
x_recon = self.decoder(latent)
return x_recon, latent_mu, latent_logvar
def latent_sample(self, mu, logvar):
if self.training:
# the reparameterization trick
std = logvar.mul(0.5).exp_()
eps = torch.empty_like(std).normal_()
return eps.mul(std).add_(mu)
else:
return mu
def vae_loss(recon_x, x, mu, logvar):
# recon_x is the probability of a multivariate Bernoulli distribution p.
# -log(p(x)) is then the pixel-wise binary cross-entropy.
# Averaging or not averaging the binary cross-entropy over all pixels here
# is a subtle detail with big effect on training, since it changes the weight
# we need to pick for the other loss term by several orders of magnitude.
# Not averaging is the direct implementation of the negative log likelihood,
# but averaging makes the weight of the other loss term independent of the image resolution.
recon_loss = F.binary_cross_entropy(recon_x.view(-1, 784), x.view(-1, 784), reduction='sum')
kldivergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
return recon_loss + variational_beta * kldivergence
I train it on MNIST dataset.
I want to sample it, or generate an array and give it to the decoder and see what the output will be.
The problem is that I don't really understand, what my z array should look like and what shape should it need.
Here is the code for sampling:
z = ...
input = torch.FloatTensor(z).to(device)
vae.eval()
output = vae.decoder(input)
plot_gallery(output.data.cpu().numpy(), 24, 24, n_row=5, n_col=5)

Chainer Autoencoder

I am trying to write a vanilla autoencoder for compressing 13 images. However I am getting the following error:
ValueError: train argument is not supported anymore. Use chainer.using_config
The shape of images is (21,28,3).
filelist = 'ex1.png', 'ex2.png',...11 other images
x = np.array([np.array(Image.open(fname)) for fname in filelist])
xs = x.astype('float32')/255.
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1764,800)
self.l2 = L.Linear(800,300)
# decoder part
self.l3 = L.Linear(300,800)
self.l4 = L.Linear(800,1764)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x, train=True):
h = F.dropout(self.activation(self.l1(x)), train=train)
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
n_epoch = 5
batch_size = 2
model = Autoencoder()
optimizer = optimizers.SGD(lr=0.05).setup(model)
train_iter = iterators.SerialIterator(xs,batch_size)
valid_iter = iterators.SerialIterator(xs,batch_size)
updater = training.StandardUpdater(train_iter,optimizer)
trainer = training.Trainer(updater,(n_epoch,"epoch"),out="result")
from chainer.training import extensions
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id))
trainer.run()
Is the issue because of the number of nodes in the model or otherwise?
You need to wirte "decoder" part.
When you take mean_squared_error loss, the shape of h and x must be same.
AutoEncoder will encode original x to small space (100-dim) h, but after that we need to reconstruct x' from this h by adding decoder part.
Then loss can be calculated on this reconstructed x'.
For example, as follows (sorry i have not test it to run)
For Chainer v2~
train argument is handled by global configs, so you do not need train argument in dropout function.
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1308608,500)
self.l2 = L.Linear(500,100)
# decoder part
self.l3 = L.Linear(100,500)
self.l4 = L.Linear(500,1308608)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x):
h = F.dropout(self.activation(self.l1(x)))
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
For Chainer v1
class Autoencoder(Chain):
def __init__(self, activation=F.relu):
super().__init__()
with self.init_scope():
# encoder part
self.l1 = L.Linear(1308608,500)
self.l2 = L.Linear(500,100)
# decoder part
self.l3 = L.Linear(100,500)
self.l4 = L.Linear(500,1308608)
self.activation = activation
def forward(self,x):
h = self.encode(x)
x_recon = self.decode(h)
return x_recon
def __call__(self,x):
x_recon = self.forward(x)
loss = F.mean_squared_error(h, x)
return loss
def encode(self, x, train=True):
h = F.dropout(self.activation(self.l1(x)), train=train)
return self.activation(self.l2(x))
def decode(self, h, train=True):
h = self.activation(self.l3(h))
return self.l4(x)
You can also refer official Variational Auto Encoder example for the next step:
https://github.com/chainer/chainer/tree/master/examples/vae

Resources