Related
Im having issues with the input and output size being halfed from 16 to 8 when running through my model.I've tried tweaking the input/output size between the maxpool and linear layer, that doesn't work. I was wondering if it has something to do with my loss criterion inputs or if the model should be outputting 16 instead of 8.
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from os import listdir
import os
from os.path import isdir
from torchsummary import summary
# Define the preprocessing steps
transform = transforms.Compose([
transforms.Resize((32, 32)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Define the custom dataset
class VideoDataset(Dataset):
def __init__(self, data_dir, transform=None):
self.data_dir = data_dir
self.transform = transform
def __len__(self):
return len(self.data_dir)
def __getitem__(self, idx):
video_dir = self.data_dir[idx]
print(video_dir)
video = []
for i in range(10): # For example, each video has 10 frames
img = Image.open(f"{video_dir}/frame_{i}.jpg")
if self.transform:
img = self.transform(img)
video.append(img)
video = torch.stack(video)
if(video_dir.find("squat")):
label = 1
if(video_dir.find("pull")):
label = 0
else:
label = 0
# label = str(video_dir.split("/")[-2]) # Assume the class label is included in the video directory name
sample = {'video': video, 'label': label}
#print(sample)
return sample
# Load the data
path = "videos/squat/"
path_pullups = "videos/pull ups/"
path_situp = "videos/situp/"
data_dir = list()
for file in os.scandir(path):
if file.is_dir():
data_dir.append(path + file.name)
for file in os.scandir(path_pullups):
if file.is_dir():
data_dir.append(path_pullups + file.name)
for file in os.scandir(path_situp):
if file.is_dir():
data_dir.append(path_situp + file.name)
print(len(data_dir)/2)
# Split the data into training and validation sets
train_data = VideoDataset(data_dir[:243], transform=transform) # Use first two classes for training
#print("train" + str(train_data.data_dir))
#valid_data = VideoDataset(data_dir[165:], transform=transform) # Use last class for validation
# Define the data loaders
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
#valid_loader = DataLoader(valid_data, batch_size=16, shuffle=False)
# Define the CNN model
class Net(torch.nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = torch.nn.Conv3d(10, 16, kernel_size=(3, 3, 3), stride=1, padding=1)
self.pool = torch.nn.MaxPool3d(kernel_size=(2, 2, 2), stride=2, padding=0)
self.fc1 = torch.nn.Linear(16 * 8 * 8 * 8, 32) #16*16*2
self.fc2 = torch.nn.Linear(32, 3)
self.fc3 = torch.nn.Linear(3, 1)
def forward(self, x):
x = self.pool(torch.nn.functional.relu(self.conv1(x)))
x = x.view(-1, 16 * 8 * 8 * 8)
x = torch.nn.functional.relu(self.fc1(x))
x = self.fc2(x)
x = self.fc3(x)
x = torch.sigmoid(x)
return x
# Initialize the model, loss function, and optimizer
model = Net()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Train the model
for epoch in range(10): # Train for 10 epochs
running_loss = 0.0
for i, data in enumerate(train_loader, 0):
inputs, labels = data['video'], data['label']
# .view(-1,1)
outputs = model(inputs)
#if labels.shape[0] != outputs.shape[0]:
# labels = labels.view(-1, outputs.shape[0]).t()
summary(model, (10, 3, 32, 32), device='cpu')
print("Labels size:" + str(labels.shape))
print("Outputs size:" + str(outputs.shape))
print(outputs, labels)
#####################################################################
loss = criterion(outputs, labels) #### error here
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f"Epoch {epoch + 1} loss: {running_loss / (i + 1)}")
# Evaluate the model
# correct = 0
# total = 0
# with torch.no_grad():
# for data in valid_loader:
# inputs, labels = data['video'], data['label']
# outputs = model(inputs)
# _, predicted = torch.max(outputs.data, 1)
# total += labels.size(0)
# correct += (predicted == labels).sum().item()
# print(f"Accuracy of the model on the validation set: {100 * correct / total}%")
Sample inputs are frames from video clips like this:
described by their exercise category such as squats, situps, pullups, etc.
Desired outputs for this model would be a binary representation of either 1 or 0 that each exercise given is a squat or not as labeled and indicated in the dataset customization function.
How can I avoid underfitting in Pytorch NeuralNetwork?
I try to predict the power consumtion of a plant based on seven features. I have built two simple neural network models.
The first one is a Linear model, and the second is a RNN model. However, both models perform bad in the test set, their forecast result is a straight line.
Something about data
There are about 360 samples in the CSV file. I take the first 300 samples for trainning and the others for test. The first 7 columns of raw data are features of daily operation. The last column is the electricity consumption of every day.
Setting of training set
In the linear model, train data is the first 7 colums of a certain day, and corresponding target is the power consumption of that day.
In the RNN model, train data is all the 8 columns of three days(seven features and power consumption), and corresponding traget is the power consumption of next three days.
Code
Code of RNN model
import torch
import pandas as pd
import numpy as np
import torch.nn.functional as f
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from matplotlib import pyplot as plt
'''
build simple RNN
'''
batchSize = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
netPath = ''
'''Data processing'''
# read raw data
filePath = 'F:/.csv'
initialData = pd.read_csv(filePath)
print(initialData.head(10))
print('hello world')
# Separate features and power consumption.
trainDatas = initialData.iloc[0:7, 1:301]
trainPowerConsum = pd.DataFrame(initialData.iloc[-1, 1:301]).T
trainDatas = pd.concat([trainDatas, trainPowerConsum], 0)
trainPowerConsum = initialData.iloc[-1, 2:302]
# Plot
powerConsumPlot = trainDatas.iloc[-1, :]
xData = np.linspace(1, powerConsumPlot.shape[0], 300)
plt.plot(xData, powerConsumPlot)
plt.show()
testDatas = initialData.iloc[0:7, 302:-1]
testPowerConsum = pd.DataFrame(initialData.iloc[-1, 302:-1]).T
testDatas = pd.concat([testDatas, testPowerConsum], 0)
testPowerConsum = initialData.iloc[-1, 303:]
# convert to dataframe
trainDatas = pd.DataFrame(trainDatas)
trainDatas = trainDatas.T
trainPowerConsum = pd.DataFrame(trainPowerConsum)
testDatas = pd.DataFrame(testDatas)
testDatas = testDatas.T
testPowerConsum = pd.DataFrame(testPowerConsum)
# change the unit of PowerConsumption
trainDatas.iloc[:, -1] = trainDatas.iloc[:, -1] * 1000
testDatas.iloc[:, -1] = testDatas.iloc[:, -1] * 1000
trainPowerConsum.iloc[:, 0] = trainPowerConsum.iloc[:, 0] * 1000
testPowerConsum.iloc[:, 0] = testPowerConsum.iloc[:, 0] * 1000
assert testPowerConsum.shape[0] == testDatas.shape[0]
assert trainDatas.shape[0] == trainPowerConsum.shape[0]
# convert dataframe to tensor
trainDatas = torch.tensor(trainDatas.values.astype(float), device=device)
trainPowerConsum = torch.tensor(trainPowerConsum.values.astype(float), device=device)
testDatas = torch.tensor(testDatas.values.astype(float), device=device)
testPowerConsum = torch.tensor(testPowerConsum.values.astype(float), device=device)
trainDatasList = list()
trainPowerConsumList = list()
for i in range(298):
trainDatasList.append(trainDatas[i:i + 3])
trainPowerConsumList.append(trainPowerConsum[i:i + 3])
from torch.nn.utils.rnn import pad_sequence
trainPowerConsum = pad_sequence(trainPowerConsumList, batch_first=True)
trainDatas = pad_sequence(trainDatasList, batch_first=True)
print(trainDatas.shape)
# ensure the batch_size of test data is 1
testDatas = torch.unsqueeze(testDatas, dim=0)
testPowerConsum = torch.unsqueeze(testPowerConsum, dim=0)
'''build dataloader'''
trainDataLoader = DataLoader(
TensorDataset(
trainDatas, trainPowerConsum
),
shuffle=True, batch_size=batchSize, drop_last=True)
print('Data is ready')
seqLen = 2
inputDim = 8
hiddenSize = 3
numLayer = 2
learningRate = 0.01
class RNNModel(torch.nn.Module):
def __init__(self, inputsize, hiddensize, batchsize, numLayer):
super(RNNModel, self).__init__()
self.batchsize = batchsize
self.inputsize = inputsize
self.hiddensize = hiddensize
self.numlayers = numLayer
self.rnn = torch.nn.RNN(input_size=self.inputsize, hidden_size=self.hiddensize, num_layers=self.numlayers,
batch_first=True)
self.l1 = torch.nn.Linear(hiddenSize, hiddensize)
self.l2 = torch.nn.Linear(hiddenSize, 1)
def forward(self, input, hidden):
out, hidden = self.rnn(input.float(), hidden.float())
batch_size, seq_len, input_dim = out.shape
out = out.reshape(-1, input_dim)
# out = f.sigmoid(self.l1(out))
out = f.relu(self.l1(out))
out = self.l2(out)
out = out.reshape(batch_size, seq_len, -1)
return out, hidden
def initHidden(self):
hidden = torch.zeros(self.numlayers, self.batchsize, self.hiddensize, device=device, dtype=torch.float64)
return hidden
net = RNNModel(inputDim, hiddenSize, batchSize, numLayer).to(device)
criterion = torch.nn.L1Loss()
optimizer = optim.Adam(net.parameters(), lr=learningRate,momentum=0.01)
def train(epoch):
runLoss = 0.
optimizer.zero_grad()
hidden = net.initHidden()
for batchIndex, data in enumerate(trainDataLoader, 0):
inputs, target = data
optimizer.zero_grad()
outputs, hidden = net(inputs, hidden)
hidden = hidden.detach()
loss = criterion(outputs.float(), target.float())
loss = loss.mean()
loss.backward()
optimizer.step()
print(f'{epoch + 1},\t Loss={loss.item()}')
# torch.save(net.state_dict(), netPath)
def test():
testDatasVice = torch.clone(testDatas)
input = testDatasVice[:, 0, :]
input = input.view(1, 1, -1)
assert input.shape[2] == 8
predictPowConsum = list()
# the first hidden tensor in test set is zero
hidden = torch.zeros(2, 1, 3, device=device, dtype=torch.float64)
with torch.no_grad():
for i in range(testDatas.shape[1]):
output, hidden = net(input, hidden)
if i < 51:
testDatasVice[:, i + 1, -1] = output[0]
input = torch.unsqueeze(testDatasVice[:, i + 1, :], dim=0)
predictPowConsum.append(output.data.cpu().numpy().ravel()[0])
elif i == 51:
predictPowConsum.append(output.data.cpu().numpy().ravel()[0])
else:
print('\tindexError') # Exclude potential Errors
return predictPowConsum
if __name__ == '__main__':
epochNum = 300
for epoch in range(epochNum):
train(epoch)
predictPowConsum = test()
# plotting
xData = np.arange(303, 303 + testPowerConsum.size(1))
plt.plot(xData, testPowerConsum.cpu().numpy()[0, :, 0])
plt.plot(xData, predictPowConsum)
plt.show()
Code of Linear model
import torch
import pandas as pd
import numpy as np
import torch.nn.functional as f
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from matplotlib import pyplot as plt
filePath = 'F:.csv'
initialData = pd.read_csv(filePath)
print(initialData.head(10))
print('hello world')
trainDatas = initialData.iloc[0:7, 1:300]
trainPowerConsum = initialData.iloc[-1, 1:300]
testDatas = initialData.iloc[0:7, 300:-1]
testPowerConsum = initialData.iloc[-1, 300:-1]
trainDatas = pd.DataFrame(trainDatas)
trainDatas = trainDatas.T
trainPowerConsum = pd.DataFrame(trainPowerConsum)
testDatas = pd.DataFrame(testDatas)
testDatas = testDatas.T
testPowerConsum = pd.DataFrame(testPowerConsum)
trainPowerConsum.iloc[:, 0] = trainPowerConsum.iloc[:, 0] * 1000
testPowerConsum.iloc[:, 0] = testPowerConsum.iloc[:, 0] * 1000
# build dataloader
trainData = DataLoader(
TensorDataset(
torch.tensor(trainDatas.values).float(),
torch.tensor(trainPowerConsum.values.astype(float)).float()
),
shuffle=True, batch_size=15)
testData = DataLoader(
TensorDataset(
torch.tensor(testDatas.values.astype(float)).float(),
torch.tensor(testPowerConsum.values.astype(float)).float()
),
shuffle=False, batch_size=15)
print('data is ready')
class SimpleNet(torch.nn.Module):
def __init__(self):
super(SimpleNet, self).__init__()
self.l1 = torch.nn.Linear(7, 15)
self.l2 = torch.nn.Linear(15, 30)
self.l3 = torch.nn.Linear(30, 15)
self.l4 = torch.nn.Linear(15, 5)
self.l5 = torch.nn.Linear(5, 1)
def forward(self, x):
x = f.relu(self.l1(x))
x = f.relu(self.l2(x))
x = f.relu(self.l3(x))
x = f.relu(self.l4(x))
return self.l5(x)
model = SimpleNet()
criterion = torch.nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001)
def train(epoch):
runLoss = 0.
for batch_index, data in enumerate(trainData, 0):
inputs, target = data
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, target)
loss.backward()
optimizer.step()
runLoss += loss
print(f'{epoch + 1},{batch_index + 1},\tLoss={runLoss / 5}')
runLoss = 0
def test(epoch):
totalError = 0.
print('Start to test the model')
with torch.no_grad():
for data in testData:
# test ---------data for test
# testlab ---------corresponding power consumption
test, testlab = data
outputs = model(test)
predicted = outputs.data
testError = testlab - predicted
# plotting
if epoch % 50 == 2:
xData = np.linspace(1, 15, 15)
if predicted.size(0) != 15:
pass
else:
plt.plot(xData, predicted[:, 0].numpy(), label='predicted', color='red')
plt.plot(xData, testlab[:, 0].numpy(), label='origData', color='blue')
plt.show()
totalError += (torch.abs(testError).sum().item())
print(f'Average Error on test set is {totalError / 54}')
if __name__ == '__main__':
for epoch in range(1000):
train(epoch)
test(epoch)
Image of Output
output of RNN
The blue line is the actual data, and the orange line is the output of RNN model.
Solutions and its Effect
I have looked around and apparently I've got the choice between these solutions:
Add new domain-specific features
Decrease the amount of regularization used
Increase the duration of training
Increase the complexity or type of the model
Decrease the learning rate
Try other activate function
I have tried some solutions:
The data for trainning isn't regularized. I just change the unit of electricity from kWh to Wh
I take ReLu as activate function after using Sigmoid, but it doesn't work
I adjust the learning rate from 0.01 to 0.001, it doesn't improve
I try different optimizer such as SGD and Adam on both model, even use momentum, it doesn't get better
The sequence length of RNN model is 60 firstly, then is set to 3. The loss dropped more rapidly in the latter case, but the forecast result still is a straight line
In a word, all solutions I find doesn't work.
Besides, if shuffle is True when building DataLoader, the loss skips violently between epochs. But it drops slowly and close to an constant eventually when shuffle is False.
What could be the best way to avoid the problem?
Thanks in advance!
I am currently trying train a CNN using PyTorch to predict a subject's age. The age group ranges from 0 to 116. I used the same model to train it on gender classification with two options: male or female.
I ported the same code for the age classification, I was getting errors. The error was due to our last fully connected layer not return a large enough output (in terms of matrix size, it was initially returning a 50 x 2 matrix due to our gender classifier but I switched it to 50 x 117 for the age classification based on the total age options.)
My issue now is that the training loop prints epochs with a huge loss (~3.5 while before, when training the gender classification, it was sub zero.)
Below is my code:
DataLoader class:
class MyDataset(Dataset):
def __init__(self, root_directory, csv_file, image_path, transform = None):
annotated_path = os.path.relpath(csv_file) # Path to UTKFace Dataset and Annotations
self.read_in_csv = pd.read_csv(annotated_path, index_col=False)
self.image_path = os.path.join(root_directory, image_path)
self.transform = transform
self.labels = np.asarray(self.read_in_csv.loc[:,'age'])
def __getitem__(self, index):
attr = self.labels[index]
image_name = str(self.read_in_csv.loc[index, 'file'])
image = Image.open(image_name)
if self.transform:
image = self.transform(image)
dict = {'image':image, 'label':attr}
return dict
def __len__(self):
return len(self.read_in_csv.index)
CNN Architecture:
class ConvolutionalNN(nn.Module):
def __init__(self):
super(ConvolutionalNN,self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3,96,kernel_size=7,stride=4),
nn.BatchNorm2d(96), # Number of Features
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2))
self.layer2 = nn.Sequential(
nn.Conv2d(96,256,kernel_size=5,padding=2),
nn.BatchNorm2d(256),
nn.ReLU(), # Default = False
nn.MaxPool2d(kernel_size=3,stride=2))
self.layer3 = nn.Sequential(
nn.Conv2d(256,384,kernel_size=3,padding=1),
nn.BatchNorm2d(384),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride=2))
self.fc1 = nn.Linear(384*6*6,512)
self.fc2 = nn.Linear(512,512)
self.fc3 = nn.Linear(512,117)
def forward(self,x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = out.view(out.size(0),-1)
#print out.size()
out = F.dropout(F.relu(self.fc1(out)))
out = F.dropout(F.relu(self.fc2(out)))
out = self.fc3(out)
return out
Training Loop:
def training_loop(checkpoint = None, best=False):
current_epoch = 1
num_epochs = 50
train_acc_history = []
val_acc_history = []
epoch_history = []
learning_rate = 0.001
best_val_acc = 0.0
is_best = False
criterion = nn.CrossEntropyLoss()
## Predict the Age and Gender of the Human in the Image
optimizer = torch.optim.SGD(cnn.parameters(),lr=0.001,momentum=0.9)
if checkpoint is not None:
is_best = best
current_epoch = checkpoint['epoch']
train_acc_history = checkpoint['train_acc_history']
val_acc_history = checkpoint['val_acc_history']
best_val_acc = checkpoint['best_val_acc']
optimizer.load_state_dict(checkpoint['optimizer'])
epoch_history = checkpoint['epoch_history']
print('Uploading our images now...')
for epoch in range(current_epoch, num_epochs + current_epoch):
print('Starting epoch %d / %d' % (epoch + 1, num_epochs + current_epoch))
print('Learning Rate for this epoch: {}'.format(learning_rate))
for i, batch in enumerate(train_loader):
images, labels = batch['image'], batch['label']
images = images.clone().detach()
labels = labels.clone().detach()
if use_gpu:
images = images.cuda()
labels = labels.cuda()
optimizer.zero_grad()
pred_labels = cnn(images)
loss = criterion(pred_labels,labels)
loss.backward()
optimizer.step()
So this is my code. It does not seem to be training well.
Please let me know on what could be done to fix this.
When I use linear or No activation in the last Discriminator layer using weight clipping Discriminator accuracy goes to 1 and Generator goes to 0. In case when I remove weight clipping, Generator accuracy goes to 1 and discriminator goes to 0 around 300 iterations. But when I use sigmoid activation as the last layer in the discriminator with clipping Generator accuracy goes to 1 and without clipping the generator loss get stuck while accuracies going as they should around 0.5.
NOTE - in all cases, results are produced and all of the show WARNING:tensorflow:Discrepancy between trainable weights and collected trainable weights, did you set model.trainable without calling model.compile after ?
Code is given here, please do not mind the indentation on copying and pasting it's everywhere -
class WGAN():
def __init__(self,
input_dim,
disc_filter,
disc_kernel,
disc_strides,
disc_dropout,
disc_lr,
gen_filter,
gen_kernel,
gen_strides,
gen_upsample,
gen_lr,
z_dim,
batch_size):
self.input_dim = input_dim
self.disc_filter = disc_filter
self.disc_kernel = disc_kernel
self.disc_strides = disc_strides
self.disc_dropout = disc_dropout
self.disc_lr = disc_lr
self.gen_filter = gen_filter
self.gen_kernel = gen_kernel
self.gen_strides = gen_strides
self.gen_upsample = gen_upsample
self.gen_lr = gen_lr
self.z_dim = z_dim
self.batch_size = batch_size
self.weight_init = RandomNormal(mean=0., stddev=0.02)
self.d_losses = []
self.g_losses = []
self.epoch = 0
self.Discriminator()
self.Generator()
self.full_model()
def wasserstein(self, y_true, y_pred):
return -K.mean(y_true * y_pred)
def Discriminator(self):
disc_input = Input(shape=self.input_dim, name='discriminator_input')
x = disc_input
for i in range(len(self.disc_filter)):
x = Conv2D(filters=self.disc_filter[i], kernel_size=self.disc_kernel[i], strides=self.disc_strides[i], padding='same', name='disc_'+str(i))(x)
x = LeakyReLU()(x)
x = Dropout(self.disc_dropout)(x)
x = BatchNormalization()(x)
x = Flatten()(x)
disc_output = Dense(1, activation='sigmoid', kernel_initializer = self.weight_init)(x)
self.discriminator = Model(disc_input, disc_output)
def Generator(self):
gen_input = Input(shape=(self.z_dim,), name='generator_input')
x = gen_input
x = Dense(7*7*self.batch_size, kernel_initializer = self.weight_init)(x)
x = LeakyReLU()(x)
x = BatchNormalization()(x)
x = Reshape(target_shape=(7,7,self.batch_size))(x)
for i in range(len(self.gen_filter)):
if self.gen_upsample[i]==2:
x = UpSampling2D(size=self.gen_upsample[i], name='upsample_'+str(i/2))(x)
x = Conv2D(filters=self.gen_filter[i], kernel_size=self.gen_kernel[i], strides=self.gen_strides[i], padding='same', name='gen_'+str(i))(x)
else:
x = Conv2DTranspose(filters=self.gen_filter[i], kernel_size=self.gen_kernel[i], strides=self.gen_strides[i], padding='same', name='gen_'+str(i))(x)
if i<len(self.gen_filter)-1:
x = BatchNormalization()(x)
x = LeakyReLU()(x)
else:
x = Activation("tanh")(x)
gen_output = x
self.generator = Model(gen_input, gen_output)
def set_trainable(self, model, val):
model.trainable=val
for l in model.layers:
l.trainable=val
def full_model(self):
### COMPILE DISCRIMINATOR
self.discriminator.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
### COMPILE THE FULL GAN
self.set_trainable(self.discriminator, False)
self.discriminator.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
model_input = Input(shape=(self.z_dim,), name='model_input')
model_output = self.discriminator(self.generator(model_input))
self.model = Model(model_input, model_output)
self.model.compile(optimizer= Adam(self.disc_lr), loss = self.wasserstein, metrics=['accuracy'])
self.set_trainable(self.discriminator, True)
def train_generator(self, batch_size):
valid = np.ones((batch_size,1))
noise = np.random.normal(0, 1, (batch_size, self.z_dim))
return self.model.train_on_batch(noise, valid)
def train_discriminator(self, x_train, batch_size, using_generator):
valid = np.ones((batch_size,1))
fake = np.zeros((batch_size,1))
if using_generator:
true_imgs = next(x_train)[0]
if true_imgs.shape[0] != batch_size:
true_imgs = next(x_train)[0]
else:
idx = np.random.randint(0, x_train.shape[0], batch_size)
true_imgs = x_train[idx]
noise = np.random.normal(0, 1, (batch_size, self.z_dim))
gen_imgs = self.generator.predict(noise)
d_loss_real, d_acc_real = self.discriminator.train_on_batch(true_imgs, valid)
d_loss_fake, d_acc_fake = self.discriminator.train_on_batch(gen_imgs, fake)
d_loss = 0.5 * (d_loss_real + d_loss_fake)
d_acc = 0.5 * (d_acc_real + d_acc_fake)
for l in self.discriminator.layers:
weights = l.get_weights()
weights = [np.clip(w, -0.01, 0.01) for w in weights]
l.set_weights(weights)
return [d_loss, d_loss_real, d_loss_fake, d_acc, d_acc_real, d_acc_fake]
def train(self, x_train, batch_size, epochs, print_every_n_batches = 50, using_generator = False):
for epoch in range(self.epoch, self.epoch + epochs):
d = self.train_discriminator(x_train, batch_size, using_generator)
g = self.train_generator(batch_size)
if self.epoch % print_every_n_batches == 0:
print ("%d [D loss: (%.3f)(R %.3f, F %.3f)] [D acc: (%.3f)(%.3f, %.3f)] [G loss: %.3f] [G acc: %.3f]" % (epoch, d[0], d[1], d[2], d[3], d[4], d[5], g[0], g[1]))
self.d_losses.append(d)
self.g_losses.append(g)
self.epoch+=1
I'm using Keras with the TensorFlow backend. I've just figured out how to train and classify sequences of different lengths without masking, because I can't get masking to work. In the toy example I'm working with, I'm trying to train an LSTM to detect whether a sequence of arbitrary length starts with a 1 or not.
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
def gen_sig(num_samples, seq_len):
one_indices = np.random.choice(a=num_samples, size=num_samples // 2, replace=False)
x_val = np.zeros((num_samples, seq_len), dtype=np.bool)
x_val[one_indices, 0] = 1
y_val = np.zeros(num_samples, dtype=np.bool)
y_val[one_indices] = 1
return x_val, y_val
N_train = 100
N_test = 10
recall_len = 20
X_train, y_train = gen_sig(N_train, recall_len)
X_test, y_test = gen_sig(N_train, recall_len)
print('Build STATEFUL model...')
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Train...')
for epoch in range(15):
mean_tr_acc = []
mean_tr_loss = []
for seq_idx in range(X_train.shape[0]):
start_val = X_train[seq_idx, 0]
assert y_train[seq_idx] == start_val
assert tuple(np.nonzero(X_train[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_train[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
x_in = np.array([[[X_train[seq_idx][j]]]])
tr_loss, tr_acc = model.train_on_batch(x_in, y_in)
mean_tr_acc.append(tr_acc)
mean_tr_loss.append(tr_loss)
model.reset_states()
print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
print('loss training = {}'.format(np.mean(mean_tr_loss)))
print('___________________________________')
mean_te_acc = []
mean_te_loss = []
for seq_idx in range(X_test.shape[0]):
start_val = X_test[seq_idx, 0]
assert y_test[seq_idx] == start_val
assert tuple(np.nonzero(X_test[seq_idx, :]))[0].shape[0] == start_val
y_in = np.array([y_test[seq_idx]], dtype=np.bool)
for j in range(np.random.choice(a=np.arange(5, recall_len+1))):
te_loss, te_acc = model.test_on_batch(np.array([[[X_test[seq_idx][j]]]], dtype=np.bool), y_in)
mean_te_acc.append(te_acc)
mean_te_loss.append(te_loss)
model.reset_states()
print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
print('loss testing = {}'.format(np.mean(mean_te_loss)))
print('___________________________________')
As seen in the code, my error is being batched over each time-step. This is bad for multiple reasons. How do I train the network in two steps? For example:
Run a bunch of values through the network to accumulate the error
Adjust the weights of the network given this accumulated error
To do what is described in the original question, the easiest way is to train the original network with masking, but then test with a stateful network so any length input can be classified:
import numpy as np
np.random.seed(1)
import tensorflow as tf
tf.set_random_seed(1)
from keras import models
from keras.layers import Dense, Masking, LSTM
import matplotlib.pyplot as plt
def stateful_model():
hidden_units = 256
model = models.Sequential()
model.add(LSTM(hidden_units, batch_input_shape=(1, 1, 1), return_sequences=False, stateful=True))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
return model
def train_rnn(x_train, y_train, max_len, mask):
epochs = 10
batch_size = 200
vec_dims = 1
hidden_units = 256
in_shape = (max_len, vec_dims)
model = models.Sequential()
model.add(Masking(mask, name="in_layer", input_shape=in_shape,))
model.add(LSTM(hidden_units, return_sequences=False))
model.add(Dense(1, activation='relu', name='output'))
model.compile(loss='binary_crossentropy', optimizer='rmsprop')
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
validation_split=0.05)
return model
def gen_train_sig_cls_pair(t_stops, num_examples, mask):
x = []
y = []
max_t = int(np.max(t_stops))
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, max_t), dtype=np.int8)
sig[one_indices, 0] = 1
sig[:, t_stop:] = mask
x.append(sig)
cls = np.zeros(num_examples, dtype=np.bool)
cls[one_indices] = 1
y.append(cls)
return np.concatenate(x, axis=0), np.concatenate(y, axis=0)
def gen_test_sig_cls_pair(t_stops, num_examples):
x = []
y = []
for t_stop in t_stops:
one_indices = np.random.choice(a=num_examples, size=num_examples // 2, replace=False)
sig = np.zeros((num_examples, t_stop), dtype=np.bool)
sig[one_indices, 0] = 1
x.extend(list(sig))
cls = np.zeros((num_examples, t_stop), dtype=np.bool)
cls[one_indices] = 1
y.extend(list(cls))
return x, y
if __name__ == '__main__':
noise_mag = 0.01
mask_val = -10
signal_lengths = (10, 15, 20)
x_in, y_in = gen_train_sig_cls_pair(signal_lengths, 10, mask_val)
mod = train_rnn(x_in[:, :, None], y_in, int(np.max(signal_lengths)), mask_val)
testing_dat, expected = gen_test_sig_cls_pair(signal_lengths, 3)
state_mod = stateful_model()
state_mod.set_weights(mod.get_weights())
res = []
for s_i in range(len(testing_dat)):
seq_in = list(testing_dat[s_i])
seq_len = len(seq_in)
for t_i in range(seq_len):
res.extend(state_mod.predict(np.array([[[seq_in[t_i]]]])))
state_mod.reset_states()
fig, axes = plt.subplots(2)
axes[0].plot(np.concatenate(testing_dat), label="input")
axes[1].plot(res, "ro", label="result", alpha=0.2)
axes[1].plot(np.concatenate(expected, axis=0), "bo", label="expected", alpha=0.2)
axes[1].legend(bbox_to_anchor=(1.1, 1))
plt.show()