Expected improvement optimization with GEKKO - machine-learning

I am trying to optimize the expected improvement function for Bayesian optimization applications. For this, I am using the scikit-learn Gaussian Process model embedded into the GEKKO optimization suite. When solving the optimization model the following error is shown:
#error: Model Expression
*** Error in syntax of function string: Missing operator
Position: 128
((0.5)((1+(((2/pi))(atan(((((2)((((v1-i320))/(((2)(sqrt(2))))))))((1+(((((v1-i320))/(((2)(sqrt(2))))))^(4))))))))-(0.0)))=0)
The code is below
import numpy as np
import pandas as pd
from gekko import GEKKO
from gekko.ML import Gekko_GPR
from gekko.ML import CustomMinMaxGekkoScaler
import sklearn.gaussian_process as gpr
# Training data
x_train = np.array([0.6, 0.9, 0.3, 0.45, 1.05, 0.75, 0.15,
0.225, 0.825, 1.125]).reshape(-1,1)
y_train = np.array([-0.809016994, 0.809016994, -0.309016994, -0.951056516,
0.951056516, -1.83772E-16, 0.587785252, 0.156434465,
0.4539905, 0.707106781]).reshape(-1,1)
# Additional information
lb = [0.0] # lower bound
ub = [1.2] # upper bound
n_dim = len(lb) # number of dimension
n_train = x_train.shape[0] # size of the training set
# Function to fit the Gaussian process
def gp_fit(data_s, gp_reg):
d_array = data_s.to_numpy()
x_tr = d_array[:,1].reshape(-1,1)
y_tr = d_array[:,-1].reshape(-1,1)
gp_model = gp_reg.fit(x_tr, y_tr)
return gp_model # it delivers the gp model object
# gekko scaler definition
data = pd.DataFrame(np.hstack((x_train, y_train)), columns=['x', 'y'])
features = ['x']
label = ['y']
scaler = CustomMinMaxGekkoScaler(data,features,label)
data_s = scaler.scaledData() # data scaled
# kernel and gp regressor definition
bounds_m = (1e-4, 3000) # bounds for the hyperparameters
kernel_main = gpr.kernels.Matern(length_scale=np.ones(n_dim),
length_scale_bounds=bounds_m,
nu=2.5)
constant_kernel = gpr.kernels.ConstantKernel(1.0, constant_value_bounds=bounds_m)
white_kernel = gpr.kernels.WhiteKernel(1.0, noise_level_bounds=(1.13e-07, 1.83e-02))
K_cov = constant_kernel*kernel_main + white_kernel
gp_regressor = gpr.GaussianProcessRegressor(kernel=K_cov, alpha=1e-8,
optimizer='fmin_l_bfgs_b',
n_restarts_optimizer=50,
random_state=20)
# gp_model creation
gp_model = gp_fit(data_s, gp_regressor) # trainig the model with the data scaled
# gekko model definition and solution
m = GEKKO(remote=False) # model definition
x = m.Var(0.4, lb=0, ub=1) # definition of variables scaled
y, std = Gekko_GPR(gp_model, m).predict(x, return_std=True) # gp prediction with std
# constants
epsilon = m.Const(0.01, 'epsilon')
best_y = m.Const(1.0, 'best_y')
pi_m = m.Const(np.pi, 'pi')
# equations
Z = (y - best_y - epsilon)/std == 0.0
pdf = 1/(std*m.sqrt(2*pi_m))*m.exp(-0.5*((x-y)/std)**2) == 0.0
erf = 2/pi_m*m.atan(2*((x-y)/(2*m.sqrt(2)))*(1+((x-y)/(2*m.sqrt(2)))**4)) == 0.0
cdf = 0.5*(1+erf) == 0
m.Equations([Z, pdf, erf, cdf])
# objective function
ei = Z*std*cdf + std*pdf
m.Maximize(ei)
m.options.IMODE = 3 # steady state optimization
m.solve(disp=True)

I was able to fix your error, but I am unable to get it fully working. Here is what I suggest:
for your objective function and cdf function, you are using Gekko equations from variables like erf. I suggest reformatting some of that with gekko Intermediate values, like so:
# equations
tZ = m.Intermediate((y - best_y - epsilon)/std)
Z = tZ == 0.0
tpdf = m.Intermediate(1/(std*m.sqrt(2*pi_m))*m.exp(-0.5*((x-y)/std)**2))
pdf = tpdf == 0.0
terf = m.Intermediate(2/pi_m*m.atan(2*((x-y)/(2*m.sqrt(2)))*(1+((x-y)/(2*m.sqrt(2)))**4)))
erf = terf == 0.0
tcdf = m.Intermediate(0.5*(1+terf))
cdf = tcdf == 0.0
m.Equations([Z, pdf, erf, cdf])
# objective function
ei = tZ*std*tcdf + std*tpdf
Changing this causes Gekko to throw a "TOO_FEW_DEGREES_OF_FREEDOM" Error, as you are trying to solve 4 equations with 1 variable. I suggest making these equations a soft constraint (trying to minimize them rather than set them to 0) or add additional variables to the problem statement.

Related

Why my feature map seems incorrect when the prediction of the class is correct

from torchvision.models.feature_extraction import create_feature_extractor
# Data processing
preprocess = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]
)])
image_path = './data/test_images/anemone.jpg'
image = Image.open(image_path).convert('RGB')
img_processed = preprocess(image)
batch_img_cat_tensor = torch.unsqueeze(img_processed, 0)
# Model initialization
resnet50_model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
# Eval model for predictions
resnet50_model.eval()
# Creating feature extractor (Detailed example here: https://pytorch.org/blog/FX-feature-extraction-torchvision/)
feature_extractor = create_feature_extractor(resnet50_model,
return_nodes=['layer4.2.conv3', 'fc'])
# Forward pass
out = feature_extractor(batch_img_cat_tensor)
pred = torch.argmax(out['fc'])
# Transforming last conv output to numpy and reshaping it so that the channels would be last
last_conv_output = torch.squeeze(out['layer4.2.conv3'])
last_conv_output = torch.reshape(last_conv_output, (7, 7, -1))
last_conv_output = last_conv_output.detach().numpy()
last_conv_output = last_conv_output.astype(np.uint8)
Calculating the upscale factors for last conv output
width_factor = int(image.size[0] / last_conv_output.shape[0])
height_factor = int(image.size[1] / last_conv_output.shape[1])
# Getting the shapes of the last conv output
last_conv_w, last_conv_h, n_channels = last_conv_output.shape
# Calculate the
upscaled_h = last_conv_h * height_factor
upscaled_w = last_conv_w * width_factor
# Upscaling the last_conv_output so that it could be "masked" with original image
upsampled_last_conv_output = np.zeros((upscaled_h, upscaled_w, n_channels))
upsampled_last_conv_output = []
for x in range(0, n_channels, 512):
upsampled_last_conv_output.append(cv2.resize(last_conv_output[:, :, x:x+512], (upscaled_w, upscaled_h), cv2.INTER_CUBIC))
upsampled_last_conv_output = np.concatenate(upsampled_last_conv_output, axis=2)
# Getting the weights of the predicted class
last_layer_weights = resnet50_model.fc.weight.T
last_layer_weights_for_pred = last_layer_weights[:, pred]
# Dot multiplying the upsampled_last_conv_output with last_layer_weights_for_pred
upsampled_last_conv_output = upsampled_last_conv_output.reshape((-1, 2048))
heat_map = np.dot(upsampled_last_conv_output,
last_layer_weights_for_pred.detach().numpy()).reshape(upscaled_h, upscaled_w)
# Plotting the results
fig, ax = plt.subplots()
ax.imshow(image)
ax.imshow(heat_map, cmap='jet', alpha=0.5)
ax.set_title(prediction)
I have followed the tutorial from here: https://www.youtube.com/watch?v=GiyldmoYe_M&t=665s&ab_channel=DigitalSreeni
The main problem with this is that I get the feature map that looks like this:
As you see it looks like the model reacts to multiple areas on the image and no matter what image I use it always has the biggest reaction in the middle.
PS. If you think this question should be posted on the AI stack exchange please notify me
I have found an error I made. It was that after creating a
heat_map = np.dot(upsampled_last_conv_output, last_layer_weights_for_pred.detach().numpy()).reshape(upscaled_h, upscaled_w)
I had to apply this as well:
heat_map = heat_map - np.min(heat_map)
heat_map = heat_map / np.max(heat_map)
Since I normalized the image, the generated heatmap was also normalized, so I needed to "denormalize" it back to it's original values.

XGBoost using Hyperopt. Facing issues while Hyper-Parameter Tuning

I am trying to Hyper-Parameter Tune XGBoostClassifier using Hyperopt. But I am facing a error. Please find below the code that I am using and the error as well:-
Step_1: Objective Function
import csv
from hyperopt import STATUS_OK
from timeit import default_timer as timer
MAX_EVALS = 200
N_FOLDS = 10
def objective(params, n_folds = N_FOLDS):
"""Objective function for XGBoost Hyperparameter Optimization"""
# Keep track of evals
global ITERATION
ITERATION += 1
# # Retrieve the subsample if present otherwise set to 1.0
# subsample = params['boosting_type'].get('subsample', 1.0)
# # Extract the boosting type
# params['boosting_type'] = params['boosting_type']['boosting_type']
# params['subsample'] = subsample
# Make sure parameters that need to be integers are integers
for parameter_name in ['max_depth', 'colsample_bytree',
'min_child_weight']:
params[parameter_name] = int(params[parameter_name])
start = timer()
# Perform n_folds cross validation
cv_results = xgb.cv(params, train_set, num_boost_round = 10000,
nfold = n_folds, early_stopping_rounds = 100,
metrics = 'auc', seed = 50)
run_time = timer() - start
# Extract the best score
best_score = np.max(cv_results['auc-mean'])
# Loss must be minimized
loss = 1 - best_score
# Boosting rounds that returned the highest cv score
n_estimators = int(np.argmax(cv_results['auc-mean']) + 1)
# Write to the csv file ('a' means append)
of_connection = open(out_file, 'a')
writer = csv.writer(of_connection)
writer.writerow([loss, params, ITERATION, n_estimators,
run_time])
# Dictionary with information for evaluation
return {'loss': loss, 'params': params, 'iteration': ITERATION,
'estimators': n_estimators, 'train_time': run_time,
'status': STATUS_OK}
I have defined the sample space and the optimization algorithm as well. While running Hyperopt, I am encountering this error below. The error is in the objective function.
Error:KeyError: 'auc-mean'
<ipython-input-62-8d4e97f16929> in objective(params, n_folds)
25 run_time = timer() - start
26 # Extract the best score
---> 27 best_score = np.max(cv_results['auc-mean'])
28 # Loss must be minimized
29 loss = 1 - best_score
First, print cv_results and see which key exists.
In the below example notebook the keys were : 'test-auc-mean' and 'train-auc-mean'
See cell 5 here:
https://www.kaggle.com/tilii7/bayesian-optimization-of-xgboost-parameters
#avvinci is correct. Let me explain it further.
cv_results = xgb.cv(params, train_set, num_boost_round = 10000,
nfold = n_folds, early_stopping_rounds = 100,
metrics = 'auc', seed = 50)
This is xgboost cross validation and it return the evaluation history. The history is essentially a pandas dataframe. The column names in the dataframe depends upon what is being passes as in, train, test and eval.
best_score = np.max(cv_results['auc-mean'])
Here you are looking for the best auc in the evaluation history which are called
'test-auc-mean' and 'train-auc-mean'
as #avvinci suggested. The column name 'auc-mean' does not exists so it throws KeyError. Either you call it train-auc-mean for best auc in training set or test-auc-mean for best auc in test set.
If you are in doubt, just run that cross validation outside and use head on the cv_results.

Why should the training label for Generator in GAN be always True?

I am currently learning deep learning especially GAN.
I found a simple code of GAN from a web site below.
https://medium.com/#devnag/generative-adversarial-networks-gans-in-50-lines-of-code-pytorch-e81b79659e3f
However, in the code, I don't understand why we always need to give true label to Generator as below.
for g_index in range(g_steps):
# 2. Train G on D's response (but DO NOT train D on these labels)
G.zero_grad()
gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
g_fake_data = G(gen_input)
dg_fake_decision = D(preprocess(g_fake_data.t()))
g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
g_error.backward()
g_optimizer.step() # Only optimizes G's parameters
Specifically, on this line.
g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
Input data for Generator is fake data(includes noise), so if we assign True labels on those input data, I think Generator ends up creating data which is similar to fake data(doesn't look like genuine). Is my understanding wrong? Sorry for the silly question, but if you have knowledge, plz help me out.
I'll put a whole code below.
#!/usr/bin/env python
# Generative Adversarial Networks (GAN) example in PyTorch.
# See related blog post at https://medium.com/#devnag/generative-adversarial-networks-gans-in-50-lines-of-code-pytorch-e81b79659e3f#.sch4xgsa9
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
# Data params
data_mean = 4
data_stddev = 1.25
# Model params
g_input_size = 1 # Random noise dimension coming into generator, per output vector
g_hidden_size = 50 # Generator complexity
g_output_size = 1 # size of generated output vector
d_input_size = 100 # Minibatch size - cardinality of distributions
d_hidden_size = 50 # Discriminator complexity
d_output_size = 1 # Single dimension for 'real' vs. 'fake'
minibatch_size = d_input_size
d_learning_rate = 2e-4 # 2e-4
g_learning_rate = 2e-4
optim_betas = (0.9, 0.999)
num_epochs = 30000
print_interval = 200
d_steps = 1 # 'k' steps in the original GAN paper. Can put the discriminator on higher training freq than generator
g_steps = 1
# ### Uncomment only one of these
#(name, preprocess, d_input_func) = ("Raw data", lambda data: data, lambda x: x)
(name, preprocess, d_input_func) = ("Data and variances", lambda data: decorate_with_diffs(data, 2.0), lambda x: x * 2)
print("Using data [%s]" % (name))
# ##### DATA: Target data and generator input data
def get_distribution_sampler(mu, sigma):
return lambda n: torch.Tensor(np.random.normal(mu, sigma, (1, n))) # Gaussian
def get_generator_input_sampler():
return lambda m, n: torch.rand(m, n) # Uniform-dist data into generator, _NOT_ Gaussian
# ##### MODELS: Generator model and discriminator model
class Generator(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Generator, self).__init__()
self.map1 = nn.Linear(input_size, hidden_size)
self.map2 = nn.Linear(hidden_size, hidden_size)
self.map3 = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = F.elu(self.map1(x))
x = F.sigmoid(self.map2(x))
return self.map3(x)
class Discriminator(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super(Discriminator, self).__init__()
self.map1 = nn.Linear(input_size, hidden_size)
self.map2 = nn.Linear(hidden_size, hidden_size)
self.map3 = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = F.elu(self.map1(x))
x = F.elu(self.map2(x))
return F.sigmoid(self.map3(x))
def extract(v):
return v.data.storage().tolist()
def stats(d):
return [np.mean(d), np.std(d)]
def decorate_with_diffs(data, exponent):
mean = torch.mean(data.data, 1, keepdim=True)
mean_broadcast = torch.mul(torch.ones(data.size()), mean.tolist()[0][0])
diffs = torch.pow(data - Variable(mean_broadcast), exponent)
return torch.cat([data, diffs], 1)
d_sampler = get_distribution_sampler(data_mean, data_stddev)
gi_sampler = get_generator_input_sampler()
G = Generator(input_size=g_input_size, hidden_size=g_hidden_size, output_size=g_output_size)
D = Discriminator(input_size=d_input_func(d_input_size), hidden_size=d_hidden_size, output_size=d_output_size)
criterion = nn.BCELoss() # Binary cross entropy: http://pytorch.org/docs/nn.html#bceloss
d_optimizer = optim.Adam(D.parameters(), lr=d_learning_rate, betas=optim_betas)
g_optimizer = optim.Adam(G.parameters(), lr=g_learning_rate, betas=optim_betas)
for epoch in range(num_epochs):
for d_index in range(d_steps):
# 1. Train D on real+fake
D.zero_grad()
# 1A: Train D on real
d_real_data = Variable(d_sampler(d_input_size))
d_real_decision = D(preprocess(d_real_data))
d_real_error = criterion(d_real_decision, Variable(torch.ones(1))) # ones = true
d_real_error.backward() # compute/store gradients, but don't change params
# 1B: Train D on fake
d_gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
d_fake_data = G(d_gen_input).detach() # detach to avoid training G on these labels
d_fake_decision = D(preprocess(d_fake_data.t()))
d_fake_error = criterion(d_fake_decision, Variable(torch.zeros(1))) # zeros = fake
d_fake_error.backward()
d_optimizer.step() # Only optimizes D's parameters; changes based on stored gradients from backward()
for g_index in range(g_steps):
# 2. Train G on D's response (but DO NOT train D on these labels)
G.zero_grad()
gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
g_fake_data = G(gen_input)
dg_fake_decision = D(preprocess(g_fake_data.t()))
g_error = criterion(dg_fake_decision, Variable(torch.ones(1))) # we want to fool, so pretend it's all genuine
g_error.backward()
g_optimizer.step() # Only optimizes G's parameters
if epoch % print_interval == 0:
print("%s: D: %s/%s G: %s (Real: %s, Fake: %s) " % (epoch,
extract(d_real_error)[0],
extract(d_fake_error)[0],
extract(g_error)[0],
stats(extract(d_real_data)),
stats(extract(d_fake_data))))
In this part of the code you are training G to fool D, so G generates fake data and asks D whether it thinks it's real (true labels), D's gradients are then propogated all the way to G (this is possible as D's input was G's output) so that it will learn to better fool D in the next iteration.
The inputs of G are not trainable and G only tries to transform them into real data (data similar to what d_sampler generates)

Unable to write gradient step in theano for rnn

I have following code in which I convert words to one hot vectors and do a gradient descent in theano using rnn for predicting next words given a sequence of words(basically a language model).
# coding: utf-8
# In[68]:
#Importing stuff
import theano
import theano.tensor as T
import numpy as np
# In[69]:
import nltk
import sys
import operator
import csv
import itertools
from utils import *
from datetime import datetime
# In[70]:
#Fixing vocabulary size for one hot vectors and some initialization stuff
v_size = 8000
unknown_token = "UNKNOWN_TOKEN"
start_token = "<s>"
end_token = "</s>"
# In[71]:
#Read data and start preprocessing
with open('reddit-comments-2015-08.csv','rb') as f:
reader = csv.reader(f, skipinitialspace=True)
reader.next()
sentences = list(itertools.chain(*[nltk.sent_tokenize(x[0].decode('utf-8')) for x in reader]))
print len(sentences)
# In[72]:
#Tokenize the sentences and add start and end tokens
tokenized_sentences = [nltk.word_tokenize(s) for s in sentences]
tokenized_sentences = [[start_token] + s + [end_token] for s in tokenized_sentences]
# In[73]:
#Get word frequencies and use only most frequent words in vocabulary
word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences))
vocab = word_freq.most_common(v_size-1)
# In[74]:
#Do mapping and reverse mapping
index_to_word = [x[0] for x in vocab]
index_to_word.append(unknown_token)
word_to_index = {w:i for i,w in enumerate(index_to_word)}
#Removing less frequent words
for i, s in enumerate(tokenized_sentences):
tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in s]
#Got vectors but they are not one hot
X_train = np.asarray([[word_to_index[w] for w in s[:-1]] for s in tokenized_sentences])
Y_train = np.asarray([[word_to_index[w] for w in s[1:]] for s in tokenized_sentences])
#Preprocessing ends here
# In[75]:
#Take only one sentence for now
X_train = X_train[0]
Y_train = Y_train[0]
# In[76]:
#Make input and output as onehot vectors. This can easily be replaced with vectors generated by word2vec.
X_train_onehot = np.eye(v_size)[X_train]
X = theano.shared(np.array(X_train_onehot).astype('float32'), name = 'X')
Y_train_onehot = np.eye(v_size)[Y_train]
Y = theano.shared(np.array(Y_train_onehot).astype('float32'), name = 'Y')
# In[77]:
#Initializing U, V and W
i_dim = v_size
h_dim = 100
o_dim = v_size
U = theano.shared(np.random.randn(i_dim, h_dim).astype('float32'), name = 'U')
W = theano.shared(np.random.randn(h_dim, h_dim).astype('float32'), name = 'W')
V = theano.shared(np.random.randn(h_dim, o_dim).astype('float32'), name = 'V')
# In[78]:
#forward propagation
s = T.vector('s')
results, updates = theano.scan(lambda x, sm1: T.tanh( T.dot(x, U) + T.dot(sm1, W)),
sequences = X_train_onehot,
outputs_info = s
)
y_hat = T.dot(results, V)
forward_propagation = theano.function(inputs=[s], outputs = y_hat)
# In[80]:
#loss
loss = T.sum(T.nnet.categorical_crossentropy(y_hat, Y))
# In[81]:
#Gradients
dw = T.grad(loss, W)
du = T.grad(loss, U)
dv = T.grad(loss, V)
# In[82]:
#BPTT
learning_rate = T.scalar('learning_rate')
gradient_step = theano.function(inputs = [s, learning_rate],
updates = (
(U, U - learning_rate * du),
(V, V - learning_rate * dv),
(W, W - learning_rate * dw)
)
)
# In[ ]:
But it keeps throwing error at gradient step. I am posting full code because I don't know which step is affecting the error. The following is the screenshot of error in jupyter notebook.
I solved it. The problem is with mismatch of types. I had to typecast du, dv, dw, learning rate to float32. By default, they are float64.

Tensorflow Grid3LSTMCell visualization

I'm having a difficult time visualizing what this Tensorflow class creates. I want to implement a LSTM RNN that handles 3D data.
class Grid3LSTMCell(GridRNNCell):
"""3D BasicLSTM cell
This creates a 2D cell which receives input and gives output in the first dimension.
The first dimension can optionally be non-recurrent if `non_recurrent_fn` is specified.
The second and third dimensions are LSTM.
"""
def __init__(self, num_units, tied=False, non_recurrent_fn=None,
use_peepholes=False, forget_bias=1.0):
super(Grid3LSTMCell, self).__init__(num_units=num_units, num_dims=3,
input_dims=0, output_dims=0, priority_dims=0, tied=tied,
non_recurrent_dims=None if non_recurrent_fn is None else 0,
cell_fn=lambda n, i: rnn_cell.LSTMCell(
num_units=n, input_size=i, forget_bias=forget_bias,
use_peepholes=use_peepholes),
non_recurrent_fn=non_recurrent_fn)
The class is found in `from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell`.
This is difficult to explain, so I've provided a drawing. Here is what I want it to do...
However the comment sounds like it isn't doing this. The comment makes it sound like the RNN is still a flat RNN, where the first dimension is outputting to, what is commonly called, the outputs variable (see below). The second dimension is outputting to the next step in the RNN, and the third dimension is outputting to the next hidden layer.
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
If this is the case, what is the point in having the first and second dimensions? Aren't they essentially the same thing? The BasicLSTMCell sends the output to the next step into outputs -- in other words they are one in the same.
Clarity?
For reference, here is my example code...
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
import numpy as np
#define parameters
learning_rate = 0.01
batch_size = 2
n_input_x = 10
n_input_y = 10
n_input_z = 10
n_hidden = 128
n_classes = 2
n_output = n_input_x * n_classes
x = tf.placeholder("float", [n_input_x, n_input_y, n_input_z])
y = tf.placeholder("float", [n_input_x, n_input_y, n_input_z, n_classes])
weights = {}
biases = {}
for i in xrange(n_input_y * n_input_z):
weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
biases[i] = tf.Variable(tf.random_normal([n_output]))
#generate random data
input_data = np.random.rand(n_input_x, n_input_y, n_input_z)
ground_truth = np.random.rand(n_input_x, n_input_y, n_input_z, n_classes)
#build GridLSTM
def GridLSTM_network(x):
x = tf.reshape(x, [-1,n_input_x])
x = tf.split(0, n_input_y * n_input_z, x)
lstm_cell = grid_rnn_cell.Grid3LSTMCell(n_hidden)
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
output = []
for i in xrange(n_input_y * n_input_z):
output.append(tf.matmul(outputs[i], weights[i]) + biases[i])
return output
#initialize network, cost, optimizer and all variables
pred = GridLSTM_network(x)
# import pdb
# pdb.set_trace()
pred = tf.pack(pred)
pred = tf.transpose(pred,[1,0,2])
pred= tf.reshape(pred, [-1, n_input_x, n_input_y, n_input_z, n_classes])
temp_pred = tf.reshape(pred, [-1,n_classes])
temp_y = tf.reshape(y,[-1, n_classes])
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(0,tf.cast(tf.sub(tf.nn.sigmoid(temp_pred),temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 0
while 1:
print step
step = step + 1
# pdb.set_trace
sess.run(optimizer, feed_dict={x: input_data, y: ground_truth})

Resources