Scipy.optimize - minimize not respecting constraints - scipy-optimize-minimize

I am using the code below to understand how SciPy optimization/minimization works. The results are not matching what I expect.
"""
Minimize: f = 2*x[0]*x[1] + 2*x[0] - x[0]**2 - 2*x[1]**2
Subject to: -2*x[0] + 2*x[1] <= -2
2*x[0] - 4*x[1] <= 0
x[0]**3 -x[1] == 0
where: 0 <= x[0] <= inf
1 <= x[1] <= inf
"""
import numpy as np
from scipy.optimize import minimize
def objective(x):
    return 2.0*x[0]*x[1] + 2.0*x[0] - x[0]**2 - 2.0*x[1]**2

def constraint1(x):
    return +2.0*x[0] - 2.0*x[1] - 2.0

def constraint2(x):
    return -2.0*x[0] + 4.0*x[1]

def constraint3(x):
    sum_eq = x[0]**3.0 - x[1]
    return sum_eq
# initial guesses
n = 2
x0 = np.zeros(n)
x0[0] = 10.0
x0[1] = 100.0
# show initial objective
print('Initial SSE Objective: ' + str(objective(x0)))
# optimize
#b = (1.0,None)
bnds = ((0.0,1000.0), (1.0,1000.0))
con1 = {'type': 'ineq', 'fun': constraint1}
con2 = {'type': 'ineq', 'fun': constraint2}
con3 = {'type': 'eq', 'fun': constraint3}
cons = ([con1, con2, con3])
solution = minimize(objective,
                    x0,
                    method='SLSQP',
                    bounds=bnds,
                    constraints=cons)
x = solution.x
print(solution)
# show final objective
print('Final SSE Objective: ' + str(objective(x)))
# print solution
print('Solution')
print('x1 = ' + str(x[0]))
print('x2 = ' + str(x[1]))
print('\n')
print('x', x)
print('constraint1', constraint1(x))
print('constraint2', constraint2(x))
print('constraint3', constraint3(x))
When I run it, this is what Python prints to the console:
Initial SSE Objective: -18080.0
fun: 2.0
jac: array([ 0.00000000e+00, -2.98023224e-08])
message: 'Optimization terminated successfully.'
nfev: 122
nit: 17
njev: 13
status: 0
success: True
x: array([2., 1.])
Final SSE Objective: 2.0
Solution
x1 = 2.0000000000010196
x2 = 1.0000000000012386
x [2. 1.]
constraint1 -4.3787196091216174e-13
constraint2 2.915001573455811e-12
constraint3 7.000000000010997
Although the optimizer says the run was successful, constraint3 is not respected: its value should be zero. What am I missing?

Your constraints are incompatible. You can eliminate the third (equality) constraint by substituting x[1] = x[0]**3, which reduces the problem to a scalar optimization and makes the issue easier to see. From constraint 3 and the lower bound on the original x[1] it follows that x[0] is not feasible between 0 and 1, so the lower bound in the 1D problem should be 1. It is then easy to see that constraint 1, which becomes x - x**3 - 1, is always negative once x[0] is at least 1, so it can never be satisfied, while constraint 2, which becomes 2*x**3 - x, is always positive there.
When I run your original problem it stops with 'Positive directional derivative for linesearch' (and the rewritten 1D problem stops with 'Inequality constraints incompatible').
Which SciPy are you using? For me it is 1.4.1.
In the plot below you can see the objective and the remaining constraints for the 1D problem (the horizontal axis is the original x[0] variable).
"""
Minimize: f = 2*x[0]*x1 + 2*x[0] - x[0]**2 - 2*x1**2
Subject to: -2*x[0] + 2*x[1] <= -2
2*x[0] - 4*x[1] <= 0
x[0]**3 -x[1] == 0
where: 0 <= x[0] <= inf
1 <= x[1] <= inf
"""
import numpy as np
from scipy.optimize import minimize
def objective(x):
return 2*x**4 + 2*x - x**2 - 2*x**6
def constraint1(x):
return x - x**3 - 1
def constraint2(x):
return 2 * x**3 - x
#
# def constraint3(x):
# sum_eq = x[0]**3.0 -x[1]
# return sum_eq
# initial guesses
n = 1
x0 = np.zeros(n)
x0[0] = 2.
# x0[1] = 100.0
# show initial objective
print('Initial SSE Objective: ' + str(objective(x0)))
# optimize
#b = (1.0,None)
bnds = ((1.0,1000.0),)
con1 = {'type': 'ineq', 'fun': constraint1}
con2 = {'type': 'ineq', 'fun': constraint2}
# con3 = {'type': 'eq', 'fun': constraint3}
cons = [
# con1,
con2,
# con3,
]
solution = minimize(objective,
x0,
method='SLSQP',
bounds=bnds,
constraints=cons)
x = solution.x
print(solution)
# show final objective
print('Final SSE Objective: ' + str(objective(x)))
# print solution
print('Solution')
print('x1 = ' + str(x[0]))
# print('x2 = ' + str(x[1]))
print('\n')
print('x', x)
print('constraint1', constraint1(x))
print('constraint2', constraint2(x))
# print('constraint3', constraint3(x))
x_a = np.linspace(1, 2, 200)
f = objective(x_a)
c1 = constraint1(x_a)
c2 = constraint2(x_a)
import matplotlib.pyplot as plt
plt.figure()
plt.plot(x_a, f, label="f")
plt.plot(x_a, c1, label="c1")
plt.plot(x_a, c2, label="c2")
plt.legend()
plt.show()
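Whatever success reports, it is also worth checking the feasibility of the returned point directly against the constraint definitions; for scipy.optimize.minimize an 'ineq' constraint means fun(x) >= 0 and an 'eq' constraint means fun(x) == 0. A minimal, self-contained sketch (constraint expressions copied from the question's two-variable script) that flags the violated equality at the point your run returned:

import numpy as np

# constraint dictionaries copied from the question's original two-variable script
cons_q = [
    {'type': 'ineq', 'fun': lambda x: 2.0*x[0] - 2.0*x[1] - 2.0},
    {'type': 'ineq', 'fun': lambda x: -2.0*x[0] + 4.0*x[1]},
    {'type': 'eq',   'fun': lambda x: x[0]**3 - x[1]},
]

def check_feasibility(x, cons, tol=1e-6):
    # print each constraint value and whether it satisfies its declared type
    for c in cons:
        v = c['fun'](x)
        ok = abs(v) <= tol if c['type'] == 'eq' else v >= -tol
        print(c['type'], v, 'OK' if ok else 'VIOLATED')

check_feasibility(np.array([2.0, 1.0]), cons_q)  # the point returned in the question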

Related

Implementing linear regression from scratch in python

I'm trying to implement linear regression in Python using the following gradient descent update formulas (these are what you get after taking the partial derivatives of the cost):
slope: w = w - a * (1/m) * sum((y_hat[i] - y[i]) * x[i])
y_intercept: b = b - a * (1/m) * sum(y_hat[i] - y[i])
but the code keeps giving me weird results. I think (I'm not sure) that the error is in the gradient_descent function.
import numpy as np

class LinearRegression:
    def __init__(self, x: np.ndarray, y: np.ndarray):
        self.x = x
        self.m = len(x)
        self.y = y

    def calculate_predictions(self, slope: int, y_intercept: int) -> np.ndarray:  # Calculate y hat.
        predictions = []
        for x in self.x:
            predictions.append(slope * x + y_intercept)
        return predictions

    def calculate_error_cost(self, y_hat: np.ndarray) -> int:
        error_valuse = []
        for i in range(self.m):
            error_valuse.append((y_hat[i] - self.y[i]) ** 2)
        error = (1/(2*self.m)) * sum(error_valuse)
        return error

    def gradient_descent(self):
        costs = []
        # initialization values
        temp_w = 0
        temp_b = 0
        a = 0.001  # Learning rate
        while True:
            y_hat = self.calculate_predictions(slope=temp_w, y_intercept=temp_b)
            sum_w = 0
            sum_b = 0
            for i in range(len(self.x)):
                sum_w += (y_hat[i] - self.y[i]) * self.x[i]
                sum_b += (y_hat[i] - self.y[i])
            w = temp_w - a * ((1/self.m) * sum_w)
            b = temp_b - a * ((1/self.m) * sum_b)
            temp_w = w
            temp_b = b
            costs.append(self.calculate_error_cost(y_hat))
            try:
                if costs[-1] > costs[-2]:  # If global minimum reached
                    return [w, b]
            except IndexError:
                pass
I used this dataset:
https://www.kaggle.com/datasets/tanuprabhu/linear-regression-dataset?resource=download
After downloading it, I load and run it like this:
import pandas
p = pandas.read_csv('linear_regression_dataset.csv')
l = LinearRegression(x= p['X'] , y= p['Y'])
print(l.gradient_descent())
But it's giving me [-568.1905905426412, -2.833321633515304], which is definitely not accurate.
I want to implement the algorithm without using external modules like scikit-learn, for learning purposes.
I tested the calculate_error_cost function and it worked as expected, and I don't think there is an error in the calculate_predictions function.
One small problem you have is that you are returning the last values of w and b, when you should be returning the second-to-last parameters (because they yield a lower cost). This should not really matter that much... unless your learning rate is too high and you are immediately getting a higher value for the cost function on the second iteration. This I believe is your real problem, judging from the dataset you shared.
The algorithm does work on the dataset, but you need to change the learning rate. I ran it in the example below and it gave the result shown in the image. One caveat: I added a limit on the number of iterations to keep the algorithm from running too long (while only marginally improving the result).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class LinearRegression:
    def __init__(self, x: np.ndarray, y: np.ndarray):
        self.x = x
        self.m = len(x)
        self.y = y

    def calculate_predictions(self, slope: int, y_intercept: int) -> np.ndarray:  # Calculate y hat.
        predictions = []
        for x in self.x:
            predictions.append(slope * x + y_intercept)
        return predictions

    def calculate_error_cost(self, y_hat: np.ndarray) -> int:
        error_valuse = []
        for i in range(self.m):
            error_valuse.append((y_hat[i] - self.y[i]) ** 2)
        error = (1/(2*self.m)) * sum(error_valuse)
        return error

    def gradient_descent(self):
        costs = []
        # initialization values
        temp_w = 0
        temp_b = 0
        iteration = 0
        a = 0.00001  # Learning rate
        while iteration < 1000:
            y_hat = self.calculate_predictions(slope=temp_w, y_intercept=temp_b)
            sum_w = 0
            sum_b = 0
            for i in range(len(self.x)):
                sum_w += (y_hat[i] - self.y[i]) * self.x[i]
                sum_b += (y_hat[i] - self.y[i])
            w = temp_w - a * ((1/self.m) * sum_w)
            b = temp_b - a * ((1/self.m) * sum_b)
            costs.append(self.calculate_error_cost(y_hat))
            try:
                if costs[-1] > costs[-2]:  # If global minimum reached
                    print(costs)
                    return [temp_w, temp_b]
            except IndexError:
                pass
            temp_w = w
            temp_b = b
            iteration += 1
        print(iteration)
        return [temp_w, temp_b]

p = pd.read_csv('linear_regression_dataset.csv')
x_data = p['X']
y_data = p['Y']
lin_reg = LinearRegression(x_data, y_data)
y_hat = lin_reg.calculate_predictions(*lin_reg.gradient_descent())

fig = plt.figure()
plt.plot(x_data, y_data, 'r.', label='Data')
plt.plot(x_data, y_hat, 'b-', label='Linear Regression')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
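For comparison, here is a vectorized sketch of the same update rule written with NumPy array operations (same cost (1/(2m)) * sum((w*x + b - y)**2) as above; the column names 'X' and 'Y' are assumed from the dataset). It can serve as a quick cross-check on the loop-based class:

import numpy as np
import pandas as pd

def fit_line(x, y, lr=1e-5, iters=1000):
    w = b = 0.0
    m = len(x)
    for _ in range(iters):
        residual = w * x + b - y
        w -= lr * np.dot(residual, x) / m  # d(cost)/dw
        b -= lr * np.sum(residual) / m     # d(cost)/db
    return w, b

p = pd.read_csv('linear_regression_dataset.csv')   # same file as in the question
print(fit_line(p['X'].to_numpy(), p['Y'].to_numpy()))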

PyTorch: slicing a tensor causes RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation

I wrote an RNN with an LSTM cell in PyCharm. The peculiarity of this network is that the output of the RNN is fed into an integration operation, computed with Runge-Kutta.
The integration takes some input and propagates it one step ahead in time. In order to do so I need to slice the feature tensor X along the batch dimension and pass each slice to the Runge-Kutta step.
import torch

class MyLSTM(torch.nn.Module):
    def __init__(self, ni, no, sampling_interval, nh=10, nlayers=1):
        super(MyLSTM, self).__init__()
        self.device = torch.device("cpu")
        self.dtype = torch.float
        self.ni = ni
        self.no = no
        self.nh = nh
        self.nlayers = nlayers
        self.lstms = torch.nn.ModuleList(
            [torch.nn.LSTMCell(self.ni, self.nh)] + [torch.nn.LSTMCell(self.nh, self.nh) for i in range(nlayers - 1)])
        self.out = torch.nn.Linear(self.nh, self.no)
        self.do = torch.nn.Dropout(p=0.2)
        self.actfn = torch.nn.Sigmoid()
        self.sampling_interval = sampling_interval
        self.scaler_states = None

    # Options
    # description of the whole block
    def forward(self, x, h0, train=False, integrate_ode=True):
        x0 = x.clone().requires_grad_(True)
        hs = x  # initiate hidden state
        if h0 is None:
            h = torch.zeros(hs.shape[0], self.nh, device=self.device)
            c = torch.zeros(hs.shape[0], self.nh, device=self.device)
        else:
            (h, c) = h0

        # LSTM cells
        for i in range(self.nlayers):
            h, c = self.lstms[i](hs, (h, c))
            if train:
                hs = self.do(h)
            else:
                hs = h

        # Output layer
        # y = self.actfn(self.out(hs))
        y = self.out(hs)
        if integrate_ode:
            p = y
            y = self.integrate(x0, p)
        return y, (h, c)

    def integrate(self, x0, p):
        # RK4 steps per interval
        M = 4
        DT = self.sampling_interval / M
        X = x0
        # X = self.scaler_features.inverse_transform(x0)
        for b in range(X.shape[0]):
            xx = X[b, :]
            for j in range(M):
                k1 = self.ode(xx, p[b, :])
                k2 = self.ode(xx + DT / 2 * k1, p[b, :])
                k3 = self.ode(xx + DT / 2 * k2, p[b, :])
                k4 = self.ode(xx + DT * k3, p[b, :])
                xx = xx + DT / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
            X_all[b, :] = xx
        return X_all

    def ode(self, x0, y):
        # Here I have a dynamic model
I get this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of SelectBackward, is at version 64; expected version 63 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
The problem is in the operations xx = X[b, :] and p[b, :]. I know that because if I choose a batch dimension of 1, I can replace those two expressions with xx = X and p, and then it works. How can I split the tensor without losing the gradient?
I had the same question, and after a lot of searching I added a .detach() call to "h" and "c" in the RNN cell.
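A rough, self-contained sketch of what that can look like (hypothetical snippet, not the asker's full model), together with an alternative fix not mentioned in the answer: avoiding the in-place write into X_all by collecting the per-sample results in a list and stacking them.

import torch

# 1) The answer's suggestion: detach hidden states that are carried over between
#    training steps, so the next step does not backpropagate through stale tensors.
h = torch.zeros(4, 10)
c = torch.zeros(4, 10)
h, c = h.detach(), c.detach()

# 2) Alternative (not from the answer): build the output from a list of slices
#    instead of assigning into a preallocated tensor in place.
def integrate_rows(X, dt):
    rows = []
    for b in range(X.shape[0]):
        xx = X[b, :]
        xx = xx + dt * xx              # stand-in for the RK4 update on this row
        rows.append(xx)
    return torch.stack(rows, dim=0)    # no X_all[b, :] = xx, so gradients flow

X = torch.randn(4, 3, requires_grad=True)
integrate_rows(X, 0.1).sum().backward()    # backward() succeeds and X.grad is populated
print(X.grad.shape)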

differential evolution global minimum problem

I'm trying to get the global minimum of a nonlinear function with linear constraints. Here is the code:
import numpy as np
from scipy.optimize import differential_evolution, LinearConstraint

rho = 1.0
g = 9.8
C = 1.0  # roughness coefficient
J = rho*g/(float(1e6))  # constant of function
capacity = [0.3875, 0.607, 0.374]  # Supply data
demand = [0.768, 0.315, 0.38,]  # Demand data
m = len(capacity)
n = len(demand)
x0 = np.array(m*n*[0.01])  # Initial point for algorithm
# In[59]:
# READ L AND d FROM ARCGIS !!!
L = (314376.57, 277097.9663, 253756.9869, 265786.5632, 316712.6028, 232857.1468, 112063.9914, 135762.94, 131152.8206)
h = (75, 75, 75, 75, 75, 75, 1320, 75, 75)
K = np.zeros(m*n)
N = np.zeros(m*n)
# In[60]:
# Each path has its own L,d, therefore, own constant
# In[61]:
# np.seterr(all='ignore')
def diameter(x):
    d = 1.484
    if x >= 0.276:
        d = 1.484
    elif x >= 0.212:
        d = 1.299
    elif x >= 0.148:
        d = 1.086
    elif x >= 0.0975:
        d = 0.881
    elif x >= 0.079:
        d = 0.793
    elif x >= 0.062:
        d = 0.705
    elif x >= 0.049:
        d = 0.626
    elif x >= 0.038:
        d = 0.555
    elif x >= 0.030:
        d = 0.494
    elif x >= 0.024:
        d = 0.441
    elif x >= 0.0198:
        d = 0.397
    elif x >= 0.01565:
        d = 0.353
    elif x >= 0.0123:
        d = 0.313
    elif x >= 0.0097:
        d = 0.278
    elif x >= 0.0077:
        d = 0.247
    elif x >= 0.0061:
        d = 0.22
    elif x >= 0.0049:
        d = 0.198
    elif x >= 0.00389:
        d = 0.176
    elif x >= 0.0032:
        d = 0.159
    elif x >= 0.0025:
        d = 0.141
    elif x >= 0:
        d = 0.123
    return d

# Definition of Objective function
def objective(x):
    sum_obj = 0.0
    for i in range(len(L)):
        K = 10.674*L[i]/(C**1.852*diameter(x[i])**4.871)
        N[i] = K*x[i]**2.852*J + x[i]*J*h[i]
        sum_obj = sum_obj + N[i]
    return sum_obj

print(str(objective(x0)))

# Definition of the constraints
GB = []
for i in range(m):
    GB.append(n*i*[0] + n*[1] + (m-1-i)*n*[0])
P = []
for i in range(n):
    P.append(m*(i*[0] + [1] + (n-1-i)*[0]))
DU = np.array(GB+P)
lb = np.array(m*[0] + demand)  # Supply
ub = np.array(capacity + n*[np.inf])  # Demand
# In[62]:
b = (0, 1)
bnds = []
for i in range(m*n):
    bnds.append(b)
cons = LinearConstraint(DU, lb, ub)
solution = differential_evolution(objective, x0, cons, bnds)
x = solution.x
# show initial objective
print('Initial SSE Objective: ' + str(objective(x0)))
# show final objective
print('Final SSE Objective: ' + str(objective(x)))
# print solution
print('Solution Supply to Demand:')
print('Q = ' + str(((np.around(x,6)).reshape(10,18))))
I don't know why, but when I run it the following appears:
if strategy in self._binomial:
TypeError: unhashable type: 'list'
Has anyone gotten the same error? It's my first time trying to solve an optimization problem, so I need a little bit of help. Any advice is welcome!
Thanks a lot!
You're not calling differential_evolution correctly. For your example you should call it as:
differential_evolution(objective, bnds, constraints=(cons,))
(I'm not sure if there are additional problems)
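For reference, a minimal self-contained sketch of that calling convention (toy objective, not the pipeline model above; assumes SciPy >= 1.4, where differential_evolution accepts a constraints argument):

import numpy as np
from scipy.optimize import differential_evolution, LinearConstraint

def toy_objective(x):
    return np.sum(x**2) + x[0]*x[1]

bnds = [(0, 1)] * 3                                   # bounds are the second positional argument
lc = LinearConstraint(np.ones((1, 3)), 1.0, np.inf)   # x0 + x1 + x2 >= 1

result = differential_evolution(toy_objective, bnds, constraints=(lc,), seed=1)
print(result.x, result.fun)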

keras change the parameters during training

I have a customized layer that does a simple linear transformation, like x*w+b. I want to change w and b during training; is that possible? For example, I want w1 in the first iteration and w2 in the second iteration (w1 and w2 are defined by myself).
Of course, you can do it, but you need to do it in a smart way. Here is some code you can play with.
from keras import backend as K
from keras.layers import *
from keras.models import *
import numpy as np

class MyDense(Layer):
    def __init__(self, units=64, use_bias=True, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.units = units
        self.use_bias = use_bias
        return

    def build(self, input_shape):
        input_dim = input_shape[-1]
        self.count = 0
        self.w1 = self.add_weight(shape=(input_dim, self.units), initializer='glorot_uniform', name='w1')
        self.w0 = self.add_weight(shape=(input_dim, self.units), initializer='glorot_uniform', name='w0')
        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units,), initializer='glorot_uniform', name='bias')
        else:
            self.bias = None
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
        self.built = True
        return

    def call(self, x):
        if self.count % 2 == 1:
            c0, c1 = 0, 1
        else:
            c0, c1 = 1, 0
        w = c0 * self.w0 + c1 * self.w1
        self.count += 1
        output = K.dot(x, w)
        if self.use_bias:
            output = K.bias_add(output, self.bias, data_format='channels_last')
        return output

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)

# define a dummy model
x = Input(shape=(128,))
y = MyDense(10)(x)
y = Dense(1, activation='sigmoid')(y)
model = Model(inputs=x, outputs=y)
print(model.summary())

# get some dummy data
a = np.random.randn(100, 128)
b = (np.random.randn(100,) > 0).astype('int32')

# compile and train
model.compile('adam', 'binary_crossentropy')
model.fit(a, b)
Note: the following code is equivalent to what we did above, but it will NOT work !!!
if self.count % 2 == 1:
    w = self.w0
else:
    w = self.w1
Why? Because having zero gradients for one variable (the former implementation) is NOT equivalent to having None gradients (the latter implementation).
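To make that concrete, here is a small sketch of the zero-versus-None distinction (written with TF2-style tf.GradientTape rather than the Keras 1 backend API used above, so treat it as an illustration only):

import tensorflow as tf

w0 = tf.Variable(1.0)
w1 = tf.Variable(2.0)
x = tf.constant(3.0)

# Blending keeps both weights in the graph: the unused one gets a zero gradient.
with tf.GradientTape() as tape:
    c0, c1 = 1.0, 0.0            # "use w0 on this step"
    y = (c0 * w0 + c1 * w1) * x
print(tape.gradient(y, [w0, w1]))    # [3.0, 0.0]

# A Python if picks one weight: the other never enters the graph and gets None.
with tf.GradientTape() as tape:
    y = w0 * x
print(tape.gradient(y, [w0, w1]))    # [3.0, None]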

How to use multiple GPUs effectively when training deep networks?

I am using a machine that has two Titan Black GPUs to train my deep learning model, which has 3 layers (3x3, 3x3 and 5x5).
The training runs pretty well, but when I watch nvidia-smi (refreshing every second), I see that my program uses only one GPU for computation; the second one stays at 0% even when the first one reaches 100%.
I tried using tf.device to assign specific tasks to each of them, but then they run one by one, not in parallel, and the total time even increased rather than decreased (I guess because the two GPUs had to exchange values with each other).
Below is my program. It is quite messy; maybe you just need to pay attention to the graph where I use tf.device...
Thank you so much!
import tensorflow as tf
import numpy as np
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt
from os import listdir, sys
from os.path import isfile, join
from time import gmtime, strftime
import time
def validatePath(path):
    path = path.replace("\\","/")
    if (path[len(path)-1] != "/"):
        path = path + "/"
    return path
hidden_size_default = np.array([16, 32, 64, 32])
cnn1_default = 3
cnn2_default = 3
cnn3_default = 5
SIZE_BATCH_VALID = 200
input_path = 'ARCHIVES-sub-dataset'
output_path = 'ARCHIVES-model'
log_address = "trainlog.txt"
tf.app.flags.DEFINE_integer('h0', hidden_size_default[0], 'Size of hidden layer 0th')
tf.app.flags.DEFINE_integer('h1', hidden_size_default[1], 'Size of hidden layer 1st')
tf.app.flags.DEFINE_integer('h2', hidden_size_default[2], 'Size of hidden layer 2nd')
tf.app.flags.DEFINE_integer('h3', hidden_size_default[3], 'Size of hidden layer 3rd')
tf.app.flags.DEFINE_integer('k1', cnn1_default , 'Size of kernel 1st')
tf.app.flags.DEFINE_integer('k2', cnn2_default , 'Size of kernel 2nd')
tf.app.flags.DEFINE_integer('k3', cnn3_default , 'Size of kernel 3rd')
tf.app.flags.DEFINE_string('input_path', input_path, 'The parent directory which contains 2 directories: dataset and label')
tf.app.flags.DEFINE_string('output_path', output_path, 'The directory which will store models (you have to create)')
tf.app.flags.DEFINE_string('log_address', log_address, 'The file name which will store the log')
FLAGS = tf.app.flags.FLAGS
load_path = FLAGS.input_path
save_model_path = FLAGS.output_path
log_addr = FLAGS.log_address
load_path = validatePath(load_path)
save_model_path = validatePath(save_model_path)
cnn1 = FLAGS.k1
cnn2 = FLAGS.k2
cnn3 = FLAGS.k3
hidden_size = np.array([FLAGS.h0, FLAGS.h1, FLAGS.h2, FLAGS.h3])
# Shuffle the dataset and its label
def randomize(dataset, labels):
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation,:]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

def writemyfile(mystring):
    with open(log_addr, "a") as myfile:
        myfile.write(str(mystring + "\n"))

num_labels = 5

def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/ predictions.shape[0])

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def DivideSets(input_set):
    length_set = input_set.shape[0]
    index_70 = int(length_set*0.7)
    index_90 = int(length_set*0.9)
    set_train = input_set[0:index_70]
    set_valid = input_set[index_70:index_90]
    set_test = input_set[index_90:length_set]
    return np.float32(set_train), np.float32(set_valid), np.float32(set_test)

# from 1-value labels to 5 values of (0 and 1)
def LabelReconstruct(label_set):
    label_set = label_set.astype(int)
    new_label_set = np.zeros(shape=(len(label_set),num_labels))
    for i in range(len(label_set)):
        new_label_set[i][label_set[i]] = 1
    return new_label_set.astype(int)

def LoadDataSet(load_path):
    list_data = [f for f in listdir(load_path + "dataset/") if isfile(join(load_path + "dataset/", f))]
    list_label = [f for f in listdir(load_path + "label/") if isfile(join(load_path + "dataset/", f))]
    if list_data.sort() == list_label.sort():
        return list_data
    else:
        print("data and labels are not suitable")
        return 0

# load, randomize, normalize images and reconstruct labels
def PrepareData(*arg):
    filename = arg[0]
    loaded_dataset = pickle.load( open( load_path + "dataset/" + filename, "rb" ))
    loaded_labels = pickle.load( open( load_path + "label/" + filename, "rb" ))
    if len(arg) == 1:
        datasize = len(loaded_labels)
    elif len(arg) == 2:
        datasize = int(arg[1])
    else:
        print("not more than 2 arguments please!")
    dataset_full,labels_full = randomize(loaded_dataset[0:datasize], loaded_labels[0:datasize])
    return NormalizeData(dataset_full), LabelReconstruct(labels_full)

def NormalizeData(dataset):
    dataset = dataset - (dataset.mean())
    dataset = dataset / (dataset.std())
    return dataset
### LOAD DATA
listfiles = LoadDataSet(load_path)
# divide
listfiles_train = listfiles[0:15]
listfiles_valid = listfiles[15:25]
listfiles_test = listfiles[25:len(listfiles)]
graphCNN = tf.Graph()
with graphCNN.as_default():
    with tf.device('/gpu:0'):
        x = tf.placeholder(tf.float32, shape=(None, 224,224,3)) # X
        y_ = tf.placeholder(tf.float32, shape=(None, num_labels)) # Y_
        dropout = tf.placeholder(tf.float32)
        if dropout == 1.0:
            keep_prob = tf.constant([0.2, 0.3, 0.5], dtype=tf.float32)
        else:
            keep_prob = tf.constant([1.0, 1.0, 1.0], dtype=tf.float32)
        weights_1 = weight_variable([cnn1,cnn1,3, hidden_size[0]])
        biases_1 = bias_variable([hidden_size[0]])
        weights_2 = weight_variable([cnn2,cnn2,hidden_size[0], hidden_size[1]])
        biases_2 = bias_variable([hidden_size[1]])
        weights_3 = weight_variable([cnn3,cnn3,hidden_size[1], hidden_size[2]])
        biases_3 = bias_variable([hidden_size[2]])
        weights_4 = weight_variable([56 * 56 * hidden_size[2], hidden_size[3]])
        biases_4 = bias_variable([hidden_size[3]])
        weights_5 = weight_variable([hidden_size[3], num_labels])
        biases_5 = bias_variable([num_labels])

        def model(data):
            with tf.device('/gpu:1'):
                train_hidden_1 = tf.nn.relu(conv2d(data, weights_1) + biases_1)
                train_hidden_2 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_1, weights_2) + biases_2))
                train_hidden_2_drop = tf.nn.dropout(train_hidden_2, keep_prob[0])
                train_hidden_3 = max_pool_2x2(tf.nn.relu(conv2d(train_hidden_2_drop, weights_3) + biases_3))
                train_hidden_3_drop = tf.nn.dropout(train_hidden_3, keep_prob[1])
                train_hidden_3_drop = tf.reshape(train_hidden_3_drop,[-1, 56 * 56 * hidden_size[2]])
                train_hidden_4 = tf.nn.relu(tf.matmul(train_hidden_3_drop, weights_4) + biases_4)
                train_hidden_4_drop = tf.nn.dropout(train_hidden_4, keep_prob[2])
                logits = tf.matmul(train_hidden_4_drop, weights_5) + biases_5
                return logits

        t_train_labels = tf.argmax(y_, 1) # From one-hot (one and zeros) vectors to values
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=model(x), labels=t_train_labels))
        optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
        y = tf.nn.softmax(model(x))
### RUNNING
print("log address: %s" % (log_addr))
#num_steps = 10001
times_repeat = 20 # number of epochs
batch_size = 100
with tf.Session(graph=graphCNN,config=tf.ConfigProto(log_device_placement=True)) as session:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(max_to_keep=0)
    writemyfile("---ARCHIVES_M1----")
    mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    writemyfile(str("\nTime: %s \nLayers: %d,%d,%d \epochs: %d" % (mytime,cnn1,cnn2,cnn3,times_repeat)))
    writemyfile("Train files:" + str(listfiles_train))
    writemyfile("Valid files:" + str(listfiles_valid))
    writemyfile("Test files:" + str(listfiles_test))
    print("Model will be saved in file: %s" % save_model_path)
    writemyfile(str("Model will be saved in file: %s" % save_model_path))

    ### TRAINING & VALIDATION
    valid_accuracies_epochs = np.array([])
    for time_repeat in range(times_repeat):
        print("- time_repeat:",time_repeat)
        writemyfile("- time_repeat:"+str(time_repeat))
        for file_train in listfiles_train:
            file_train_id = int(file_train[0:len(file_train)-4])
            time_start_this_file = time.time()
            #LOAD DATA
            print("- - file:",file_train_id, end=' ')
            writemyfile("- - file:" + str(file_train_id))
            Data_train, Label_train= PrepareData(file_train)
            for step in range(0,len(Data_train)-batch_size,batch_size):
                batch_data = Data_train[step:step+batch_size]
                batch_labels = Label_train[step:step+batch_size]
                feed_dict = {x : batch_data, y_ : batch_labels, dropout: 1.0}
                opti, l, predictions = session.run([optimizer, loss, y], feed_dict=feed_dict)
            train_accuracies = np.array([])
            for index_tr_accu in range(0,len(Data_train)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
                current_predictions = y.eval(feed_dict={x: Data_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID],dropout: 0.0})
                current_accuracy = accuracy(current_predictions, Label_train[index_tr_accu:index_tr_accu+SIZE_BATCH_VALID])
                train_accuracies = np.r_[train_accuracies,current_accuracy]
            train_accuracy = train_accuracies.mean()
            print("batch accu: %.2f%%" %(train_accuracy),end=" | ")
            writemyfile("batch accu: %.2f%%" %(train_accuracy))
            time_done_this_file = time.time() - time_start_this_file
            print("time: %.2fs" % (time_done_this_file))
            writemyfile("time: %.2fs" % (time_done_this_file))
        # save model
        model_addr = save_model_path + "model335" + "-epoch-" + str(time_repeat) + ".ckpt"
        save_path = saver.save(session, model_addr,) # max_to_keep default was 5
        mytime = strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print("epoch finished at %s \n model address: %s" % (mytime,model_addr))
        writemyfile("epoch finished at %s \n model address: %s" % (mytime,model_addr))
        # validation
        valid_accuracies = np.array([])
        for file_valid in listfiles_valid:
            file_valid_id = int(file_valid[0:len(file_valid)-4])
            Data_valid, Label_valid = PrepareData(file_valid)
            for index_vl_accu in range(0,len(Data_valid)-SIZE_BATCH_VALID,SIZE_BATCH_VALID):
                current_predictions = y.eval(feed_dict={x: Data_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID],dropout: 0.0})
                current_accuracy = accuracy(current_predictions, Label_valid[index_vl_accu:index_vl_accu+SIZE_BATCH_VALID])
                valid_accuracies = np.r_[valid_accuracies,current_accuracy]
        valid_accuracy = valid_accuracies.mean()
        print("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
        writemyfile("epoch %d - valid accu: %.2f%%" %(time_repeat,valid_accuracy))
        valid_accuracies_epochs = np.hstack([valid_accuracies_epochs,valid_accuracy])
    print('Done!!')
    writemyfile(str('Done!!'))
    session.close()
Update: I found that cifar10_multi_gpu_train.py seems to be a good example for training with multiple GPUs, but honestly I don't know how to apply it to my case.
I think you need to change
def model(data):
    with tf.device('/gpu:1'):
to:
def model(data):
    for d in ['/gpu:0', '/gpu:1']:
        with tf.device(d):
and ditch the line with tf.device('/gpu:0'):. In the first with tf.device you are only doing initialization of variables, and then you are resetting your devices with the next with tf.device.
Let me know if this works since I can't test it.
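For what it's worth, the cifar10_multi_gpu_train.py example mentioned in the question's update takes a different route: it splits each batch across GPUs ("towers") and averages the gradients. A heavily simplified, self-contained sketch of that pattern (TF1-style graph code, with a dummy one-layer model standing in for the network above):

import tensorflow as tf

# Dummy shared variables; in the real model these would be the conv weights above.
x = tf.placeholder(tf.float32, [None, 8])
y_ = tf.placeholder(tf.float32, [None, 1])
w = tf.Variable(tf.zeros([8, 1]))             # shared by both towers

def tower_loss(xs, ys):
    return tf.reduce_mean(tf.square(tf.matmul(xs, w) - ys))

opt = tf.train.GradientDescentOptimizer(0.01)
x_splits = tf.split(x, 2)                     # one slice of the batch per GPU
y_splits = tf.split(y_, 2)
tower_grads = []
for i, dev in enumerate(['/gpu:0', '/gpu:1']):
    with tf.device(dev):
        # each tower computes its own loss and gradients on its slice
        tower_grads.append(opt.compute_gradients(tower_loss(x_splits[i], y_splits[i])))

# average the per-tower gradients variable by variable, then apply them once
avg = [(tf.add_n([g for g, _ in gv]) / len(gv), gv[0][1]) for gv in zip(*tower_grads)]
train_op = opt.apply_gradients(avg)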
