No gradients provided for any variable

No gradients provided for any variable - machine-learning

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
Nclass = 500
D = 2
M = 3
K = 3
X1 = np.random.randn(Nclass, D) + np.array([0, -2])
X2 = np.random.randn(Nclass, D) + np.array([2, 2])
X3 = np.random.randn(Nclass, D) + np.array([-2, 2])
X = np.vstack ([X1, X2, X3]).astype(np.float32)
Y = np.array([0]*Nclass + [1]*Nclass + [2]*Nclass)
plt.scatter(X[:,0], X[:,1], c=Y, s=100, alpha=0.5)
plt.show()
N = len(Y)
T = np.zeros((N, K))
for i in range(N):
T[i, Y[i]] = 1
def init_weights(shape):
    """Create a TensorFlow variable of the given shape.

    The initial values are drawn with `tf.random_normal` using a standard
    deviation of 0.01.
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.01))
def forward(X, W1, b1, W2, b2):
    """Return the output-layer activations of a one-hidden-layer network.

    The hidden layer is sigmoid(X W1 + b1); the result is the linear map
    (hidden W2 + b2) with no output non-linearity, i.e. unscaled logits.
    """
    Z = tf.nn.sigmoid(tf.matmul(X, W1) + b1)
    return tf.matmul(Z, W2) + b2
tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])
W1 = init_weights([D, M])
b1 = init_weights([M])
W2 = init_weights([M, K])
b2 = init_weights([K])
py_x = forward(tfX, W1, b1, W2, b2)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=py_x, logits=T))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
predict_op = tf.argmax(py_x, 1)
sess = tf.Session()
inti = tf.initizalize_all_variables()
for i in range(1000):
sess.run(train_op, feed_dict={tfX: X, tfY: T})
pred = sess.run(predict_op, feed_dict={tfX: X, tfY: T})
if i % 10 == 0:
print(np.mean(Y == pred))
I get the following error:
Traceback (most recent call last):
File "test.py", line 45, in <module>
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 322, in minimize
([str(v) for _, v in grads_and_vars], loss))
ValueError: No gradients provided for any variable, check your graph for ops that do not support gradients, between variables ["<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32_ref>", "<tf.Variable 'Variable_1:0' shape=(3,) dtype=float32_ref>", "<tf.Variable 'Variable_2:0' shape=(3, 3) dtype=float32_ref>", "<tf.Variable 'Variable_3:0' shape=(3,) dtype=float32_ref>"] and loss Tensor("Mean:0", shape=(), dtype=float64).
It is unclear to me what I have to do here. Could anyone help me with this?

If T are the true labels and py_x the network outputs, you will have to switch the arguments in the cross entropy function:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=T, logits=py_x))
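The logits must be the network outputs and the labels must be the true labels. If you swap the arguments, the loss no longer depends on any trainable variable: this op does not backpropagate into `labels`, and `T` (now passed as `logits`) is a constant NumPy array, so the optimizer finds no gradient for any variable.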
You also have to initialize your variables before training: your code lacks a sess.run(init) statement (and there was a typo in your initialize_all_variables() call).
I also shuffled your data; maybe it will lead to faster convergence towards the labels.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Problem size: Nclass samples per class, D input features,
# M hidden units, K output classes.
Nclass = 500
D = 2
M = 3
K = 3

# Three Gaussian blobs, one per class, centred at (0, -2), (2, 2), (-2, 2).
X1 = np.random.randn(Nclass, D) + np.array([0, -2])
X2 = np.random.randn(Nclass, D) + np.array([2, 2])
X3 = np.random.randn(Nclass, D) + np.array([-2, 2])
X = np.vstack([X1, X2, X3]).astype(np.float32)
Y = np.array([0] * Nclass + [1] * Nclass + [2] * Nclass)

# Shuffle samples and labels with the same permutation so they stay aligned.
perm = np.random.permutation(len(X))
X = X[perm]
Y = Y[perm]

# plt.scatter(X[:,0], X[:,1], c=Y, s=100, alpha=0.5)
# plt.show()

N = len(Y)
# One-hot encode the labels; float32 matches the dtype of the tfY placeholder.
T = np.zeros((N, K), dtype=np.float32)
T[np.arange(N), Y] = 1
print(T)


def init_weights(shape):
    """Create a variable of the given shape, drawn with stddev 0.01."""
    return tf.Variable(tf.random_normal(shape, stddev=0.01))


def forward(X, W1, b1, W2, b2):
    """Return the logits of a network with one sigmoid hidden layer."""
    Z = tf.nn.sigmoid(tf.matmul(X, W1) + b1)
    return tf.matmul(Z, W2) + b2


tfX = tf.placeholder(tf.float32, [None, D])
tfY = tf.placeholder(tf.float32, [None, K])
W1 = init_weights([D, M])
b1 = init_weights([M])
W2 = init_weights([M, K])
b2 = init_weights([K])
py_x = forward(tfX, W1, b1, W2, b2)

# Labels are the true targets, logits are the network outputs. The labels
# come from the tfY placeholder (fed with T below) rather than from the NumPy
# array directly, so the value passed in feed_dict is the one actually used.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tfY, logits=py_x))
train_op = tf.train.GradientDescentOptimizer(0.1).minimize(cost)
predict_op = tf.argmax(py_x, 1)

sess = tf.Session()
# Variables must be initialised before the first training step.
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    sess.run(train_op, feed_dict={tfX: X, tfY: T})
    if i % 10 == 0:
        # Training-set accuracy; only evaluated on the steps that report it.
        pred = sess.run(predict_op, feed_dict={tfX: X, tfY: T})
        print(np.mean(Y == pred))

It turned out that you need to run the initializer, i.e.
inti = tf.initialize_all_variables()
sess.run(inti)
before running the GradientDescentOptimizer

Related

Pytorch, slicing tensor causes RuntimeError:: one of the variables needed for gradient computation has been modified by an inplace operation:

I wrote an RNN with an LSTM cell in PyCharm. The peculiarity of this network is that the output of the RNN is fed into an integration operation, computed with the Runge-Kutta method.
The integration takes some input and propagates it one step ahead in time. In order to do so, I need to slice the feature tensor X along the batch dimension and pass each slice to the Runge-Kutta integrator.
class MyLSTM(torch.nn.Module):
def __init__(self, ni, no, sampling_interval, nh=10, nlayers=1):
super(MyLSTM, self).__init__()
self.device = torch.device("cpu")
self.dtype = torch.float
self.ni = ni
self.no = no
self.nh = nh
self.nlayers = nlayers
self.lstms = torch.nn.ModuleList(
[torch.nn.LSTMCell(self.ni, self.nh)] + [torch.nn.LSTMCell(self.nh, self.nh) for i in range(nlayers - 1)])
self.out = torch.nn.Linear(self.nh, self.no)
self.do = torch.nn.Dropout(p=0.2)
self.actfn = torch.nn.Sigmoid()
self.sampling_interval = sampling_interval
self.scaler_states = None
# Options
# description of the whole block
def forward(self, x, h0, train=False, integrate_ode=True):
x0 = x.clone().requires_grad_(True)
hs = x # initiate hidden state
if h0 is None:
h = torch.zeros(hs.shape[0], self.nh, device=self.device)
c = torch.zeros(hs.shape[0], self.nh, device=self.device)
else:
(h, c) = h0
# LSTM cells
for i in range(self.nlayers):
h, c = self.lstms[i](hs, (h, c))
if train:
hs = self.do(h)
else:
hs = h
# Output layer
# y = self.actfn(self.out(hs))
y = self.out(hs)
if integrate_ode:
p = y
y = self.integrate(x0, p)
return y, (h, c)
def integrate(self, x0, p):
    """Advance every state in the batch by one sampling interval with RK4.

    Args:
        x0: tensor indexed as ``x0[b, :]``; row ``b`` is the initial state
            of batch element ``b``.
        p: tensor indexed as ``p[b, :]``; row ``b`` is passed to
            ``self.ode`` together with the state of batch element ``b``.

    Returns:
        A tensor with one row per batch element holding the integrated
        state. An empty batch yields a copy of ``x0``.

    The integrated rows are collected in a list and joined with
    ``torch.stack`` instead of being written into a pre-allocated tensor
    (``X_all[b, :] = xx``): the stacked result stays connected to the
    autograd graph and no tensor is modified in place here.
    NOTE(review): if ``self.ode`` itself modifies tensors in place, autograd
    can still raise the same error; that code is not visible here.
    """
    # RK4 steps per interval
    M = 4
    DT = self.sampling_interval / M
    # X = self.scaler_features.inverse_transform(x0)
    rows = []
    for b in range(x0.shape[0]):
        xx = x0[b, :]
        pb = p[b, :]
        for _ in range(M):
            k1 = self.ode(xx, pb)
            k2 = self.ode(xx + DT / 2 * k1, pb)
            k3 = self.ode(xx + DT / 2 * k2, pb)
            k4 = self.ode(xx + DT * k3, pb)
            xx = xx + DT / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
        rows.append(xx)
    if not rows:
        # torch.stack rejects an empty list; nothing to integrate.
        return x0.clone()
    return torch.stack(rows, dim=0)
def ode(self, x0, y):
# Here I a dynamic model
I get this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of SelectBackward, is at version 64; expected version 63 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
The problem is in the operations xx = X[b, :] and p[b, :]. I know this because when I choose a batch dimension of 1, I can replace those two expressions with xx = X and p, and this works. How can I split the tensor without losing the gradient?

I had the same question, and after a lot of searching, I added .detach() function after "h" and "c" in the RNN cell.

Logistic Regression not able to find value of theta

I have a hundred entries in a CSV file.
Physics,Maths,Status_class0or1
30,40,0
90,70,1
Using the above data, I am trying to build a logistic (binary) classifier.
Please advise me where I am going wrong. Why am I getting the answer as a 3*3 matrix (9 values of theta), whereas it should be only 3?
Here is code:
importing the libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
reading data from csv file.
df = pd.read_csv("LogisticRegressionFirstBinaryClassifier.csv", header=None)
df.columns = ["Maths", "Physics", "AdmissionStatus"]
X = np.array(df[["Maths", "Physics"]])
y = np.array(df[["AdmissionStatus"]])
X = preprocessing.normalize(X)
X = np.c_[np.ones(X.shape[0]), X]
theta = np.ones((X.shape[1], 1))
print(X.shape) # (100, 3)
print(y.shape) # (100, 1)
print(theta.shape) # (3, 1)
calc_z calculates the dot product of X and theta
def calc_z(X, theta):
    """Return the linear score X . theta (dot product of X and theta)."""
    return np.dot(X, theta)
Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), applied element-wise."""
    return 1 / (1 + np.exp(-z))
Cost_function
def cost_function(X, y, theta):
    """Return the mean logistic (cross-entropy) loss of theta on (X, y).

    The prediction is h = sigmoid(X . theta); the loss
    -y * log(h) - (1 - y) * log(1 - h) is averaged over all elements.
    """
    z = calc_z(X, theta)
    h = sigmoid(z)
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
print("cost_function =" , cost_function(X, y, theta))
def derivativeofcostfunction(X, y, theta):
    """Return the product (h - y)^T X, where h = sigmoid(X . theta).

    NOTE(review): with X of shape (100, 3) and y of shape (100, 1), as
    printed above, this returns a (1, 3) row vector. Subtracting it from the
    (3, 1) `theta` broadcasts to a (3, 3) matrix, which is the nine-value
    result this question is asking about.
    """
    z = calc_z(X, theta)
    h = sigmoid(z)
    calculation = np.dot((h - y).T, X)
    return calculation
print("derivativeofcostfunction=", derivativeofcostfunction(X, y, theta))
def grad_desc(X, y, theta, lr=.001, converge_change=.001):
    """Run gradient descent on the logistic cost and return (theta, num_iter).

    Each iteration prints the current theta and gradient, then steps theta
    by ``lr`` times the gradient. The loop stops as soon as the decrease in
    cost from one iteration to the next is no longer greater than
    ``converge_change`` (this includes the case where the cost goes up).
    ``num_iter`` starts at 1 and is incremented once per iteration.
    """
    cost = cost_function(X, y, theta)
    change_cost = 1
    num_iter = 1
    while change_cost > converge_change:
        old_cost = cost
        # Evaluate the gradient once and reuse it for printing and stepping.
        gradient = derivativeofcostfunction(X, y, theta)
        print(theta)
        print(gradient)
        theta = theta - lr * gradient
        cost = cost_function(X, y, theta)
        change_cost = old_cost - cost
        num_iter += 1
    return theta, num_iter
Here is the output :
[[ 0.4185146 -0.56877556 0.63999433]
[15.39722864 9.73995197 11.07882445]
[12.77277463 7.93485324 9.24909626]]
[[0.33944777 0.58199037 0.52493407]
[0.02106587 0.36300629 0.30297278]
[0.07040604 0.3969297 0.33737757]]
[[-0.05856159 -0.89826735 0.30849185]
[15.18035041 9.59004868 10.92827046]
[12.4804775 7.73302024 9.04599788]]
[[0.33950634 0.58288863 0.52462558]
[0.00588552 0.35341624 0.29204451]
[0.05792556 0.38919668 0.32833157]]
[[-5.17526527e-01 -1.21534937e+00 -1.03387571e-02]
[ 1.49729502e+01 9.44663458e+00 1.07843504e+01]
[ 1.21978140e+01 7.53778010e+00 8.84964495e+00]]
(array([[ 0.34002386, 0.58410398, 0.52463592],
[-0.00908743, 0.34396961, 0.28126016],
[ 0.04572775, 0.3816589 , 0.31948193]]), 46)

I changed this code: I just added a transpose when returning the matrix, and it fixed my issue.
def derivativeofcostfunction(X, y, theta):
    """Return X^T (h - y), where h = sigmoid(X . theta).

    The result is the transpose of (h - y)^T X, i.e. one row per column of
    X, so for a column-vector theta it can be subtracted from theta
    element-wise instead of broadcasting into a square matrix.
    """
    z = calc_z(X, theta)
    h = sigmoid(z)
    return np.dot(X.T, h - y)

Gradient Descent cost function explosion

I am writing this code for linear regression and trying Gradient Descent to minimize the RSS. The cost function seems to explode to infinity within 12 iterations. I know this is not supposed to happen. Maybe, I have used the wrong gradient function for RSS (can be seen in the function "grad()")?
NumberObservations=100
minVal=1
maxVal=20
X = np.random.uniform(minVal,maxVal,(NumberObservations,1))
e = np.random.normal(0, 1, (NumberObservations,1))
Y= 10 + 5*X + e
B = np.array([[0], [0]])
sum_y = sum(Y)
sum_x = sum(X)
sum_xy = sum(np.multiply(X, Y))
sum_x2 = sum(X*X)
alpha = 0.00001
iterations = 15
def cost_fun(X, Y, B):
    """Return the residual sum of squares of the line b0 + b1 * X against Y.

    ``B[0]`` is the intercept and ``B[1]`` the slope. The squared residuals
    are summed along the first axis, so column-vector inputs of shape (n, 1)
    give a result of shape (1,).
    """
    b0 = B[0]
    b1 = B[1]
    residuals = Y - (b0 + (b1 * X))
    return np.sum(residuals ** 2, axis=0)
def grad(X, Y, B):
    """Return the per-sample gradient of the squared residual w.r.t. B.

    Prints the current B, then returns an array with one row per sample:
    column 0 is -2 * r and column 1 is -2 * X * r, where
    r = Y - b0 - b1 * X is the residual. Summing the rows gives the gradient
    of the residual sum of squares.
    """
    print("B = " + str(B))
    b0 = B[0]
    b1 = B[1]
    residuals = Y - b0 - (b1 * X)
    g0 = -2 * residuals
    g1 = -2 * (X * residuals)
    return np.concatenate((g0, g1), axis=1)
def gradient_descent(X, Y, B, alpha, iterations):
    """Minimise the residual sum of squares by batch gradient descent.

    Args:
        X, Y: column vectors of observations.
        B: initial coefficients, ``B[0]`` the intercept and ``B[1]`` the slope.
        alpha: step size.
        iterations: number of update steps.

    Returns:
        ``(B, cost_history)``: the final coefficients and the cost recorded
        after every step.

    ``grad`` already returns the per-sample derivatives of the squared
    residuals, so the descent direction is simply their sum over the samples
    (scaled by 1/m). Multiplying them by the residuals a second time, as
    ``np.dot(g.T, loss)`` did, is not the gradient of the cost and makes the
    updates diverge.
    """
    cost_history = [0] * iterations
    m = len(Y)
    for iteration in range(iterations):
        g = grad(X, Y, B)
        # Sum the per-sample rows into one (2, 1) column matching B.
        gradient = g.sum(axis=0).reshape(-1, 1) / m
        B = B - alpha * gradient
        cost = cost_fun(X, Y, B)
        cost_history[iteration] = cost
        print("Iteration %d | Cost: %f" % (iteration, cost))
        print("-----------------------------------------------------------------------")
    return B, cost_history
newB, cost_history = gradient_descent(X, Y, B, alpha, iterations)
# New Values of B
print(newB)
Please help.

Building a non linear model with ReLUs in TensorFlow

I'm trying to build a simple non-linear model in TensorFlow. I have created this sample data:
x_data = np.arange(-100, 100).astype(np.float32)
y_data = np.abs(x_data + 20.)
I guess this shape should be easily reconstructed using a couple of ReLUs, but I can't figure out how.
So far, my approach is to wrap linear components with ReLUs, but this doesn't run:
W1 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
W2 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b1 = tf.Variable(tf.zeros([1]))
b2 = tf.Variable(tf.zeros([1]))
y = tf.nn.relu(W1 * x_data + b1) + tf.nn.relu(W2 * x_data + b2)
Any ideas about how to express this model using ReLUs in TensorFlow?

I think you're asking how to combine ReLUs in a working model? Two options are shown below:
Option 1) Input of ReLU1 into ReLU2
This is probably the preferred method. Note that r1 is the input to r2.
x = tf.placeholder('float', shape=[None, 1])
y_ = tf.placeholder('float', shape=[None, 1])
W1 = weight_variable([1, hidden_units])
b1 = bias_variable([hidden_units])
r1 = tf.nn.relu(tf.matmul(x, W1) + b1)
# Input of r1 into r2 (which is just y)
W2 = weight_variable([hidden_units, 1])
b2 = bias_variable([1])
y = tf.nn.relu(tf.matmul(r1,W2)+b2) # ReLU2
Option 2) Add ReLU1 and ReLU2
Option 2 was listed in the original question, but I don't know if this is what you really want...read below for a full working example and try it. I think you'll find it doesn't model well.
x = tf.placeholder('float', shape=[None, 1])
y_ = tf.placeholder('float', shape=[None, 1])
W1 = weight_variable([1, hidden_units])
b1 = bias_variable([hidden_units])
r1 = tf.nn.relu(tf.matmul(x, W1) + b1)
# Add r1 to r2 -- won't be able to reduce the error.
W2 = weight_variable([1, hidden_units])
b2 = bias_variable([hidden_units])
r2 = tf.nn.relu(tf.matmul(x, W2) + b2)
y = tf.add(r1,r2) # Again, ReLU2 is just y
Full Working Example
Below is a full working example. By default it uses option 1, however, option 2 is also included in the comments.
from __future__ import print_function
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Config the matlotlib backend as plotting inline in IPython
%matplotlib inline
episodes = 55
batch_size = 5
hidden_units = 10
learning_rate = 1e-3
def weight_variable(shape):
    """Create a variable of the given shape from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """Create a variable of the given shape with every element set to 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)
# Produce the data
x_data = np.arange(-100, 100).astype(np.float32)
y_data = np.abs(x_data + 20.)
# Plot it.
plt.plot(y_data)
plt.ylabel('y_data')
plt.show()
# Might want to randomize the data
# np.random.shuffle(x_data)
# y_data = np.abs(x_data + 20.)
# reshape data ...
x_data = x_data.reshape(200, 1)
y_data = y_data.reshape(200, 1)
# create placeholders to pass the data to the model
x = tf.placeholder('float', shape=[None, 1])
y_ = tf.placeholder('float', shape=[None, 1])
W1 = weight_variable([1, hidden_units])
b1 = bias_variable([hidden_units])
r1 = tf.nn.relu(tf.matmul(x, W1) + b1)
# Input of r1 into r2 (which is just y)
W2 = weight_variable([hidden_units, 1])
b2 = bias_variable([1])
y = tf.nn.relu(tf.matmul(r1,W2)+b2)
# OPTION 2
# Add r1 to r2 -- won't be able to reduce the error.
#W2 = weight_variable([1, hidden_units])
#b2 = bias_variable([hidden_units])
#r2 = tf.nn.relu(tf.matmul(x, W2) + b2)
#y = tf.add(r1,r2)
mean_square_error = tf.reduce_sum(tf.square(y-y_))
training = tf.train.AdamOptimizer(learning_rate).minimize(mean_square_error)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
min_error = np.inf
for _ in range(episodes):
# iterrate trough every row (with batch size of 1)
for i in range(x_data.shape[0]-batch_size+1):
_, error = sess.run([training, mean_square_error], feed_dict={x: x_data[i:i+batch_size], y_:y_data[i:i+batch_size]})
if error < min_error :
min_error = error
if min_error < 3:
print(error)
#print(error)
#print(error, x_data[i:i+batch_size], y_data[i:i+batch_size])
# error = sess.run([training, mean_square_error], feed_dict={x: x_data[i:i+batch_size], y_:y_data[i:i+batch_size]})
# if error != None:
# print(error)
sess.close()
print("\n\nmin_error:",min_error)
It might be easier to see in a Jupyter notebook here

Here is a simple feedforward network with one hidden layer.
import numpy as np
import tensorflow as tf
episodes = 55
batch_size = 5
hidden_units = 10
learning_rate = 1e-3
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
# normalize the data and shuffle them
x_data = np.arange(0, 1, 0.005).astype(float)
np.random.shuffle(x_data)
y_data = np.abs(x_data + .1)
# reshape data ...
x_data = x_data.reshape(200, 1)
y_data = y_data.reshape(200, 1)
# create placeholders to pass the data to the model
x = tf.placeholder('float', shape=[None, 1])
y_ = tf.placeholder('float', shape=[None, 1])
W1 = weight_variable([1, hidden_units])
b1 = bias_variable([hidden_units])
h1 = tf.nn.relu(tf.matmul(x, W1) + b1)
W2 = weight_variable([hidden_units, 1])
b2 = bias_variable([1])
y = tf.matmul(h1, W2) + b2
mean_square_error = tf.reduce_sum(tf.square(y-y_))
training = tf.train.AdamOptimizer(learning_rate).minimize(mean_square_error)
sess = tf.InteractiveSession()
sess.run(tf.initialize_all_variables())
for _ in xrange(episodes):
# iterrate trough every row (with batch size of 1)
for i in xrange(x_data.shape[0]-batch_size+1):
_, error = sess.run([training, mean_square_error], feed_dict={x: x_data[i:i+batch_size], y_:y_data[i:i+batch_size]})
#print error
print error, x_data[i:i+batch_size], y_data[i:i+batch_size]
error = sess.run([training, mean_square_error], feed_dict={x: x_data[i:i+batch_size], y_:y_data[i:i+batch_size]})
print error

Inspired by all responses I managed to train this model by using the proposed model in the accepted answer. Here is the code:
import tensorflow as tf
import numpy as np
# Create 200 x, y data points in NumPy to represent the function
x_data = np.arange(-100, 100).astype(np.float32)
y_data = np.abs(x_data + 20.)

# Step size for Adam. The snippet used `learning_rate` without defining it;
# NOTE(review): 1e-3 is the value used by the other examples on this page,
# confirm it is the one intended here.
learning_rate = 1e-3

# Model: the sum of two ReLU units, each applied to its own linear function
# of the input.
W1 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b1 = tf.Variable(tf.zeros([1]))
W2 = tf.Variable(tf.random_uniform([1], -1.0, 1.0))
b2 = tf.Variable(tf.zeros([1]))
y = tf.nn.relu(W1 * x_data + b1) + tf.nn.relu(W2 * x_data + b2)

# Minimize the summed squared errors.
mean_square_error = tf.reduce_sum(tf.square(y - y_data))
train = tf.train.AdamOptimizer(learning_rate).minimize(mean_square_error)

sess = tf.Session()
init = tf.initialize_all_variables()
sess.run(init)

# Fit the non-linear function.
for step in range(50000):
    sess.run(train)
    if step % 10000 == 0:
        # Expected values: W1 = 1., W2 = -1., b1 = 20., b2 = -20.
        print(step, sess.run(W1), sess.run(b1), sess.run(W2), sess.run(b2))

A Simple Network on TensorFlow

I was trying to train a very simple model on TensorFlow. Model takes a single float as input and returns the probability of input being greater than 0. I used 1 hidden layer with 10 hidden units. Full code is shown below:
import tensorflow as tf
import random
# Graph construction
x = tf.placeholder(tf.float32, shape = [None,1])
y_ = tf.placeholder(tf.float32, shape = [None,1])
W = tf.Variable(tf.random_uniform([1,10],0.,0.1))
b = tf.Variable(tf.random_uniform([10],0.,0.1))
layer1 = tf.nn.sigmoid( tf.add(tf.matmul(x,W), b) )
W1 = tf.Variable(tf.random_uniform([10,1],0.,0.1))
b1 = tf.Variable(tf.random_uniform([1],0.,0.1))
y = tf.nn.sigmoid( tf.add( tf.matmul(layer1,W1),b1) )
loss = tf.square(y - y_)
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# Training
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
N = 1000
while N != 0:
batch = ([],[])
u = random.uniform(-10.0,+10.0)
if u >= 0.:
batch[0].append([u])
batch[1].append([1.0])
if u < 0.:
batch[0].append([u])
batch[1].append([0.0])
sess.run(train_step, feed_dict = {x : batch[0] , y_ : batch[1]} )
N -= 1
while(True):
u = raw_input("Give an x\n")
print sess.run(y, feed_dict = {x : [[u]]})
The problem is, I am getting terribly unrelated results. Model does not learn anything and returns irrelevant probabilities. I tried to adjust learning rate and change variable initialization, but I did not get anything useful. Do you have any suggestions?

You are computing only one probability; what you want is to have two classes:
greater/equal than zero.
less than zero.
So the output of the network will be a tensor of shape two that will contain the probabilities of each class. I renamed y_ in your example to labels:
labels = tf.placeholder(tf.float32, shape = [None,2])
Next we compute the cross entropy between the result of the network and the expected classification. The classes for positive numbers would be [1.0, 0] and for negative numbers would be [0.0, 1.0].
The loss function becomes:
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits, labels)
loss = tf.reduce_mean(cross_entropy)
I renamed the y to logits as that is a more descriptive name.
Training this network for 10000 steps gives:
Give an x
3.0
[[ 0.96353203 0.03686807]]
Give an x
200
[[ 0.97816485 0.02264325]]
Give an x
-20
[[ 0.12095013 0.87537241]]
Full code:
import tensorflow as tf
import random
# Graph construction
x = tf.placeholder(tf.float32, shape=[None, 1])
labels = tf.placeholder(tf.float32, shape=[None, 2])
W = tf.Variable(tf.random_uniform([1, 10], 0., 0.1))
b = tf.Variable(tf.random_uniform([10], 0., 0.1))
layer1 = tf.nn.sigmoid(tf.add(tf.matmul(x, W), b))
W1 = tf.Variable(tf.random_uniform([10, 2], 0., 0.1))
# One bias per output class, matching the two columns of W1.
b1 = tf.Variable(tf.random_uniform([2], 0., 0.1))
# NOTE(review): the cross-entropy op below applies a softmax itself and is
# documented as expecting unscaled logits; the sigmoid here squashes them
# into (0, 1) first. Kept as in the original answer -- confirm it is intended.
logits = tf.nn.sigmoid(tf.add(tf.matmul(layer1, W1), b1))
# Pass the arguments by name so labels and logits cannot be mixed up.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=labels, logits=logits)
loss = tf.reduce_mean(cross_entropy)
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# Training
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    N = 1000
    while N != 0:
        # One random sample per step: class [1, 0] for u >= 0, else [0, 1].
        u = random.uniform(-10.0, +10.0)
        target = [1.0, 0.0] if u >= 0. else [0.0, 1.0]
        sess.run(train_step, feed_dict={x: [[u]], labels: [target]})
        N -= 1
    while True:
        # Parse the typed text into a number before feeding it to the graph.
        u = float(raw_input("Give an x\n"))
        print(sess.run(logits, feed_dict={x: [[u]]}))

Develop Reference

ios ruby-on-rails asp.net-mvc docker delphi jenkins grails google-sheets machine-learning dart

No gradients provided for any variable - machine-learning

It turned out that you need to run the initializer, i.e. inti = tf.initialize_all_variables() sess.run(inti), before running the GradientDescentOptimizer

Related

Pytorch, slicing tensor causes RuntimeError:: one of the variables needed for gradient computation has been modified by an inplace operation:

Logistic Regression not able to find value of theta

Gradient Descent cost function explosion

Building a non linear model with ReLUs in TensorFlow

A Simple Network on TensorFlow

Categories

Resources