power law and truncated power law curve fitting problem in python - probability-distribution

Recently, I am making a code that allows curve fitting (power law and truncated power law) in a log-log scale graph using a series of data.
I found the 'from scipy.optimize import curve_fit' module, so the code I made it is as follows:
Power law (the x and y value of data is self.fx_ti and self.fy_pti, respectively)
def Powerlaw (self, event):
fig, ax = plt.subplots(2,1)
ax[0].plot(self.fx_ti, self.fy_pti, 'ko', alpha = 0.7, markersize = 4)
ax[0].set_title("Probability Distribution")
ax[0].set(ylabel = "P" + r"($\tau_i$) " + r'$s^{-1}$', xlabel = r"$\tau _i$"+" (s)")
def power_law(ti, a0, mi):
return a0*ti**(-mi)
popt, pcov = curve_fit(power_law, self.fx_ti, self.fy_pti, p0 = [0,0])
ax[0].plot(self.fx_ti, power_law(self.fx_ti, *popt), 'b', lw = 1)
res = self.fy_pti - power_law(self.fx_ti, *popt)
ax[1].plot(self.fx_ti, res, "ko", markersize = 4)
ax[0].set_xscale('log')
ax[0].set_yscale('log')
plt.ion
plt.show()
I think a pretty good fitting occurs when running the code.
But the problem is in 'truncated power law' fitting.
There is nothing different from general power law fitting.
The code I made it is as follows :
def Truncated (self, event):
fig, ax = plt.subplots(2,1)
ax[0].plot(self.fx_ti, self.fy_pti, 'ko', alpha = 0.7, markersize = 4)
ax[0].set_title("Probability Distribution")
ax[0].set(ylabel = "P" + r"($\tau_i$) " + r'$s^{-1}$', xlabel = r"$\tau _i$"+" (s)")
def truncated_power_law(x, a, b, c):
return a*x**(-b)*np.exp(np.multiply(-1,x)/c)
popt, pcov = curve_fit(truncated_power_law, self.fx_ti, self.fy_pti)
ax[0].plot(self.fx_ti, popt[0]*self.fx_ti**(-popt[1])*np.exp(np.multiply(-1,self.fx_ti)/popt[2]), 'b', lw = 1)
res = self.fy_pti - truncated_power_law(self.fx_ti, *popt)
ax[1].plot(self.fx_ti, res, "ko", markersize = 4)
ax[0].set_xscale('log')
ax[0].set_yscale('log')
plt.ion
plt.show()
You can see the result in the picture I attached.
Please python code geniuses...help...please
enter image description here

Related

Saving Gradient in Backward Pass Google-JAX

I am using JAX to implement a simple neural network (NN) and I want to access and save the gradients from the backward pass for further analysis after the NN ran. I can access and look at the gradients temporarily with the python debugger (as long as I am not using jit). But I want to save all gradients over the whole training process and analyze them after the training is done. I have come up with a rather hacky solution for this using id_tap and a global variable (see the code below). But I was wondering whether there is a better solution which does not violate the functional principles of JAX.
Many thanks!
import jax.numpy as jnp
from jax import grad, jit, vmap, random, custom_vjp
from jax.experimental.host_callback import id_tap
# experimental solution
global_save_list = {'x':[],'w':[],'g':[],'des':[]}
def global_save_func(ctx, des):
x, w, g = ctx
global_save_list['x'].append(x)
global_save_list['w'].append(w)
global_save_list['g'].append(g)
global_save_list['des'].append(des)
#custom_vjp
def qmvm(x, w):
return jnp.dot(x, w)
def qmvm_fwd(x, w):
return qmvm(x, w), (x, w)
def qmvm_bwd(ctx, g):
x, w = ctx
# here I would like to save gradients g - or at least running statistics of them
# experimental solution with id_tap
id_tap(global_save_func, ((x, w, g)))
fwd_grad = jnp.dot(g, w.transpose())
w_grad = jnp.dot(x, g.transpose())
return fwd_grad, w_grad
qmvm.defvjp(qmvm_fwd, qmvm_bwd)
def run_nn(x, w):
out = qmvm(x, w) # 1st MVM
out = qmvm(out, w) # 2nd MVM
return out
run_nn_batched = vmap(run_nn)
#jit
def loss(x, w, target):
out = run_nn_batched(x, w)
return jnp.sum((out - target)**2)
key = random.PRNGKey(42)
subkey1, subkey2, subkey3 = random.split(key, 3)
A = random.uniform(subkey1, (10, 10, 10), minval = -10, maxval = 10)
B = random.uniform(subkey2, (10, 10, 10), minval = -10, maxval = 10)
C = random.uniform(subkey3, (10, 10, 10), minval = -10, maxval = 10)
for e in range(10):
gval = grad(loss, argnums = 0)(A, B, C)
# some type of update rule
# here I would like to access gradients, preferably knowing to which MVM (1st or 2nd) and example they belong
# experimental solution:
print(global_save_list)

Pytorch, slicing tensor causes RuntimeError:: one of the variables needed for gradient computation has been modified by an inplace operation:

I wrote a RNN with LSTM cell with Pycharm. The peculiarity of this network is that the output of the RNN is fed into a integration opeartion, computed with Runge-kutta.
The integration takes some input and propagate that in time one step ahead. In order to do so I need to slice the feature tensor X along the batch dimension, and pass this to the Runge-kutta.
class MyLSTM(torch.nn.Module):
def __init__(self, ni, no, sampling_interval, nh=10, nlayers=1):
super(MyLSTM, self).__init__()
self.device = torch.device("cpu")
self.dtype = torch.float
self.ni = ni
self.no = no
self.nh = nh
self.nlayers = nlayers
self.lstms = torch.nn.ModuleList(
[torch.nn.LSTMCell(self.ni, self.nh)] + [torch.nn.LSTMCell(self.nh, self.nh) for i in range(nlayers - 1)])
self.out = torch.nn.Linear(self.nh, self.no)
self.do = torch.nn.Dropout(p=0.2)
self.actfn = torch.nn.Sigmoid()
self.sampling_interval = sampling_interval
self.scaler_states = None
# Options
# description of the whole block
def forward(self, x, h0, train=False, integrate_ode=True):
x0 = x.clone().requires_grad_(True)
hs = x # initiate hidden state
if h0 is None:
h = torch.zeros(hs.shape[0], self.nh, device=self.device)
c = torch.zeros(hs.shape[0], self.nh, device=self.device)
else:
(h, c) = h0
# LSTM cells
for i in range(self.nlayers):
h, c = self.lstms[i](hs, (h, c))
if train:
hs = self.do(h)
else:
hs = h
# Output layer
# y = self.actfn(self.out(hs))
y = self.out(hs)
if integrate_ode:
p = y
y = self.integrate(x0, p)
return y, (h, c)
def integrate(self, x0, p):
# RK4 steps per interval
M = 4
DT = self.sampling_interval / M
X = x0
# X = self.scaler_features.inverse_transform(x0)
for b in range(X.shape[0]):
xx = X[b, :]
for j in range(M):
k1 = self.ode(xx, p[b, :])
k2 = self.ode(xx + DT / 2 * k1, p[b, :])
k3 = self.ode(xx + DT / 2 * k2, p[b, :])
k4 = self.ode(xx + DT * k3, p[b, :])
xx = xx + DT / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
X_all[b, :] = xx
return X_all
def ode(self, x0, y):
# Here I a dynamic model
I get this error:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of SelectBackward, is at version 64; expected version 63 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
the problem is in the operations xx = X[b, :] and p[b,:]. I know that because I choose batch dimension of 1, then I can replace the previous two equations with xx=X and p, and this works. How can split the tensor without loosing the gradient?
I had the same question, and after a lot of searching, I added .detach() function after "h" and "c" in the RNN cell.

How to debug if weight keep increasing. Pytorch program

I m having some doubt when practicing Pytorch program.
I have function like y = m1x1 + m2x2 + c (just 2 weights to learn here). The expected values of weight should be 16,-14 and bias should be 36. But in every epoch the learned wight goes very big. Can any one help me to debug and understand this 20 lines of code, what going wrong here.
import torch
x = torch.randint(size = (1,2), high = 10)
w = torch.Tensor([16,-14])
b = 36
#Compute Ground Truth
y = w * x + b
#Find weights by program
epoch = 20
learning_rate = 30
#initialize random
w1 = torch.rand(size= (1,2), requires_grad= True)
b1 = torch.ones(size = [1], requires_grad= True)
for i in range(epoch):
y1 = w1 * x + b1
#loss function RMSQ
loss = torch.sum((y1-y)**2)
#Find gradient
loss.backward()
with torch.no_grad():
#update parameters
w1 -= (learning_rate * w1.grad)
b1 -= (learning_rate * b1.grad)
w1.grad.zero_()
b1.grad.zero_()
print("B ", b1)
print("W ", w1)
Thanks,
Ganesh
You have a very large learning rate.
This is an illustration from Jeremy Jordan's blog that explains exactly what is going on in your case.

Tensorflow Neural Network for Binary Classicication; how do I use placeholder

Here is my code:
My target is a vector with shape(N,) which is a vector with only binary numbers
However, I'm running into compiling errors
/Library/Frameworks/Python.framework/Versions/3.6/bin/python3.6 /Users/Lai/Dropbox/PersonalProject/MachineLearningForSports/models/NeuralNetwork.py
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
"This module will be removed in 0.20.", DeprecationWarning)
Traceback (most recent call last):
File "/Users/Lai/Dropbox/PersonalProject/MachineLearningForSports/models/NeuralNetwork.py", line 102, in <module>
_, c = sess.run([optimizer,cost],feed_dict = {x:batch_x,y:batch_y})
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 943, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (100,) for Tensor 'Placeholder_1:0', which has shape '(?, 2)'
Since my batch size is 100; I believe that the error is at when comparing my target to my predictions. The tf.placable seems make the prediction with N*2, although I'm sure. Any help ?? Thanks
import tensorflow as tf
import DataPrepare as dp
import numpy as np
def random_init(x,num_feature_1st,num_feature_2nd,num_class):
W1 = tf.Variable(tf.random_normal([num_feature_1st,num_feature_2nd]))
bias1 = tf.Variable(tf.random_normal([num_feature_2nd]))
W2 = tf.Variable(tf.random_normal([num_feature_2nd,num_class]))
bias2 = tf.Variable(tf.random_normal([num_class]))
return [W1,bias1,W2,bias2]
def softsign(z):
"""The softsign function, applied elementwise."""
return z / (1. + np.abs(z))
def multilayer_perceptron(x,num_feature_1st,num_feature_2nd,num_class):
params = random_init(x,num_feature_1st,num_feature_2nd,num_class)
layer_1 = tf.add(tf.matmul(x,params[0]),params[1])
layer_1 = softsign(layer_1)
#layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1,params[2]),params[3])
#output = tf.nn.softmax(layer_2)
output = tf.nn.sigmoid(layer_2)
return output
def next_batch(num, dataX,dataY):
idx = np.arange(0,len(dataX))
np.random.shuffle(idx)
idx = idx[0:num]
dataX_shuffle = [dataX[i] for i in idx]
dataY_shuffle = [dataY[i] for i in idx]
dataX_shuffle = np.asarray(dataX_shuffle)
dataY_shuffle = np.asarray(dataY_shuffle)
return dataX_shuffle, dataY_shuffle
if __name__ == "__main__":
#sess = tf.InteractiveSession()
learning_rate = 0.001
training_epochs = 10
batch_size = 100
display_step = 1
num_feature_1st = 6
num_feature_2nd = 500
num_class = 2
x = tf.placeholder('float', [None, 6])
y = tf.placeholder('float',[None,2])
data = dp.dataPrepare(dp.datas,dp.path)
trainX = data[0]
testX = data[1] # a matrix
trainY = data[2] # a vector with binary number
testY = data[3]
params = random_init(x,num_feature_1st,num_feature_2nd,num_class)
# construct model
pred = multilayer_perceptron(x, num_feature_1st, num_feature_2nd, num_class)
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
#train
for epoch in range(training_epochs):
avg_cost = 0
total_batch = int(len(trainX[:,0])/batch_size)
for i in range(total_batch):
batch_x, batch_y = next_batch(batch_size,trainX,trainY)
_, c = sess.run([optimizer,cost],feed_dict = {x:batch_x,y:batch_y})
avg_cost += c/total_batch
if epoch % display_step ==0:
print("Epoch: ", "%04d" % (epoch+1), " cost= ", "{:.9f}".format(avg_cost))
print("Optimization Finished!")
Whenever you execute a dynamic node from the computation graph - which is pretty much any node that is not an input - you need to specify all dependent variables. Think about this this way: If you had a mathematical function of the form
y = f(x) = Ax + b (for example)
and you wanted to evaluate that function, you need to specify x as well. You need not, however, specify x if you wanted to evaluate (i.e. read) the value of A, since A is known (at least in this context).
Consequently, you can evaluate (by passing it to tf.Session.run(...) the parameters of your network without specifying the inputs (A in the example above). You cannot, however, evaluate the output of your functions without specifying the inputs (in the example, you need to specify x).
As for your code, the following line will thus not work:
print(sess.run(pred)), since you ask the session to evaluate a function without specifying its inputs.

What are the problems that causes neural networks stagnate in learning?

I was trying to see how accurate a neural network can approximate simple functions, like a scalar-valued polynomial in several variables. So I had these ideas:
Fix a polynomial of several variables, say, f(x_1,..,x_n).
Generate 50000 vectors of length n using numpy.random which will serve as training data.
Evaluate the f(x) at these points, the value will be used as label.
Make test data and label in the same way
Write a neural network and see how accuracy it can approximate f(x) on test set.
Here is my sample neural network implemented in tensorflow
import tensorflow as tf
import numpy as np
input_vector_length = int(10)
output_vector_length = int(1)
train_data_size = int(50000)
test_data_size = int(10000)
train_input_domain = [-10, 10] #Each component in an input vector is between -10 and 10
test_input_domain = [-10, 10]
iterations = 20000
batch_size = 200
regularizer = 0.01
sess = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, input_vector_length], name="x")
y = tf.placeholder(tf.float32, shape =[None, output_vector_length], name="y")
function = tf.reduce_sum(x, 1) + 0.25*tf.pow(tf.reduce_sum(x,1), 2) + 0.025*tf.pow(tf.reduce_sum(x,1), 3)
#make train data input
train_input = (train_input_domain[1]-train_input_domain[0])*np.random.rand(train_data_size, input_vector_length) + train_input_domain[0]
#make train data label
train_label = sess.run(function, feed_dict = {x : train_input})
train_label = train_label.reshape(train_data_size, output_vector_length)
#make test data input
test_input = (test_input_domain[1]-test_input_domain[0])*np.random.rand(test_data_size, input_vector_length) + test_input_domain[0]
#make test data label
test_label = sess.run(function, feed_dict = {x : test_input})
test_label = test_label.reshape(test_data_size, output_vector_length)
def weight_variables(shape, name):
initial = 10*tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def bias_variables(shape, name):
initial = 10*tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
def take_this_batch(data, batch_index=[]):
A = []
for i in range(len(batch_index)):
A.append(data[i])
return A
W_0 = weight_variables(shape=[input_vector_length, 10], name="W_0")
B_0 = bias_variables(shape=[10], name="W_0")
y_1 = tf.sigmoid(tf.matmul(x, W_0) + B_0)
W_1 = weight_variables(shape=[10, 20], name="W_1")
B_1 = bias_variables(shape=[20], name="B_1")
y_2 = tf.sigmoid(tf.matmul(y_1, W_1) + B_1)
W_2 = weight_variables(shape=[20,40], name="W_2")
B_2 = bias_variables(shape=[40], name="B_2")
y_3 = tf.sigmoid(tf.matmul(y_2, W_2) + B_2)
keep_prob = tf.placeholder(tf.float32, name="keep_prob")
y_drop = tf.nn.dropout(y_3, keep_prob)
W_output = weight_variables(shape=[40, output_vector_length], name="W_output")
B_output = bias_variables(shape=[output_vector_length], name="B_output")
y_output = tf.matmul(y_drop, W_output) + B_output
weight_sum = tf.reduce_sum(tf.square(W_0)) + tf.reduce_sum(tf.square(W_1)) + tf.reduce_sum(tf.square(W_2)) + tf.reduce_sum(tf.square(W_3))
cost = tf.reduce_mean(tf.square(y - y_output)) + regularizer*(weight_sum)
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
error = cost
sess.run(tf.initialize_all_variables())
with sess.as_default():
for step in range(iterations):
batch_index = np.random.randint(low=0, high=train_data_size, size=batch_size)
batch_input = take_this_batch(train_input, batch_index)
batch_label = take_this_batch(train_label, batch_index)
train_step.run(feed_dict = {x : batch_input, y:batch_label, keep_prob:0.5})
if step % 1000 == 0:
current_error = error.eval(feed_dict = {x:batch_input, y:batch_label, keep_prob:1.0})
print("step %d, Current error is %f" % (step,current_error))
print(error.eval(feed_dict={x:test_input, y:test_label, keep_prob:1.0}))
Simply speaking, the performance of this neural network is horrifying! My neural network has three hidden layers of size 10, 20 and 40. The input layer is of size 10, and the output layer has size 1. I used a simple L^2 cost function, and I regularized it with the square of weights and regularizer 0.01.
During training stage, I noticed that the error seems to get stuck and refuses to go down. I am wondering what could go wrong? Thanks a lot for reading this long question. Any suggestion is appreciated.
Since you are using sigmoid as the activation function in the hidden layers, the value at these neurons is reduced to the range of (0,1). Hence, it is a good idea to normalize the input data for this network.

Resources