Python multiprocessing + torch tensors: memory problems

I want to generate a dataset (containing numpy arrays) using the Python multiprocessing module, then convert the arrays to torch tensors to train a GNN. After each epoch, I want to replace a certain proportion of this dataset with new data. When I run the following minimal example of generating and updating the dataset, there are sharp peaks in memory usage after each iteration (please see the profile below), until the script eventually crashes with an OOM-kill event.
What are those spikes in the memory usage?
import numpy as np
from multiprocessing import Pool, cpu_count
from torch_geometric.data import Data
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

def generate_buffer():
    repeated_args = [arguments] * buffer_size
    # create batches in parallel:
    with Pool(processes=cpu_count()) as pool:
        buffer = pool.starmap(generate_batch, repeated_args)
    # flatten the buffer:
    buffer = [item for sublist in buffer for item in sublist]
    # convert the lists of numpy arrays to torch Data objects containing torch GPU tensors
    for i in range(len(buffer)):
        X = torch.tensor(buffer[i][0], dtype=torch.float, device=device)
        edge_index = torch.tensor(buffer[i][1], dtype=torch.long, device=device)
        edge_attr = torch.tensor(buffer[i][2], dtype=torch.float, device=device)
        y = torch.tensor(buffer[i][3], dtype=torch.float, device=device)
        buffer[i] = Data(x=X, edge_index=edge_index, edge_attr=edge_attr, y=y)
    return buffer

def update_buffer(buffer):
    # delete the first entries of the buffer:
    del buffer[: (replacements_per_iteration * batch_size * len(error_rate))]
    # create a list of repeated arguments for all processes:
    repeated_args = [arguments] * replacements_per_iteration
    # create batches in parallel:
    with Pool(processes=cpu_count()) as pool:
        new_data = pool.starmap(generate_batch, repeated_args)
    # flatten the data:
    new_data = [item for sublist in new_data for item in sublist]
    # convert the lists of numpy arrays to torch Data objects containing torch GPU tensors
    for i in range(len(new_data)):
        X = torch.tensor(new_data[i][0], dtype=torch.float, device=device)
        edge_index = torch.tensor(new_data[i][1], dtype=torch.long, device=device)
        edge_attr = torch.tensor(new_data[i][2], dtype=torch.float, device=device)
        y = torch.tensor(new_data[i][3], dtype=torch.float, device=device)
        new_data[i] = Data(x=X, edge_index=edge_index, edge_attr=edge_attr, y=y)
    # append to buffer:
    buffer.extend(new_data)
    del new_data
    return buffer

def generate_batch(arguments):
    batch = []
    # need to create a different seed in every worker process:
    np.random.seed()
    for _ in range(batch_size):
        graph = generate_sample(arguments)
        batch.append(graph)
    return batch

def generate_sample(arguments):
    # Generating a sample (syndrome measurement of a rotated surface
    # code cycle (a quantum error correction scheme)) and mapping
    # it to a graph representation using basic numpy operations
    return [X, edge_index, edge_attr, y]

if __name__ == '__main__':
    buffer = generate_buffer()
    for i in range(num_iterations):
        # update the buffer:
        buffer = update_buffer(buffer)
Not converting the data to torch tensors, or not using the multiprocessing module (looping over buffer_size instead), somewhat flattens the spikes. However, I would like to keep the speedup from multiprocessing and torch tensors for my application.
(Memory profiles are also shown for the variants I tried: no torch tensors, a plain loop instead of multiprocessing, and a single process, i.e. Pool(processes = 1).)
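For comparison, a minimal sketch of the serial variant mentioned above (looping over buffer_size instead of using a Pool), assuming generate_batch, arguments and buffer_size are defined as in the example:

def generate_buffer_serial():
    # same work as generate_buffer, but in a single process
    buffer = []
    for _ in range(buffer_size):
        buffer.extend(generate_batch(arguments))
    # the numpy -> torch conversion then proceeds as above
    return buffer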
Thanks for your help!

Related

my LSTM time-series forecast predictions have a shift to the future

I am trying to make predictions on univariate time-series data using TensorFlow. However, when I look at the results, the predictions have a time shift (please see the figure). Any ideas what the problem is?
Below is the function I use to prepare the set of inputs and outputs (X and Y), as well as to normalize the data:
# define a function to prepare the X and Y for training the model
def df_to_X_y_group(df_as_np, window_size, num):  # input is a numpy array; num is the number of wavelengths in a group
    X = []  # make an empty list
    y = []
    for j in range(0, df_as_np.shape[1] - num, num):  # (start, stop, step): a window over the features of size num, without overlap
        for i in range(len(df_as_np) - window_size):  # there is overlap between windows in time
            row = [a for a in df_as_np[i:i+window_size, j:j+num]]
            X.append(row)
            label = [r for r in df_as_np[i+window_size, j:j+num]]
            y.append(label)
    return np.array(X), np.array(y)

# convert the data to X and y format
window_size_group = 50
output_group = 2
num = 2
X_group, y_group = df_to_X_y_group(df_as_np, window_size_group, num)  # X_group: (samples, window_size, num), y_group: (samples, num)
X_group.shape, y_group.shape

# Split the data into train, val and test sets: 70%, 20% and 10% of the data
X_train_group, y_train_group = X_group[:int(.7 * X_group.shape[0])], y_group[:int(.7 * X_group.shape[0])]
X_val_group, y_val_group = X_group[int(.7 * X_group.shape[0]):int(.9 * X_group.shape[0])], y_group[int(.7 * X_group.shape[0]):int(.9 * X_group.shape[0])]
X_test_group, y_test_group = X_group[int(.9 * X_group.shape[0]):], y_group[int(.9 * X_group.shape[0]):]

# Normalize the data using the training mean and std
X_train_mean_group = np.mean(X_train_group)
X_train_std_group = np.std(X_train_group)

def preprocess_group(X):
    X_norm = (X - X_train_mean_group) / X_train_std_group
    return X_norm

# Convert the data back to the original scale (undo the normalization)
def postprocess_group(pred):
    actual = (pred * X_train_std_group) + X_train_mean_group
    return actual

# apply the preprocessing to the inputs and outputs
X_train_norm_group = preprocess_group(X_train_group)
X_val_norm_group = preprocess_group(X_val_group)
X_test_norm_group = preprocess_group(X_test_group)
y_train_norm_group = preprocess_group(y_train_group)
y_val_norm_group = preprocess_group(y_val_group)
y_test_norm_group = preprocess_group(y_test_group)
X_train_group.shape, y_train_group.shape, X_val_group.shape, y_val_group.shape, X_test_group.shape, y_test_group.shape
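For illustration, a toy run of df_to_X_y_group (shapes only), assuming a small random array stands in for df_as_np:

toy = np.random.rand(100, 4)  # 100 time steps, 4 columns (two groups of num=2)
X_toy, y_toy = df_to_X_y_group(toy, window_size=10, num=2)
# range(0, 4 - 2, 2) only yields j = 0, so only the first group of columns is
# windowed here: X_toy.shape == (90, 10, 2) and y_toy.shape == (90, 2)
print(X_toy.shape, y_toy.shape)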
Here is the model:
# Build the model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint  # to save the model
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.losses import Huber
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

model_group = Sequential()
model_group.add(InputLayer((X_group.shape[1], X_group.shape[2])))  # input shape: (window_size time steps, num features)
model_group.add(LSTM(128))
model_group.add(Dense(20, 'relu'))
model_group.add(Dense(output_group))
model_group.summary()

# Train the model
cp2 = ModelCheckpoint('model/group/overlapnum2', save_best_only=True)  # saves the best model based on validation error
model_group.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
model_group.fit(X_train_norm_group, y_train_norm_group, validation_data=(X_val_norm_group, y_val_norm_group), epochs=15, callbacks=[cp2])
Below is the function to predict and plot the data:
# A function which predicts and plots
from sklearn.metrics import mean_squared_error as mse
import matplotlib.pyplot as plt

def plot_predictions(model, X, y, start=0, end=100):
    predictions = model.predict(X[start:end])
    predictions_actual = postprocess_group(predictions)
    y_actual = postprocess_group(y[start:end])
    plt.plot(predictions_actual, c='r', label='Predictions')
    plt.plot(y_actual, label='Actuals')
    plt.xlabel("Order of the data (Time)")
    plt.ylabel("Number of Photons")
    plt.legend()  # so the labels show up
    return mse(y_actual, predictions_actual)

# plot
plot_predictions(model_group, X_test_norm_group, y_test_norm_group, start=0, end=300)
Here are the prediction results, which are shifted in time (to match the true results, y(t) would have to be replaced with y(t+1)):
(Figure: predictions vs. actuals, with the predictions lagging the actuals.)
I have tried tuning several hyperparameters such as the batch size and window size, and adding layers, but none of these helped.

Expected improvement optimization with GEKKO

I am trying to optimize the expected improvement function for Bayesian optimization applications. For this, I am using the scikit-learn Gaussian Process model embedded into the GEKKO optimization suite. When solving the optimization model the following error is shown:
#error: Model Expression
*** Error in syntax of function string: Missing operator
Position: 128
((0.5)((1+(((2/pi))(atan(((((2)((((v1-i320))/(((2)(sqrt(2))))))))((1+(((((v1-i320))/(((2)(sqrt(2))))))^(4))))))))-(0.0)))=0)
The code is below:
import numpy as np
import pandas as pd
from gekko import GEKKO
from gekko.ML import Gekko_GPR
from gekko.ML import CustomMinMaxGekkoScaler
import sklearn.gaussian_process as gpr

# Training data
x_train = np.array([0.6, 0.9, 0.3, 0.45, 1.05, 0.75, 0.15,
                    0.225, 0.825, 1.125]).reshape(-1,1)
y_train = np.array([-0.809016994, 0.809016994, -0.309016994, -0.951056516,
                    0.951056516, -1.83772E-16, 0.587785252, 0.156434465,
                    0.4539905, 0.707106781]).reshape(-1,1)

# Additional information
lb = [0.0]                   # lower bound
ub = [1.2]                   # upper bound
n_dim = len(lb)              # number of dimensions
n_train = x_train.shape[0]   # size of the training set

# Function to fit the Gaussian process
def gp_fit(data_s, gp_reg):
    d_array = data_s.to_numpy()
    x_tr = d_array[:,1].reshape(-1,1)
    y_tr = d_array[:,-1].reshape(-1,1)
    gp_model = gp_reg.fit(x_tr, y_tr)
    return gp_model  # it delivers the gp model object

# gekko scaler definition
data = pd.DataFrame(np.hstack((x_train, y_train)), columns=['x', 'y'])
features = ['x']
label = ['y']
scaler = CustomMinMaxGekkoScaler(data, features, label)
data_s = scaler.scaledData()  # scaled data

# kernel and gp regressor definition
bounds_m = (1e-4, 3000)  # bounds for the hyperparameters
kernel_main = gpr.kernels.Matern(length_scale=np.ones(n_dim),
                                 length_scale_bounds=bounds_m,
                                 nu=2.5)
constant_kernel = gpr.kernels.ConstantKernel(1.0, constant_value_bounds=bounds_m)
white_kernel = gpr.kernels.WhiteKernel(1.0, noise_level_bounds=(1.13e-07, 1.83e-02))
K_cov = constant_kernel*kernel_main + white_kernel
gp_regressor = gpr.GaussianProcessRegressor(kernel=K_cov, alpha=1e-8,
                                            optimizer='fmin_l_bfgs_b',
                                            n_restarts_optimizer=50,
                                            random_state=20)

# gp_model creation
gp_model = gp_fit(data_s, gp_regressor)  # training the model with the scaled data

# gekko model definition and solution
m = GEKKO(remote=False)      # model definition
x = m.Var(0.4, lb=0, ub=1)   # definition of the scaled variable
y, std = Gekko_GPR(gp_model, m).predict(x, return_std=True)  # gp prediction with std

# constants
epsilon = m.Const(0.01, 'epsilon')
best_y = m.Const(1.0, 'best_y')
pi_m = m.Const(np.pi, 'pi')

# equations
Z = (y - best_y - epsilon)/std == 0.0
pdf = 1/(std*m.sqrt(2*pi_m))*m.exp(-0.5*((x-y)/std)**2) == 0.0
erf = 2/pi_m*m.atan(2*((x-y)/(2*m.sqrt(2)))*(1+((x-y)/(2*m.sqrt(2)))**4)) == 0.0
cdf = 0.5*(1+erf) == 0
m.Equations([Z, pdf, erf, cdf])

# objective function
ei = Z*std*cdf + std*pdf
m.Maximize(ei)
m.options.IMODE = 3  # steady state optimization
m.solve(disp=True)
I was able to fix your error, but I am unable to get it fully working. Here is what I suggest:
For your objective function and cdf equation, you are using Gekko equation objects (like erf) as if they were variables. I suggest reformulating some of that with Gekko Intermediate values, like so:
# equations
tZ = m.Intermediate((y - best_y - epsilon)/std)
Z = tZ == 0.0
tpdf = m.Intermediate(1/(std*m.sqrt(2*pi_m))*m.exp(-0.5*((x-y)/std)**2))
pdf = tpdf == 0.0
terf = m.Intermediate(2/pi_m*m.atan(2*((x-y)/(2*m.sqrt(2)))*(1+((x-y)/(2*m.sqrt(2)))**4)))
erf = terf == 0.0
tcdf = m.Intermediate(0.5*(1+terf))
cdf = tcdf == 0.0
m.Equations([Z, pdf, erf, cdf])
# objective function
ei = tZ*std*tcdf + std*tpdf
Changing this causes Gekko to throw a "TOO_FEW_DEGREES_OF_FREEDOM" error, since you are trying to solve 4 equations with only 1 variable. I suggest making these equations soft constraints (minimizing their residuals rather than forcing them to 0), or adding additional variables to the problem statement; a sketch of the soft-constraint idea follows.
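A minimal sketch of that soft-constraint reformulation, reusing the Intermediates above; the penalty weight w and the squared-residual form are assumptions for illustration, not a validated expected-improvement formulation:

# replace m.Equations([Z, pdf, erf, cdf]) with penalty terms that are
# minimized alongside the main objective (soft constraints)
w = 100.0  # penalty weight, arbitrary choice
m.Minimize(w * tZ**2)
m.Minimize(w * tpdf**2)
m.Minimize(w * terf**2)
m.Minimize(w * tcdf**2)

# objective function built directly from the Intermediates
ei = tZ*std*tcdf + std*tpdf
m.Maximize(ei)
m.options.IMODE = 3
m.solve(disp=True)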

Deploy pytorch model on webcam

I am trying to deploy a PyTorch classifier on a webcam, but I keep getting errors, mostly "AttributeError: 'collections.OrderedDict' object has no attribute 'load_state_dict'". The classifier is a binary classifier, and I saved the model as a .pt file.
I hope you can help me resolve the issue.
Here is the code I am using:
import numpy as np
import torch
import torch.nn
import torchvision
from torch.autograd import Variable
from torchvision import transforms
import PIL
import cv2

# This is the label map
Labels = { 0 : 'Perfect',
           1 : 'Defected' }

# Let's preprocess the input frame
data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(224, 224)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # assigning the device which will do the calculation
model = torch.load("defect_classifier.pt")  # load model
model.load_state_dict(torch.load("defect_classifier.pt"))
model = model.to(device)  # set where to run the model and matrix calculation
model.eval()  # set the model to eval() mode for testing

# Set the webcam
def Webcam_720p():
    cap.set(3, 1280)
    cap.set(4, 720)

def argmax(prediction):
    prediction = prediction.cpu()
    prediction = prediction.detach().numpy()
    top_1 = np.argmax(prediction, axis=1)
    score = np.amax(prediction)
    score = '{:6f}'.format(score)
    prediction = top_1[0]
    result = Labels[prediction]
    return result, score

def preprocess(image):
    image = PIL.Image.fromarray(image)  # webcam frames are numpy arrays, so transform back to a PIL image
    print(image)
    image = data_transforms(image)
    image = image.float()
    # image = Variable(image, requires_autograd=True)
    image = image.cuda()
    image = image.unsqueeze(0)  # I don't know for sure, but the ResNet-50 model seems to only accept
    return image                # a 4-D tensor, so we add a batch dimension to our 3-D tensor

# Let's start the real-time classification process!
cap = cv2.VideoCapture(0)  # set the webcam
Webcam_720p()

fps = 0
show_score = 0
show_res = 'Nothing'
sequence = 0

while True:
    ret, frame = cap.read()  # capture each frame
    if fps == 4:
        image = frame[100:450, 150:570]
        image_data = preprocess(image)
        print(image_data)
        prediction = model(image_data)
        result, score = argmax(prediction)
        fps = 0
        if result >= 0.5:
            show_res = result
            show_score = score
        else:
            show_res = "Nothing"
            show_score = score
    fps += 1
    cv2.putText(frame, '%s' %(show_res), (950,250), cv2.FONT_HERSHEY_SIMPLEX, 2, (255,255,255), 3)
    cv2.putText(frame, '(score = %.5f)' %(show_score), (950,300), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2)
    cv2.rectangle(frame, (400,150), (900,550), (250,0,0), 2)
    cv2.imshow("ASL SIGN DETECTER", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyWindow("ASL SIGN DETECTER")
Don't use this line:
model = torch.load("defect_classifier.pt")
Instead use model = Your_model_class(), since model has to be an instance of your model class before you can call load_state_dict on it (the .pt file here evidently only contains the state dict, i.e. an OrderedDict of weights).
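A minimal sketch of the usual pattern, assuming the .pt file was saved with torch.save(model.state_dict(), ...) and that DefectClassifier is a hypothetical name for your model class:

model = DefectClassifier()  # hypothetical: an instance of your own model class
state_dict = torch.load("defect_classifier.pt", map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()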

Tensorflow Grid3LSTMCell visualization

I'm having a difficult time visualizing what this TensorFlow class creates. I want to implement an LSTM RNN that handles 3D data.
class Grid3LSTMCell(GridRNNCell):
  """3D BasicLSTM cell
  This creates a 2D cell which receives input and gives output in the first dimension.
  The first dimension can optionally be non-recurrent if `non_recurrent_fn` is specified.
  The second and third dimensions are LSTM.
  """
  def __init__(self, num_units, tied=False, non_recurrent_fn=None,
               use_peepholes=False, forget_bias=1.0):
    super(Grid3LSTMCell, self).__init__(num_units=num_units, num_dims=3,
        input_dims=0, output_dims=0, priority_dims=0, tied=tied,
        non_recurrent_dims=None if non_recurrent_fn is None else 0,
        cell_fn=lambda n, i: rnn_cell.LSTMCell(
            num_units=n, input_size=i, forget_bias=forget_bias,
            use_peepholes=use_peepholes),
        non_recurrent_fn=non_recurrent_fn)
The class is found in `from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell`.
This is difficult to explain, so I've provided a drawing. Here is what I want it to do...
However, the comment sounds like it isn't doing this. It makes it sound like the RNN is still a flat RNN: the first dimension outputs to what is commonly called the outputs variable (see below), the second dimension outputs to the next step in the RNN, and the third dimension outputs to the next hidden layer.
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
If this is the case, what is the point of having separate first and second dimensions? Aren't they essentially the same thing? The BasicLSTMCell sends the output for the next step into outputs -- in other words, they are one and the same.
Clarity?
For reference, here is my example code...
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.contrib.grid_rnn.python.ops import grid_rnn_cell
import numpy as np

# define parameters
learning_rate = 0.01
batch_size = 2
n_input_x = 10
n_input_y = 10
n_input_z = 10
n_hidden = 128
n_classes = 2
n_output = n_input_x * n_classes

x = tf.placeholder("float", [n_input_x, n_input_y, n_input_z])
y = tf.placeholder("float", [n_input_x, n_input_y, n_input_z, n_classes])

weights = {}
biases = {}
for i in xrange(n_input_y * n_input_z):
    weights[i] = tf.Variable(tf.random_normal([n_hidden, n_output]))
    biases[i] = tf.Variable(tf.random_normal([n_output]))

# generate random data
input_data = np.random.rand(n_input_x, n_input_y, n_input_z)
ground_truth = np.random.rand(n_input_x, n_input_y, n_input_z, n_classes)

# build GridLSTM
def GridLSTM_network(x):
    x = tf.reshape(x, [-1, n_input_x])
    x = tf.split(0, n_input_y * n_input_z, x)
    lstm_cell = grid_rnn_cell.Grid3LSTMCell(n_hidden)
    outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
    output = []
    for i in xrange(n_input_y * n_input_z):
        output.append(tf.matmul(outputs[i], weights[i]) + biases[i])
    return output

# initialize network, cost, optimizer and all variables
pred = GridLSTM_network(x)
# import pdb
# pdb.set_trace()
pred = tf.pack(pred)
pred = tf.transpose(pred, [1,0,2])
pred = tf.reshape(pred, [-1, n_input_x, n_input_y, n_input_z, n_classes])
temp_pred = tf.reshape(pred, [-1, n_classes])
temp_y = tf.reshape(y, [-1, n_classes])

cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(0, tf.cast(tf.sub(tf.nn.sigmoid(temp_pred), temp_y), tf.int32))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 0
    while 1:
        print step
        step = step + 1
        # pdb.set_trace
        sess.run(optimizer, feed_dict={x: input_data, y: ground_truth})

Generating a spectrogram for a sequence of 2D movie frames

I have some data that consists of a sequence of video frames which represent changes in luminance over time relative to a moving baseline. In these videos there are two kinds of 'event' that can occur - 'localised' events, which consist of luminance changes in small groups of clustered pixels, and contaminating 'diffuse' events, which affect most of the pixels in the frame:
I'd like to be able to isolate local changes in luminance from diffuse events. I'm planning on doing this by subtracting an appropriately low-pass filtered version of each frame. In order to design an optimal filter, I'd like to know which spatial frequencies of my frames are modulated during diffuse and local events, i.e. I'd like to generate a spectrogram of my movie over time.
I can find lots of information about generating spectrograms for 1D data (e.g. audio), but I haven't come across much on generating spectrograms for 2D data. What I've tried so far is to generate a 2D power spectrum from the Fourier transform of the frame, then perform a polar transformation about the DC component and then average across angles to get a 1D power spectrum:
I then apply this to every frame in my movie, and generate a raster plot of spectral power over time:
Does this seem like a sensible approach to take? Is there a more 'standard' approach to doing spectral analysis on 2D data?
Here's my code:
import numpy as np
# from pyfftw.interfaces.scipy_fftpack import fft2, fftshift, fftfreq
from scipy.fftpack import fft2, fftshift, fftfreq
from matplotlib import pyplot as pp
from matplotlib.colors import LogNorm
from scipy.signal import windows
from scipy.ndimage.interpolation import map_coordinates

def compute_2d_psd(img, doplot=True, winfun=windows.hamming, winfunargs={}):

    nr, nc = img.shape
    win = make2DWindow((nr, nc), winfun, **winfunargs)

    f2 = fftshift(fft2(img*win))
    psd = np.abs(f2*f2)
    pol_psd = polar_transform(psd, centre=(nr//2, nc//2))

    mpow = np.nanmean(pol_psd, 0)
    stdpow = np.nanstd(pol_psd, 0)

    freq_r = fftshift(fftfreq(nr))
    freq_c = fftshift(fftfreq(nc))
    pos_freq = np.linspace(0, np.hypot(freq_r[-1], freq_c[-1]),
                           pol_psd.shape[1])

    if doplot:
        fig, ax = pp.subplots(2, 2)

        im0 = ax[0,0].imshow(img*win, cmap=pp.cm.gray)
        ax[0,0].set_axis_off()
        ax[0,0].set_title('Windowed image')

        lnorm = LogNorm(vmin=psd.min(), vmax=psd.max())
        ax[0,1].set_axis_bgcolor('k')
        im1 = ax[0,1].imshow(psd, extent=(freq_c[0], freq_c[-1],
                             freq_r[0], freq_r[-1]), aspect='auto',
                             cmap=pp.cm.hot, norm=lnorm)
        # cb1 = pp.colorbar(im1, ax=ax[0,1], use_gridspec=True)
        # cb1.set_label('Power (A.U.)')
        ax[0,1].set_title('2D power spectrum')

        ax[1,0].set_axis_bgcolor('k')
        im2 = ax[1,0].imshow(pol_psd, cmap=pp.cm.hot, norm=lnorm,
                             extent=(pos_freq[0], pos_freq[-1], 0, 360),
                             aspect='auto')
        ax[1,0].set_ylabel('Angle (deg)')
        ax[1,0].set_xlabel('Frequency (cycles/px)')
        # cb2 = pp.colorbar(im2, ax=(ax[0,1],ax[1,1]), use_gridspec=True)
        # cb2.set_label('Power (A.U.)')
        ax[1,0].set_title('Polar-transformed power spectrum')

        ax[1,1].hold(True)
        # ax[1,1].fill_between(pos_freq, mpow - stdpow, mpow + stdpow,
        #                      color='r', alpha=0.3)
        ax[1,1].axvline(0, c='k', ls='--', alpha=0.3)
        ax[1,1].plot(pos_freq, mpow, lw=3, c='r')
        ax[1,1].set_xlabel('Frequency (cycles/px)')
        ax[1,1].set_ylabel('Power (A.U.)')
        ax[1,1].set_yscale('log')
        ax[1,1].set_xlim(-0.05, None)
        ax[1,1].set_title('1D power spectrum')

        fig.tight_layout()

    return mpow, stdpow, pos_freq

def make2DWindow(shape, winfunc, *args, **kwargs):
    assert callable(winfunc)
    r, c = shape
    rvec = winfunc(r, *args, **kwargs)
    cvec = winfunc(c, *args, **kwargs)
    return np.outer(rvec, cvec)

def polar_transform(image, centre=(0,0), n_angles=None, n_radii=None):
    """
    Polar transformation of an image about the specified centre coordinate
    """
    shape = image.shape
    if n_angles is None:
        n_angles = shape[0]
    if n_radii is None:
        n_radii = shape[1]
    theta = -np.linspace(0, 2*np.pi, n_angles, endpoint=False).reshape(-1,1)
    d = np.hypot(shape[0]-centre[0], shape[1]-centre[1])
    radius = np.linspace(0, d, n_radii).reshape(1,-1)
    x = radius * np.sin(theta) + centre[0]
    y = radius * np.cos(theta) + centre[1]
    # nb: map_coordinates can give crazy negative values using higher order
    # interpolation, which introduce nans when you take the log later on
    output = map_coordinates(image, [x, y], order=1, cval=np.nan,
                             prefilter=True)
    return output
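For completeness, a minimal sketch of the per-frame loop described above (building the raster of 1D spectra over time); frames is assumed to be a 3D array of shape (n_frames, height, width):

# frames: hypothetical (n_frames, height, width) array of movie frames
spectra = []
for frame in frames:
    mpow, stdpow, pos_freq = compute_2d_psd(frame, doplot=False)
    spectra.append(mpow)
spectra = np.array(spectra)  # shape (n_frames, n_radii)

# raster plot of spectral power over time
pp.imshow(np.log10(spectra.T), aspect='auto', origin='lower',
          extent=(0, spectra.shape[0], pos_freq[0], pos_freq[-1]),
          cmap=pp.cm.hot)
pp.xlabel('Frame')
pp.ylabel('Frequency (cycles/px)')
pp.show()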
I believe that the approach you describe is in general the best way to do this analysis.
However, I did spot an error in your code:
np.abs(f2*f2)
is not the PSD of the complex array f2; you need to multiply f2 by its complex conjugate instead of by itself (|f2^2| is not the same as |f2|^2).
Instead you should do something like
(f2*np.conjugate(f2)).astype(float)
Or, more cleanly:
np.abs(f2)**2.
The oscillations in the 2D power-spectrum are a tell-tale sign of this kind of error (I've done this before myself!)
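In compute_2d_psd that would just mean changing the PSD line, for example:

f2 = fftshift(fft2(img*win))
psd = np.abs(f2)**2  # power spectral density, as suggested above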
