I'm trying to use the method cv2.estimateAffine3D but without success. Here is my code sample :
import numpy as np
import cv2
# Four 3-D points stored as a single row, giving an array of shape (1, 4, 3).
shape = (1, 4, 3)
source = np.array(
    [
        # [x, y, z]
        [857, 120, 854],
        [254, 120, 855],
        [256, 120, 255],
        [858, 120, 255],
    ],
    dtype=np.float32,
).reshape(shape)
# The target point set is the source scaled by a factor of ten.
target = 10 * source
retval, M, inliers = cv2.estimateAffine3D(source, target)
When I try to run this sample, I obtain the same error as this other post here.
I'm using OpenCV 2.4.3 and Python 2.7.3
Please help me!

This is a known bug that is fixed in 2.4.4.
If you just need rigid (rotation + translation) alignment, here's the standard method:
def get_rigid(src, dst):
    """Estimate the rigid transform (rotation + translation) mapping src onto dst.

    Least-squares alignment via the SVD of the cross-covariance matrix of the
    centred point sets (the Kabsch method).

    Args:
        src: N x 3 array-like of source points.
        dst: N x 3 array-like of target points; row i corresponds to src row i.

    Returns:
        A 3 x 4 array ``[R | T]`` such that ``dst ~= src.dot(R.T) + T``.
    """
    src = np.asarray(src, dtype=float)
    dst = np.asarray(dst, dtype=float)
    src_mean = src.mean(0)
    dst_mean = dst.mean(0)
    # Cross-covariance: the sum over all points of outer(src_i, dst_i) for the
    # centred sets. Written as one matrix product so it runs on Python 2 and 3
    # (no tuple-unpacking lambda, no builtin reduce).
    H = (src - src_mean).T.dot(dst - dst_mean)
    # numpy returns the third factor already transposed (V^T).
    u, s, vh = np.linalg.svd(H)
    R = vh.T.dot(u.T)  # Rotation
    if np.linalg.det(R) < 0:
        # The unconstrained optimum is a reflection; flip the direction that
        # belongs to the smallest singular value to get a proper rotation.
        vh[-1, :] *= -1
        R = vh.T.dot(u.T)
    T = -R.dot(src_mean) + dst_mean  # Translation
    return np.hstack((R, T[:, np.newaxis]))

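For Python 3, change the covariance line in the previous post to:
H = reduce(lambda s, a: s + np.outer(a[0], a[1]), zip(src - src_mean, dst - dst_mean), np.zeros((3,3)))
(and add `from functools import reduce`, because `reduce` is no longer a builtin in Python 3). I can't comment on that post because of my reputation score.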


Saving Gradient in Backward Pass Google-JAX

I am using JAX to implement a simple neural network (NN) and I want to access and save the gradients from the backward pass for further analysis after the NN ran. I can access and look at the gradients temporarily with the python debugger (as long as I am not using jit). But I want to save all gradients over the whole training process and analyze them after the training is done. I have come up with a rather hacky solution for this using id_tap and a global variable (see the code below). But I was wondering whether there is a better solution which does not violate the functional principles of JAX.
Many thanks!
import jax.numpy as jnp
from jax import grad, jit, vmap, random, custom_vjp
from jax.experimental.host_callback import id_tap
# experimental solution
# Module-level store filled by the host callback: one list per recorded quantity.
global_save_list = {'x': [], 'w': [], 'g': [], 'des': []}


def global_save_func(ctx, des):
    """Host-side callback that records one backward-pass snapshot.

    Args:
        ctx: tuple ``(x, w, g)`` as passed to ``id_tap`` by the backward rule.
        des: second argument supplied by ``id_tap`` to its callback
            (presumably the transforms descriptor; confirm against the
            host_callback documentation).
    """
    x, w, g = ctx
    # NOTE(review): the appends are reconstructed from the keys of
    # global_save_list; the original body after the unpacking was lost.
    global_save_list['x'].append(x)
    global_save_list['w'].append(w)
    global_save_list['g'].append(g)
    global_save_list['des'].append(des)
@custom_vjp
def qmvm(x, w):
    """Return the product ``x . w``; differentiated through a custom VJP.

    The ``custom_vjp`` decorator is required because the module later calls
    ``qmvm.defvjp(...)``, which only exists on a custom_vjp-wrapped function.
    """
    return jnp.dot(x, w)
def qmvm_fwd(x, w):
    """Forward rule: return the primal output and the residuals ``(x, w)``."""
    residuals = (x, w)
    primal_out = qmvm(x, w)
    return primal_out, residuals
def qmvm_bwd(ctx, g):
    """Backward rule for ``qmvm``: map the output cotangent to input cotangents.

    Args:
        ctx: residuals ``(x, w)`` saved by ``qmvm_fwd``.
        g: cotangent of the output ``x . w``.

    Returns:
        Tuple ``(fwd_grad, w_grad)``: cotangents for ``x`` and ``w``.
        Assumes ``x`` and ``w`` are 2-D per example, as in this script
        (TODO confirm for other callers).
    """
    x, w = ctx
    # here I would like to save gradients g - or at least running statistics of them
    # experimental solution with id_tap
    id_tap(global_save_func, (x, w, g))
    # For out = x . w:  d(out)/dx pulls g back through w^T ...
    fwd_grad = jnp.dot(g, w.transpose())
    # ... and d(out)/dw pulls g back through x^T, giving the same shape as w.
    # NOTE(review): the first operand was lost in this copy; x^T . g is the
    # mathematically correct cotangent. Confirm against the original.
    w_grad = jnp.dot(x.transpose(), g)
    return fwd_grad, w_grad
# Attach qmvm_fwd / qmvm_bwd as the forward and backward rules of qmvm.
qmvm.defvjp(qmvm_fwd, qmvm_bwd)
def run_nn(x, w):
    """Apply ``qmvm`` twice with the same weights and return the result."""
    hidden = qmvm(x, w)  # 1st MVM
    return qmvm(hidden, w)  # 2nd MVM
# Vectorised version of run_nn, produced by vmap with its default arguments.
run_nn_batched = vmap(run_nn)
def loss(x, w, target):
    """Sum of squared differences between the batched network output and target."""
    residual = run_nn_batched(x, w) - target
    return jnp.sum(residual ** 2)
key = random.PRNGKey(42)
subkey1, subkey2, subkey3 = random.split(key, 3)
# Three independent (10, 10, 10) tensors with uniform values between -10 and 10,
# passed to loss() as x, w and target respectively.
A, B, C = (
    random.uniform(subkey, (10, 10, 10), minval=-10, maxval=10)
    for subkey in (subkey1, subkey2, subkey3)
)
for e in range(10):
    # Gradient of the loss with respect to its first argument only.
    gval = grad(loss, argnums=0)(A, B, C)
    # some type of update rule
    # here I would like to access gradients, preferably knowing to which MVM (1st or 2nd) and example they belong
    # experimental solution:

error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize' OpenCV

I have this old code that used to run fine in Python 2.7 a while ago. I just updated the code to run in Python 3.8, but when I try to execute it with Python 3.8 and OpenCV 3.4 I get a resize error and a warning (below)!
Here is the link to the two tif images that are required to run this code.
It's worth noting that both tif images are in the same folder as the Python code
import cv2
import matplotlib.pyplot as plt
import numpy as np
## Code for C_preferred Mask and C_images ##
## There are three outputs to this code:
## Change the image names here
filename_mask = '1.tif'
filename_image = '2.tif'
## OpenCV version checking (optional): print cv2.__version__ to see which build is in use.
## Text report of the run. Despite its name, `filename` is the open file
## handle that the rest of the script writes to.
filename = open("Output_C.txt", "w")
filename.write(f"Processing Image : {filename_image}\n\n")
## Function to sort the contours. Tunable parameters: tolerance_factor and the
## size of the image. The script uses a fixed size of (800,800).
def get_contour_precedence(contour, cols, tolerance_factor=10):
    """Return a sort key ordering contours top-to-bottom, then left-to-right.

    The top edge of the contour's bounding rectangle is rounded down to a
    multiple of ``tolerance_factor``, so contours whose tops fall in the same
    band are treated as one row and ordered by their left edge.

    Args:
        contour: a contour accepted by ``cv2.boundingRect``.
        cols: multiplier applied to the row band; it should be at least the
            image width so that row order dominates column order.
        tolerance_factor: height in pixels of one row band (default 10, the
            value that used to be hard-coded).

    Returns:
        An integer usable as a ``sort`` / ``sorted`` key.
    """
    origin = cv2.boundingRect(contour)
    left, top = origin[0], origin[1]
    row_band = (top // tolerance_factor) * tolerance_factor
    return row_band * cols + left
## Loading the colored mask, resizing it to (800,800) and converting it to HSV space, so that the color values are emphasized
# cv2.imread returns None (it does not raise) when a file is missing or cannot
# be decoded, which later surfaces as the cryptic '!ssize.empty()' assertion in
# cv2.resize. Fail early with a readable message instead.
mask_bgr = cv2.imread(filename_mask)
if mask_bgr is None:
    raise IOError("Could not read mask image: " + str(filename_mask))
# imread delivers channels in BGR order, so convert with COLOR_BGR2HSV; this
# also matches the conversion used for the target colour below.
p_mask_c = cv2.cvtColor(cv2.resize(mask_bgr, (800, 800)), cv2.COLOR_BGR2HSV)
# Loading the original Image
image_bgr = cv2.imread(filename_image)
if image_bgr is None:
    raise IOError("Could not read image: " + str(filename_image))
b_image_1 = cv2.resize(image_bgr, (800, 800))
# convert the target color to HSV. The mask portion to be considered is green, so the target color is green
b = 0
g = 255
r = 0
# Converting target color to HSV space
target_color = np.uint8([[[b, g, r]]])
target_color_hsv = cv2.cvtColor(target_color, cv2.COLOR_BGR2HSV)
# boundaries for Hue define the proper color boundaries, saturation and values can vary a lot
# Convert to a Python int so that "hue - tolerance" cannot wrap around in uint8.
target_color_h = int(target_color_hsv[0, 0, 0])
tolerance = 20
lower_hsv = np.array([max(0, target_color_h - tolerance), 10, 10])
upper_hsv = np.array([min(179, target_color_h + tolerance), 250, 250])
# apply threshold on hsv image
mask = cv2.inRange(p_mask_c, lower_hsv, upper_hsv)
# Eroding the binary mask, such that the white portions (grids) are separated from each other, to avoid overlapping and mixing of
# adjacent grids
b_mask = mask
kernel = np.ones((5, 5))
#kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
sharp = cv2.erode(b_mask, kernel, iterations=2)
# Finding all the grids (from binary image).
# OpenCV 3.x returns (image, contours, hierarchy); 2.x and 4.x return
# (contours, hierarchy). Taking the last two items works for all of them.
contours, hierarchy = cv2.findContours(sharp, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
print (' Number of contours', len(contours))
# Sorting contours. sorted() is used because newer OpenCV builds return the
# contours as a tuple, which has no .sort(). The multiplier is the image width
# (number of columns), i.e. shape[1].
contours = sorted(contours, key=lambda c: get_contour_precedence(c, np.shape(b_mask)[1]))
#cv2.drawContours(b_image_1, contours, -1, (0,255,0), 1)
# Label variable for each grid/panel
label = 1
b_image = b_image_1.copy()
temp = np.zeros(np.shape(b_image_1), np.uint8)
print (' size of temp',np.shape(temp), np.shape(b_image))
out_img = b_image_1.copy()
# Processing in each contour/label one by one
for cnt in contours:
    cv2.drawContours(b_image_1, [cnt], 0, (255, 255, 0), 1)
    ## Just to draw labels in the center of each grid
    (x, y), _radius = cv2.minEnclosingCircle(cnt)
    x = int(x)
    y = int(y)
    cv2.putText(b_image_1, "#{}".format(label), (x - 10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    cv2.drawContours(temp, [cnt], 0, (255, 255, 255), -1)
    #crop_img = np.bitwise_and(b_image,temp)
    # Bounding rectangle as (x, y, width, height); kept separate from the
    # circle radius, which previously shared the name `r`.
    rect = cv2.boundingRect(cnt)
    crop_img = b_image[rect[1]:rect[1] + rect[3], rect[0]:rect[0] + rect[2]]
    mean = np.array(cv2.mean(crop_img)).reshape(-1, 1)
    print (' Mean color', mean, np.shape(mean))
    # Channel index 1 of the crop's mean decides the label. The two branches
    # are mutually exclusive; without the `else` every block was labelled both.
    if mean[1] < 50:
        cv2.putText(out_img, "M", (x - 10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
        filename.write("Block number #"+ str(label)+ ' is : ' + 'Magenta'+'\n')
    else:
        cv2.putText(out_img, "G", (x - 10, y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
        filename.write("Block number #"+ str(label)+ ' is : ' +'Gray'+'\n')
    label = label + 1
# Flush and release the report file once every block has been written.
filename.close()
[ WARN:0] global C:\projects\opencv-python\opencv\modules\imgcodecs\src\grfmt_tiff.cpp (449) cv::TiffDecoder::readData OpenCV TIFF: TIFFRGBAImageOK: Sorry, can not handle images with IEEE floating-point samples
Traceback (most recent call last):
File "", line 32, in
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
cv2.error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'
When you read in the image pass the cv::IMREAD_ANYDEPTH = 2 parameter as the second parameter in cv2.imread().
Changing your lines to
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2),(800,800)),cv2.COLOR_RGB2HSV);
b_image_1 = cv2.resize(cv2.imread(filename_image, 2),(800,800));
removes the resize error you're seeing.
But you then get another error when converting the color space, since your TIFF image apparently has only one channel, so cv2.COLOR_RGB2HSV won't work.
You could also use multiple flags like cv::IMREAD_COLOR = 1,
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2 | 1),(800,800)),cv2.COLOR_BGR2HSV);
to read in a color image. But you get a different error. Perhaps you understand this image better than I do and can solve the problem from here on out.

Logistic Regression not able to find value of theta

I have a hundred entries in a CSV file.
Using the above data, I am trying to build a logistic (binary) classifier.
Please advise me where I am going wrong. Why am I getting the answer as a 3×3 matrix (9 values of theta), whereas it should be only 3 values?
Here is code:
importing the libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
reading data from csv file.
# Load the two exam scores and the admission label (the CSV has no header row).
df = pd.read_csv("LogisticRegressionFirstBinaryClassifier.csv", header=None)
df.columns = ["Maths", "Physics", "AdmissionStatus"]
y = np.array(df[["AdmissionStatus"]])
# Scale the features with sklearn's preprocessing.normalize (default settings),
# then prepend a column of ones for the intercept term.
features = preprocessing.normalize(np.array(df[["Maths", "Physics"]]))
X = np.column_stack((np.ones(features.shape[0]), features))
# One parameter per column of X, initialised to 1.
theta = np.ones((X.shape[1], 1))
print(X.shape) # (100, 3)
print(y.shape) # (100, 1)
print(theta.shape) # (3, 1)
calc_z calculates the dot product of X and theta
def calc_z(X, theta):
    """Return the linear score ``z``: the dot product of X and theta."""
    return np.dot(X, theta)
Sigmoid function
def sigmoid(z):
    """Logistic function ``1 / (1 + e^-z)``, applied element-wise by numpy."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def cost_function(X, y, theta):
    """Mean binary cross-entropy of the logistic model for the given theta."""
    h = sigmoid(calc_z(X, theta))
    per_sample_loss = -y * np.log(h) - (1 - y) * np.log(1 - h)
    return per_sample_loss.mean()
# Show the cost for the current (initial) theta.
print("cost_function =" , cost_function(X, y, theta))
def derivativeofcostfunction(X, y, theta):
    """Gradient of the logistic cost with respect to theta (not divided by m).

    Returns ``X^T (h - y)`` as a column with the same shape as ``theta``.
    Returning the untransposed (1, n) row would broadcast against the (n, 1)
    theta in ``theta - lr * gradient`` and silently produce an (n, n) matrix.
    """
    z = calc_z(X, theta)
    h = sigmoid(z)
    calculation = np.dot((h - y).T, X)
    return calculation.T
# Show the gradient for the current (initial) theta.
print("derivativeofcostfunction=", derivativeofcostfunction(X, y, theta))
def grad_desc(X, y, theta, lr=.001, converge_change=.001):
    """Run gradient descent until the cost improves by at most converge_change.

    Args:
        X, y: design matrix and labels passed through to the cost functions.
        theta: starting parameters.
        lr: learning rate (step size).
        converge_change: stop once the decrease in cost between two
            consecutive steps is no larger than this value.

    Returns:
        Tuple ``(theta, num_iter)``: the final parameters and the iteration
        counter, which starts at 1 and is incremented once per update.
    """
    cost = cost_function(X, y, theta)
    num_iter = 1
    change_cost = 1
    while change_cost > converge_change:
        previous_cost = cost
        gradient = derivativeofcostfunction(X, y, theta)
        print (gradient)
        theta = theta - lr * gradient
        cost = cost_function(X, y, theta)
        change_cost = previous_cost - cost
        num_iter += 1
    return theta, num_iter
Here is the output :
[[ 0.4185146 -0.56877556 0.63999433]
[15.39722864 9.73995197 11.07882445]
[12.77277463 7.93485324 9.24909626]]
[[0.33944777 0.58199037 0.52493407]
[0.02106587 0.36300629 0.30297278]
[0.07040604 0.3969297 0.33737757]]
[[-0.05856159 -0.89826735 0.30849185]
[15.18035041 9.59004868 10.92827046]
[12.4804775 7.73302024 9.04599788]]
[[0.33950634 0.58288863 0.52462558]
[0.00588552 0.35341624 0.29204451]
[0.05792556 0.38919668 0.32833157]]
[[-5.17526527e-01 -1.21534937e+00 -1.03387571e-02]
[ 1.49729502e+01 9.44663458e+00 1.07843504e+01]
[ 1.21978140e+01 7.53778010e+00 8.84964495e+00]]
(array([[ 0.34002386, 0.58410398, 0.52463592],
[-0.00908743, 0.34396961, 0.28126016],
[ 0.04572775, 0.3816589 , 0.31948193]]), 46)
I changed this code: I just added a transpose when returning the matrix, and it fixed my issue.
def derivativeofcostfunction(X, y, theta):
    """Gradient of the logistic cost with respect to theta, shaped like theta.

    The transpose on return turns the (1, n) row ``(h - y)^T X`` into an
    (n, 1) column, so ``theta - lr * gradient`` stays (n, 1) instead of
    broadcasting to (n, n).
    """
    z = calc_z(X, theta)
    h = sigmoid(z)
    calculation = np.dot((h - y).T, X)
    return calculation.T

Transform OpenGL convention camera to OpenCV convention matrix

I am given 4 camera extrinsic parameter matrices, and I wrote some code to display those cameras and their vector systems in 3D.
Here is the code:
# Plot every camera in `views` in a 3-D matplotlib figure: its name is written
# at the camera position, and its u, v, w vectors are computed (scaled by 100).
# NOTE(review): this copy of the function has lost its indentation and several
# expressions. The gaps are flagged below and must be restored from the
# original before the function can run.
def plot_cameras(views):
fig = plt.figure()
ax = fig.gca(projection='3d')
for name, view in views.items():
#for name, view in {'test_cam': 0}.items():
# NOTE(review): right-hand side missing. Presumably the camera's extrinsic
# matrix (at least 3x4, given the indexing below) obtained from `view`; confirm.
m =
#m = Camera.make_lookat_m(
# colvec([10, 10, 10]),
# colvec( [0,0,0] ),
# colvec([0, 0, -1])
# )
# Split the matrix into its 3x3 block and its fourth column.
r = m[:3, :3].copy()
r_t = r.T
t = m[:3, 3].copy()
# NOTE(review): right-hand side missing. Presumably the camera centre computed
# from r_t and t (e.g. -r_t . t); confirm.
pos =
x_cam, y_cam, z_cam = pos # Camera pose
# Columns of r_t, scaled by 100 so they are visible in the plot.
u = 100*r_t[:, 0]
v = 100*r_t[:, 1]
w = 100*r_t[:, 2] # Camera u,v,w vectors
ax.text(x_cam, y_cam, z_cam, name)
# NOTE(review): the nine coordinate pairs below look like the arguments of
# three line-plot calls (one per vector u, v, w, each from the camera position
# to position + vector) whose call syntax is missing; confirm.
[x_cam, x_cam + u[0]],
[y_cam, y_cam + u[1]],
[z_cam, z_cam + u[2]],
[x_cam, x_cam + v[0]],
[y_cam, y_cam + v[1]],
[z_cam, z_cam + v[2]],
[x_cam, x_cam + w[0]],
[y_cam, y_cam + w[1]],
[z_cam, z_cam + w[2]],
I have 4 cameras at 0, +-25 and +90 degrees of the target.
I am told that these cameras are in OpenCV convention but my function clearly shows they are in OpenGL convention (looking down negative z axis).
Am I properly decomposing the camera matrix and extracting the vectors?
If so, is there a way of transforming my OpenGL-style camera matrices into OpenCV-style?

How to compute the cosine_similarity in pytorch for all rows in a matrix with respect to all rows in another matrix

In PyTorch, given that I have 2 matrices, how would I compute the cosine similarity of all rows in each with all rows in the other?
For example
Given the input =
matrix_1 = [a b]
[c d]
matrix_2 = [e f]
[g h]
I would like the output to be
output =
[cosine_sim([a b] [e f]) cosine_sim([a b] [g h])]
[cosine_sim([c d] [e f]) cosine_sim([c d] [g h])]
At the moment I am using torch.nn.functional.cosine_similarity(matrix_1, matrix_2) which returns the cosine of the row with only that corresponding row in the other matrix.
In my example I have only 2 rows, but I would like a solution which works for many rows. I would even like to handle the case where the number of rows in the each matrix is different.
I realize that I could use the expand, however I want to do it without using such a large memory footprint.
By manually computing the similarity and playing with matrix multiplication + transposition:
import torch
from scipy import spatial
import numpy as np
a = torch.randn(2, 2)
b = torch.randn(3, 2) # different row number, for the fun
# Given that cos_sim(u, v) = dot(u, v) / (norm(u) * norm(v))
#                          = dot(u / norm(u), v / norm(v))
# We first normalize the rows, before computing their dot products via transposition:
a_norm = a / a.norm(dim=1)[:, None]
b_norm = b / b.norm(dim=1)[:, None]
# (rows of a) x (rows of b)^T gives every pairwise dot product in one matmul.
res = torch.mm(a_norm, b_norm.transpose(0,1))
# Example output (the inputs are random, so values differ per run):
# 0.9978 -0.9986 -0.9985
# -0.8629 0.9172 0.9172
# -------
# Let's verify with numpy/scipy if our computations are correct:
a_n = a.numpy()
b_n = b.numpy()
res_n = np.zeros((a_n.shape[0], b_n.shape[0]))
for i in range(a_n.shape[0]):
    for j in range(b_n.shape[0]):
        # cos_sim(u, v) = 1 - cos_dist(u, v)
        res_n[i, j] = 1 - spatial.distance.cosine(a_n[i], b_n[j])
# Example output for the same run as above:
# [[ 0.9978022 -0.99855876 -0.99854881]
# [-0.86285472 0.91716063 0.9172349 ]]
Adding eps for numerical stability, based on benjaminplanche's answer:
def sim_matrix(a, b, eps=1e-8):
    """Pairwise cosine similarity between the rows of ``a`` and the rows of ``b``.

    added eps for numerical stability

    Args:
        a: 2-D tensor of shape (n, d).
        b: 2-D tensor of shape (m, d).
        eps: lower bound applied to each row norm, so all-zero rows give a
            similarity of 0 instead of NaN.

    Returns:
        Tensor of shape (n, m); entry (i, j) is cos_sim(a[i], b[j]).
    """
    a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
    a_norm = a / torch.max(a_n, eps * torch.ones_like(a_n))
    b_norm = b / torch.max(b_n, eps * torch.ones_like(b_n))
    sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
    return sim_mt
Same as Zhang Yu's answer, but using clamp instead of max and without creating a new tensor. I did a small test with timeit, which indicated that clamp was faster, though I am not proficient in using that tool.
def sim_matrix(a, b, eps=1e-8):
    """Pairwise cosine similarity between the rows of ``a`` and the rows of ``b``.

    added eps for numerical stability

    Args:
        a: 2-D tensor of shape (n, d).
        b: 2-D tensor of shape (m, d).
        eps: lower bound the row norms are clamped to, so all-zero rows give
            a similarity of 0 instead of NaN.

    Returns:
        Tensor of shape (n, m); entry (i, j) is cos_sim(a[i], b[j]).
    """
    a_n, b_n = a.norm(dim=1)[:, None], b.norm(dim=1)[:, None]
    a_norm = a / torch.clamp(a_n, min=eps)
    b_norm = b / torch.clamp(b_n, min=eps)
    sim_mt = torch.mm(a_norm, b_norm.transpose(0, 1))
    return sim_mt
You could use TorchMetrics's pairwise_cosine_similarity (from torchmetrics.functional import pairwise_cosine_similarity) to calculate the cosine similarity for two matrices with different shapes. Refer to the TorchMetrics documentation for details; an example:
>>> import torch
>>> from torchmetrics.functional import pairwise_cosine_similarity
>>> x = torch.tensor([[2, 3], [3, 5], [5, 8]], dtype=torch.float32)
>>> y = torch.tensor([[1, 0], [2, 1]], dtype=torch.float32)
>>> pairwise_cosine_similarity(x, y)
tensor([[0.5547, 0.8682],
[0.5145, 0.8437],
[0.5300, 0.8533]])
>>> pairwise_cosine_similarity(x)
tensor([[0.0000, 0.9989, 0.9996],
[0.9989, 0.0000, 0.9998],
[0.9996, 0.9998, 0.0000]])
It is unnecessary to use a loop to calculate the similarity between the row/column vectors in a matrix. Here is an example.
import torch as t
a = t.randn(2,4)
n_rows = a.size(0)
# step 1. compute the length (L2 norm) of each row vector
len_a = t.sqrt(t.sum(a**2,dim=-1))
# b[i, j] = |a_i| and c[i, j] = |a_j|, so b * c holds every product of norms.
b = len_a.unsqueeze(1).expand(-1, n_rows)
c = len_a.expand(n_rows, -1)
# step 2. compute the pairwise dot products of the rows
x = a @ a.T
# step 3. compute the final result: dot products divided by the norm products
res = x/(b*c)
You can expand the 2 input batches, perform the pairwise cosine similarity operation, then transpose it:
Non-cloning equivalents of torch.repeat_interleave and torch.repeat are used.
def distance_matrix(x, y, distance_function):
    """Apply a row-wise ``distance_function`` to every (x row, y row) pair.

    Args:
        x: 2-D tensor with n_x rows.
        y: 2-D tensor with n_y rows.
        distance_function: callable taking two equally shaped 2-D tensors and
            returning one value per row pair, e.g. ``F.cosine_similarity``.

    Returns:
        Tensor of shape (n_x, n_y); entry (i, j) is the value for x[i], y[j].
    """
    n_x, n_y = x.size(0), y.size(0)
    # Each row of x repeated n_y times in a row (repeat_interleave-like).
    x_rows = (
        x.view(n_x, 1, x.size(1))
        .expand(n_x, n_y, x.size(1))
        .contiguous()
        .view(-1, x.size(1))
    )
    # The whole of y tiled n_x times (repeat-like).
    y_rows = y.expand(n_x, n_y, y.size(1)).flatten(end_dim=1)
    return distance_function(x_rows, y_rows).view(n_x, n_y)
from torch.nn import functional as F
distance_matrix(x, y, F.cosine_similarity)
