So I am trying to build a transform , but i am stuck at masking in decoder. So i am using Multi head attention so my input shape of (64,64,128) is converted to (64,8,64,16) for head = 8. For attention i used scaled multiplicative attention. So my attention layer gets input shape (64,8,64,16). I am having trouble calculating mask for the decoder multihead attention. As in scaled multiplicative attention the shape changes after matrix multiplication from (64,8,64,16) to (64,8,64,64) so how should i create a mask for this matrix.
My scaled multiplicative attention layer:
class ScaledMultiplicativeAttentionLayer(Layer):
def __init__(self):
super(ScaledMultiplicativeAttentionLayer,self).__init__()
def call(self,query,key,value,dim_key,mask=None):
print(query.shape)
mul = tf.matmul(query,key,transpose_b=True) / tf.sqrt(tf.cast(dim_key,tf.float32))
print(mul.shape)
if mask is not None:
mul += -1e9 * tf.cast(mask,tf.float32)
sfm = K.softmax(mul)
return tf.matmul(sfm,value)
My multihead attention :
class MultiHeadAttentionLayer(Layer):
def __init__(self,heads,dim_key,dim_val,dim_model,**kwargs):
super(MultiHeadAttentionLayer,self).__init__(**kwargs)
self.attention = ScaledMultiplicativeAttentionLayer()
self.heads = heads
self.dim_key = dim_key
self.dim_val = dim_val
self.dim_model = dim_model
self.Wq = tf.keras.layers.Dense(dim_key)
self.Wk = tf.keras.layers.Dense(dim_key)
self.Wv = tf.keras.layers.Dense(dim_val)
self.Wo = tf.keras.layers.Dense(dim_model)
def reshape_tensor(self,x,heads,flag):
if flag:
x = tf.reshape(x,shape=(x.shape[0],x.shape[1],heads,-1)) # (batch_size,seq_len,dim_key) -> (batch_size,seq_len,head,-1)
x = tf.transpose(x,perm=[0,2,1,3]) # (batch_size,seq_len,head,-1) -> (batch_size,head,seq_len,-1)
else:
x = tf.transpose(x,perm=[0,2,1,3]) # (batch_size,head,seq_len,-1) -> (batch_size,seq_len,head,-1)
x = tf.reshape(x,shape=(x.shape[0],x.shape[1],-1)) # (batch_size,head,seq_len,-1) -> (batch_size,seq_len,dim_model)
return x
def call(self,query,key,val,return_score=False,mask=None):
q_reshaped = self.reshape_tensor(self.Wq(query),self.heads,True)
k_reshaped = self.reshape_tensor(self.Wk(key),self.heads,True)
v_reshaped = self.reshape_tensor(self.Wv(val),self.heads,True)
if mask is not None:
mask = self.reshape_tensor(mask,self.heads,True)
o_reshaped = self.attention(q_reshaped,k_reshaped,v_reshaped,self.dim_key,mask)
output = self.reshape_tensor(o_reshaped,self.heads,False)
if return_score:
return self.Wo(output),K.sum(output,axis=1)
return self.Wo(output)
For now i am creating mask using this function:
def create_mask(shape):
mask = np.triu(np.ones((shape[-2],shape[-1])),1)
mask = np.expand_dims(mask,0)
mask = tf.tile(mask,[shape[0],1,1])
return mask
this create mask of shape same as input shape
I get error shape mismatch error at line
mul += -1e9 * tf.cast(mask,tf.float32)
because mul shape and mask shape is different. I cant seem to get the jist of what actual mask should look like? Help me understand mask and how to calculate mask.
Tried reshaping but no use. reproduce error here Reproduce Error Collab
Related
I've been trying to code a 3D scanner that uses structured light (one camera and projector only). I'm using Taubin and Moreno's software (at their site that is somehow down right now, use wayback machine to check it out.) to obtain the projector intrinsics and extrinsics as a start, and the linear least squares triangulation method described here.
However, regardless of the objects scanned, the point clouds obtained are warped in a convex manner (see images below). This is most likely not a intrinsics/extrinsics/distortion parameter issue, as the same calibration parameters gives a proper point cloud when using the software linked above. I'm also inclined to say that my decoding process is not faulty, as the row and column correspondences appear to be correct (see below). Using a previously decoded dataset also gives the same issue.
def linearLS_triangulation(u_c, u_p, P_c, P_p, A, B):
"""
Performs linear least squares triangulation via an overdetermined linear
system
Reference:
http://users.cecs.anu.edu.au/~hartley/Papers/triangulation/triangulation.pdf
"""
A[0][0] = u_c[0]*P_c[2][0] - P_c[0][0]
A[0][1] = u_c[0]*P_c[2][1] - P_c[0][1]
A[0][2] = u_c[0]*P_c[2][2] - P_c[0][2]
A[1][0] = u_c[1]*P_c[2][0] - P_c[1][0]
A[1][1] = u_c[1]*P_c[2][1] - P_c[1][1]
A[1][2] = u_c[1]*P_c[2][2] - P_c[1][2]
A[2][0] = u_p[0]*P_p[2][0] - P_p[0][0]
A[2][1] = u_p[0]*P_p[2][1] - P_p[0][1]
A[2][2] = u_p[0]*P_p[2][2] - P_p[0][2]
A[3][0] = u_p[1]*P_p[2][0] - P_p[1][0]
A[3][1] = u_p[1]*P_p[2][1] - P_p[1][1]
A[3][2] = u_p[1]*P_p[2][2] - P_p[1][2]
B[0][0] = -(u_c[0] * P_c[2][3] - P_c[0][3])
B[1][0] = -(u_c[1] * P_c[2][3] - P_c[1][3])
B[2][0] = -(u_p[0] * P_p[2][3] - P_p[0][3])
B[3][0] = -(u_p[1] * P_p[2][3] - P_p[1][3])
# Use of the normal equation, np.linalg.lstsq also works!
w = np.linalg.inv(A.T.dot(A)).dot(A.T).dot(B)
return w[:, 0]
def get_cam_points(decoded, K_c):
"""
Get list of camera pixels that have a correspondence to projector pixels
Returned in global coordinates, where world centre is centre of projection
of the camera
"""
[height, width] = np.nonzero(decoded[0])
points_cam = np.zeros([3, height.shape[0]], dtype = np.float)
K_c_inv = np.linalg.inv(K_c)
for i in range(height.shape[0]):
points_cam[:, i] = [width[i], height[i], 1]
points_cam = np.dot(K_c_inv, points_cam)
return points_cam
def get_proj_pixels(width_p, height_p, K_p, dist_p, R_p, T_p):
"""
Passes the resolution of the projector along with the intrinsics and
extrinsics, computing the mapping from projector pixels to the optical
rays returned in [x, y, z] for each pixel in the 'image'
This assumes that the camera is the origin, with rotation and translation
matrixes of the projector respect to that.
"""
column_p = np.arange(width_p, dtype = np.float)
row_p = np.arange(height_p, dtype = np.float)
C, R = np.meshgrid(column_p, row_p)
uv_p = np.zeros([np.ravel(C).shape[0], 1, 2], dtype = np.float)
uv_p[:, 0, :] = np.c_[np.ravel(C),np.ravel(R)]
uv_p = cv2.undistortPoints(uv_p, K_p, dist_p)
uv_p = uv_p[:,0,:]
uv_p = np.c_[uv_p, np.ones([np.ravel(C).shape[0]])]
uv_p = uv_p.transpose()
uv_grid = np.zeros([3, height_p, width_p], dtype = np.float)
uv_grid[0] = np.reshape(uv_p[0, :], [height_p, width_p])
uv_grid[1] = np.reshape(uv_p[1, :], [height_p, width_p])
uv_grid[2] = np.reshape(uv_p[2, :], [height_p, width_p])
return uv_grid
def triangulate_all(decoded, P_c, P_p, dist_p, K_c, K_p, width_p, height_p):
[height, width] = np.nonzero(decoded[0])
points = np.zeros([3, height.shape[0]], dtype = np.float)
points_cam = get_cam_points(decoded, K_c)
points_proj = np.zeros([3, height.shape[0]], dtype = np.float)
uv_grid = get_proj_pixels(width_p, height_p, K_p, dist_p,
P_p[:, :3], P_p[:, 3].reshape(-1, 1))
# Get list of projector pixels corresponding to non-zero camera pixels
for i in range(height.shape[0]):
inter = decoded[:, height[i], width[i]]
points_proj[:, i] = uv_grid[:, inter[1], inter[0]]
A = np.zeros((4, 3), dtype = np.float)
B = np.zeros((4, 1), dtype = np.float)
for i in range(height.shape[0]):
points[:, i] = linearLS_triangulation(points_cam[:, i],
points_proj[:, i],
P_c, P_p, A, B)
return points
print('Loading calibration parameters...')
calib_params = cv2.FileStorage('calibration.yml', cv2.FILE_STORAGE_READ)
dist_c = calib_params.getNode('cam_kc').mat()
dist_p = calib_params.getNode('proj_kc').mat()
K_c = calib_params.getNode('cam_K').mat()
K_p = calib_params.getNode('proj_K').mat()
R_p = calib_params.getNode('R').mat()
R_p = R_p.transpose() # Rotation matrix of projector with respect to camera origin
R_c = np.array([[1,0,0],[0,1,0],[0,0,1]])
T_p = calib_params.getNode('T').mat()
T_c = np.array([0,0,0])
width_p = 1920
height_p = 1080
P_c = np.c_[R_c, T_c]
P_p = np.c_[R_p, T_p]
print('Loading color image...')
color = cv2.imread(scandir + 'Image01.jpg')
color = color/255
print('Loading decoded matrix...')
# A 2 x imgheight x imgwidth (in pixels) matrix, with the first channel being the column (x-direction)
# estimates and the second channel being the row (y-direction) pixel estimates of the projector.
# E.g. a pixel at point [300, 400] (Origin at top left of image!) would correspond to the projector
# pixels of [16, 4] (Origin at top left). A zero would indicate the lack of correspondence for that
# specific pixel
decoded = np.load('Decoded Matrix.npy')
points = triangulate_all(decoded, P_c, P_p, dist_p, K_c, K_p,
width_p, height_p)
Point cloud and original image
Another point cloud, and the decoded row and column estimates
Help would be greatly appreciated! At a loss of what to do.
Edit:
Got rid of the line normalizing the ray vectors
I have a processed image with text in it and I want to find the coordinates of lines which would touch the edges of the text field, but would not cross it and would strech through the whole side of text. Image below shows what I need (the red lines I drew show the example of what coordinates I want to find on a raw image):
It is not so straightforward, I can't just find the edges of processed field of text (upper left, upper right and so on), because it may be, f.e. a start of a paragraph (this is just an example of the possible scenario):
The sides of the text form a straight line, it is the top and bottom edges may be curved, so that could make things easier.
What is the best way to do this?
Any method I can think of is either not practical, inneficient or may usually give false results.
The raw image in case someone needs for processing:
The idea is to find the convex hull of all of the text. After we find the convex hull we find its sides. If the side Has a big change in its y coordinate and a small change in the x coordinate (i.e. the line has a high slope) we will consider it as a side line.
The resulted image:
the code:
import cv2
import numpy as np
def getConvexCoord(convexH, ind):
yLines = []
xLine = []
for index in range(len(ind[0])):
convexIndex = ind[0][index]
# Get point
if convexIndex == len(convexH) - 1:
p0 = convexH[0]
p1 = convexH[convexIndex]
else:
p0 = convexH[convexIndex]
p1 = convexH[convexIndex + 1]
# Add y corrdinate
yLines.append(p0[0, 1])
yLines.append(p1[0, 1])
xLine.append(p0[0, 0])
xLine.append(p1[0, 0])
return yLines,xLine
def filterLine(line):
sortX = sorted(line)
# Find the median
xMedian = np.median(sortX)
while ((sortX[-1] - sortX[0]) > I.shape[0]):
# Find out which is farther from the median and discard
lastValueDistance = np.abs(xMedian - sortX[-1])
firstValueDistance = np.abs(xMedian - sortX[0])
if lastValueDistance > firstValueDistance:
# Discard last
del sortX[-1]
else:
# Discard first
del sortX[0]
# Now return mixX and maxX
return max(sortX),min(sortX)
# Read image
Irgb = cv2.imread('text.jpg')
I = Irgb[:,:,0]
# Threshold
ret, Ithresh = cv2.threshold(I,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
# Find the convex hull of the text
textPixels = np.nonzero(Ithresh)
textPixels = zip(textPixels[1],textPixels[0])
convexH = cv2.convexHull(np.asarray(textPixels))
# Find the side edges in the convex hull
m = []
for index in range((len(convexH))-1):
# Calculate the angle of the line
point0 = convexH[index]
point1 = convexH[index+1]
if(point1[0,0]-point0[0,0]) == 0:
m.append(90)
else:
m.append(float((point1[0,1]-point0[0,1]))/float((point1[0,0]-point0[0,0])))
# Final line
point0 = convexH[index+1]
point1 = convexH[0]
if(point1[0,0]-point0[0,0]) == 0:
m.append(90)
else:
m.append(np.abs(float((point1[0,1]-point0[0,1]))/float((point1[0,0]-point0[0,0]))))
# Take all the lines with the big m
ind1 = np.where(np.asarray(m)>1)
ind2 = np.where(np.asarray(m)<-1)
# For both lines find min Y an max Y
yLines1,xLine1 = getConvexCoord(convexH,ind1)
yLines2,xLine2 = getConvexCoord(convexH,ind2)
yLines = yLines1 + yLines2
# Filter xLines. If we the difference between the min and the max are more than 1/2 the size of the image we filter it out
minY = np.min(np.asarray(yLines))
maxY = np.max(np.asarray(yLines))
maxX1,minX1 = filterLine(xLine1)
maxX2,minX2 = filterLine(xLine2)
# Change final lines to have minY and maxY
line1 = ((minX1,minY),(maxX1,maxY))
line2 = ((maxX2,minY),(minX2,maxY))
# Plot lines
IrgbWithLines = Irgb
cv2.line(IrgbWithLines,line1[0],line1[1],(0, 0, 255),2)
cv2.line(IrgbWithLines,line2[0],line2[1],(0, 0, 255),2)
Remarks:
The algorithm assumes that the y coordinate change is bigger than the x coordinate change. This will not be true for very high perspective distortions (45 degrees). In this case maybe you should use k-means on the slopes and take the group with the higher slopes as the vertical lines.
The lines marked in red colour on the sides could be found using image closing operation.
Please find below the matlab output after imclose operation with structuring element of type square and size 4.'
The matlab code is as follow:
I = rgb2gray(imread('image.jpg'));
imshow(I); title('image');
Ibinary = im2bw(I);
figure,imshow(Ibinary);
se = strel('square',4);
Iclose = imclose(Ibinary,se);
figure,imshow(Iclose); title('side lines');
I am working on palm print identification by palm texture and geometry. i want to binarized hand image in preprocessing step for extracting geometry features like palm width and finger width.
i have used Gaussian filter for reduced noise and Otsu method for thresholding but I could not reach Optimal image! i was wondering if someone help me!
my database downloaded from "IIT Delhi Touch-less Palm print "
I=imread('hand.jpg');
h= fspecial('gaussian', 15,5);
s=imfilter(I,h,'symmetric');
q=graythresh(I)
BW=im2bw(I,q);
I have tried the following code and getting some promising result on your dropbox images. you can try it and share your results for further approach.
clc
clear all
close all
impath = 'E:\GoogleDrive\Mathworks\irisDEt\HandSeg';
[name,path] = uigetfile({'*.jpg';'*.png'},'mytitle',impath);
im =imread([path,name]);
im = imresize(im,0.5);
gms = 15;
red = im(:,:,1);
redmed = medfilt2(red,[gms,gms],'symmetric');
redmedbw = im2bw(redmed,0.9*graythresh(redmed));
redmedbw = bwareaopen(redmedbw,1500);
redmedbw = imclose(redmedbw,strel('disk',5));
figure,imshow(im,[])
figure,imshow(redmed,[])
figure,imshow(redmedbw,[])
My results are:
Code for signature estimation of the structure and extraction on critical(peaks and vallys) from the structure:
function [sig,xysamp,idx]= signature(bw,prec)
boundry = bwboundaries(bw);
xy = boundry{1};
x = xy(:,1);
y = xy(:,2);
len = length(x);
res = (len/prec);
re = rem(res,2);
if re
res = ceil(res);
end
indexes = 1:res:len;
xnew = x(indexes);
ynew = y(indexes);
xysamp = [xnew,ynew] ;
cx = round(mean(xnew));
cy = round(mean(ynew));
xn = abs(xnew-cx);
yn = abs(ynew-cy);
% ang = atand(yn./xn);
sig = (xn.^2+yn.^2);
sig = sig/max(sig);
% Critical Points in Signatures.
diffsig = diff(sig);
% pos = zeros(length(diffsig),1);
idx = 1;
for i = 2:length(diffsig)
if diffsig(i-1)*diffsig(i) <0
idx = [idx,i];
end
end
idx = [idx,i];
Here idx are the indexes of xysamp which gives actual boundry location in the image. the location of peaks and vallys may not be exact as i m doing sampling of boundry and it is a very simple way to approach the structural based problems.
Thank You
Result of critical point extraction:
I am trying to make a shape recognition classifier in which if you give an individual picture of an object (from a scene), it would be able to classify (after machine learning) the shape of an object (cylinder, cube, sphere, etc).
Original scene:
Individual objects it will classify:
I attempted to do this using cv2.approxPolyDB with an attempt to classify a cylinder. However, either my implementation isn't good or this wasn't a good choice of an algorithm to choose in the first place, the objects in the shape of cylinders were assigned a approxPolyDB value of 3 or 4.
Perhaps I can threshold and, in general, if given a value of 3 or 4, assume the object is a cylinder, but I feel like it's not the most reliable method for 3D shape classification. I feel like there is a better way to implement this and a better method as opposed to just hardcoding values. I feel like that with this method, it can easily confuse a cylinder with a cube.
Is there any way I can improve my 3D shape recognition program?
Code:
import cv2
import numpy as np
from pyimagesearch import imutils
from PIL import Image
from time import time
def invert_img(img):
img = (255-img)
return img
def threshold(im):
imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
imgray = cv2.medianBlur(imgray,9)
imgray = cv2.Canny(imgray,75,200)
return imgray
def view_all_contours(im, size_min, size_max):
main = np.array([[]])
cnt_target = im.copy()
for c in cnts:
epsilon = 0.1*cv2.arcLength(c,True)
approx = cv2.approxPolyDP(c,epsilon,True)
area = cv2.contourArea(c)
print 'area: ', area
test = im.copy()
# To weed out contours that are too small or big
if area > size_min and area < size_max:
print c[0,0]
print 'approx: ', len(approx)
max_pos = c.max(axis=0)
max_x = max_pos[0,0]
max_y = max_pos[0,1]
min_pos = c.min(axis=0)
min_x = min_pos[0,0]
min_y = min_pos[0,1]
# Load each contour onto image
cv2.drawContours(cnt_target, c, -1,(0,0,255),2)
print 'Found object'
frame_f = test[min_y:max_y , min_x:max_x]
main = np.append(main, approx[None,:][None,:])
thresh = frame_f.copy()
thresh = threshold(thresh)
contours_small, hierarchy = cv2.findContours(thresh.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts_small = sorted(contours_small, key = cv2.contourArea, reverse = True)
cv2.drawContours(frame_f, cnts_small, -1,(0,0,255),2)
cv2.imshow('Thresh', thresh)
cv2.imshow('Show Ya', frame_f)
cv2.waitKey(0)
# Uncomment in order to show all rectangles in image
print '---------------------------------------------'
#cv2.drawContours(cnt_target, cnts, -1,(0,255,0),2)
print main.shape
print main
return cnt_target
time_1 = time()
roi = cv2.imread('images/beach_trash_3.jpg')
hsv = cv2.cvtColor(roi,cv2.COLOR_BGR2HSV)
target = cv2.imread('images/beach_trash_3.jpg')
target = imutils.resize(target, height = 400)
hsvt = cv2.cvtColor(target,cv2.COLOR_BGR2HSV)
img_height = target.shape[0]
img_width = target.shape[1]
# calculating object histogram
roihist = cv2.calcHist([hsv],[0, 1], None, [180, 256], [0, 180, 0, 256] )
# normalize histogram and apply backprojection
cv2.normalize(roihist,roihist,0,255,cv2.NORM_MINMAX)
dst = cv2.calcBackProject([hsvt],[0,1],roihist,[0,180,0,256],1)
# Now convolute with circular disc
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
cv2.filter2D(dst,-1,disc,dst)
# threshold and binary AND
ret,thresh = cv2.threshold(dst,50,255,0)
thresh_one = thresh.copy()
thresh = cv2.merge((thresh,thresh,thresh))
res = cv2.bitwise_and(target,thresh)
# Implementing morphological erosion & dilation
kernel = np.ones((9,9),np.uint8) # (6,6) to get more contours (9,9) to reduce noise
thresh_one = cv2.erode(thresh_one, kernel, iterations = 3)
thresh_one = cv2.dilate(thresh_one, kernel, iterations=2)
# Invert the image
thresh_one = invert_img(thresh_one)
# To show prev img
#res = np.vstack((target,thresh,res))
#cv2.imwrite('res.jpg',res)
#cv2.waitKey(0)
#cv2.imshow('Before contours', thresh_one)
cnt_target = target.copy()
cnt_full = target.copy()
# Code to draw the contours
contours, hierarchy = cv2.findContours(thresh_one.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
print time() - time_1
size_min = 200
size_max = 5000
cnt_target = view_all_contours(target, size_min, size_max)
cv2.drawContours(cnt_full, cnts, -1,(0,0,255),2)
res = imutils.resize(thresh_one, height = 700)
cv2.imshow('Original image', target)
cv2.imshow('Preprocessed', thresh_one)
cv2.imshow('All contours', cnt_full)
cv2.imshow('Filtered contours', cnt_target)
cv2.waitKey(0)
I am trying to preform face tracking with the Lucas Kanade algorithm with Haar Cascade Classification. The Lucas Kanade is successful and can track the user, but unfortunately, some of the good features to detect points are wasted on corners in the background. I wish to use Haar Cascade's ability to detect the fact to get coordinates of detected face and apply Lucas Kanade to only within that restricted area.
Basically, I want to use Haar Cascade to detect fact, get x, y, w, and h values, and use those coordinates to apply Lucas Kanade within that restricted area (so that none are wasted on assigning good features to the background and only facial features are detected)
The line of code that is doing the Lucas Kanade algorithm is this code:
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
How do I do that?
Code:
from matplotlib import pyplot as plt
import numpy as np
import cv2
rectangle_x = 0
face_classifier = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
# params for ShiTomasi corner detection
feature_params = dict( maxCorners = 200,
qualityLevel = 0.01,
minDistance = 10,
blockSize = 7 )
# Parameters for lucas kanade optical flow
lk_params = dict( winSize = (15,15),
maxLevel = 2,
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
# Create some random colors
color = np.random.randint(0,255,(100,3))
# Take first frame and find corners in it
ret, old_frame = cap.read()
cv2.imshow('Old_Frame', old_frame)
cv2.waitKey(0)
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
restart = True
face = face_classifier.detectMultiScale(old_gray, 1.2, 4)
if len(face) == 0:
print "This is empty"
for (x,y,w,h) in face:
focused_face = old_frame[y: y+h, x: x+w]
cv2.imshow('Old_Frame', old_frame)
face_gray = cv2.cvtColor(old_frame,cv2.COLOR_BGR2GRAY)
gray = cv2.cvtColor(focused_face,cv2.COLOR_BGR2GRAY)
corners_t = cv2.goodFeaturesToTrack(gray, mask = None, **feature_params)
corners = np.int0(corners_t)
for i in corners:
ix,iy = i.ravel()
cv2.circle(focused_face,(ix,iy),3,255,-1)
cv2.circle(old_frame,(x+ix,y+iy),3,255,-1)
print ix, " ", iy
plt.imshow(old_frame),plt.show()
##########
#############################
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
#############################
# Create a mask image for drawing purposes
mask = np.zeros_like(old_frame)
print "X: ", x
print "Y: ", y
while(1):
ret,frame = cap.read()
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# calculate optical flow
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
# Select good points
good_new = p1[st==1]
good_old = p0[st==1]
# draw the circles
for i,(new,old) in enumerate(zip(good_new,good_old)):
a,b = new.ravel()
c,d = old.ravel()
cv2.circle(frame,(a, b),5,color[i].tolist(),-1)
if i == 99:
break
cv2.imshow('frame',frame)
k = cv2.waitKey(30) & 0xff
if k == 27:
break
# Now update the previous frame and previous points
old_gray = frame_gray.copy()
p0 = good_new.reshape(-1,1,2)
cv2.destroyAllWindows()
cap.release()
Here is the code snippet:
p0 = np.array([[[x,y]], [[x0,y0]]], np.float32)
just replace p0 in original code and and assign x,x0... with your desired points
- make sure its a 2d array
- and the type is float 32 for single precision