opencv stereo camera calibration - opencv

I am working on stereo camera calibration with OpenCV according to the standard tutorial given by http://docs.opencv.org/2.4.11/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#stereorectify. However, the calibrated output is not good and the rms value is 78.26. I already tried any available solutions I can find from Google, while none of them can work.
Detail implementation:
I use 13 image pairs to find object points and image point with the below code.
def getCalibrateParams(leftImgPath, rightImgPath):
# termination criteria
w = 9
h = 7
chess_size = (9, 7)
chess_size_r = (7,9)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
#objp = np.zeros((np.prod(chess_size),3), np.float32)
#objp[:,:2] = np.indices(chess_size).T.reshape(-1,2)
objp = np.zeros((w*h, 3), np.float32)
objp[:,:2] = np.mgrid[0:w, 0:h].T.reshape(-1,2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
leftImgpoints = [] # 2d points in image plane.
rightImgPoints = []
leftImg = glob.glob(leftImgPath)
rightImg = glob.glob(rightImgPath)
for fname in leftImg:
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = cv2.findChessboardCorners(gray, (w,h), None)
if not ret:
raise ChessboardNotFoundError('No chessboard could be found!')
else:
objpoints.append(objp)
#increase the accuracy of seeking for corners
cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
leftImgpoints.append(corners)
# Draw and display the corners
#cv2.drawChessboardCorners(img, chess_size, corners,ret)
#cv2.imshow('img',img)
#cv2.waitKey()
for fname in rightImg:
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret, corners = cv2.findChessboardCorners(gray, chess_size_r)
if not ret:
raise ChessboardNotFoundError('No chessboard could be found!')
else:
#increase the accuracy of seeking for corners
cv2.cornerSubPix(gray,corners,(11,11),(-1,-1),criteria)
rightImgPoints.append(corners)
return objpoints,leftImgpoints,rightImgPoints
After that, I try to calibrate an image pair with the below code:
objectPoints, imagePoints1, imagePoints2 = getCalibrateParams(leftImgPath, rightImgPath)
#use any image to find the size
img = cv2.imread('/home/wuyang/vr/img/test/test_1_01_02.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
h, w = img.shape[:2]
#single camera calibration to fetch a more accurate camera matrix
ret1, cameraMatrix1, distCoeffs1, rvecs1, tvecs1 = cv2.calibrateCamera(objectPoints, imagePoints1, gray.shape[::-1],None, None)
ret2, cameraMatrix2, distCoeffs2, rvecs2, tvecs2 = cv2.calibrateCamera(objectPoints, imagePoints2, gray.shape[::-1],None, None)
print ret1, ret2
stereo_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
stereo_flags = cv2.CALIB_FIX_INTRINSIC
rms, cameraMatrix1,distCoeffs1, cameraMatrix2, distCoeffs2, R, T = cv2.stereoCalibrate(objectPoints, imagePoints1,
imagePoints2, imageSize = (w,h),
cameraMatrix1 = cameraMatrix1, distCoeffs1 = distCoeffs1,
cameraMatrix2 = cameraMatrix2, distCoeffs2 = distCoeffs2,
criteria = stereo_criteria, flags = stereo_flags)[:-2]
print 'stereo calibration result: ',rms
#print cv2.CALIB_FIX_INTRINSIC 256
#print cv2.CALIB_USE_INTRINSIC_GUESS 1
#print cv2.CALIB_FIX_PRINCIPAL_POINT 4
#print cv2.CALIB_FIX_FOCAL_LENGTH 16
#print cv2.CALIB_FIX_ASPECT_RATIO 2
#print cv2.CALIB_SAME_FOCAL_LENGTH 512
#print cv2.CALIB_RATIONAL_MODEL 16384
#print cv2.CALIB_ZERO_TANGENT_DIST 8
#print cv2.CALIB_FIX_K1 32
#print cv2.CALIB_FIX_K2 64
#print cv2.CALIB_FIX_K3 128
#print cv2.CALIB_FIX_K4 2048
#print cv2.CALIB_FIX_K5 4096
#print cv2.CALIB_FIX_K6 8192
'''
print 'rms value:', rms
print 'cameraMatrix1:\n', cameraMatrix1
print 'cameraMatrix2:\n', cameraMatrix2
print 'disCoeffs1:\n', distCoeffs1
print 'disCoeffs2:\n', distCoeffs2
print 'rotation vector:\n', R
print 'translation vector:\n', T
'''
#left camera calibration test
'''
computeReprojectionError(objectPoints, imagePoints1, rvecs1, tvecs1, cameraMatrix1, distCoeffs1)
newcameramtx1, roi1 = getCameraMatrix(img, cameraMatrix1, distCoeffs1)
undistort(img, cameraMatrix1, distCoeffs1, newcameramtx1, roi1)
'''
R1, R2, P1, P2, Q = cv2.stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2,
(w,h), R, T, flags = 0, alpha = -1)[:-2]
# distort images
undistort_map1, rectify_map1 = cv2.initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1, (w,h), cv2.CV_32FC1)
undistort_map2, rectify_map2 = cv2.initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2, (w,h), cv2.CV_32FC1)
lpath = '/home/wuyang/vr/img/test/test_2_01_01.jpg'
rpath = '/home/wuyang/vr/img/test/test_2_01_02.jpg'
lImg = cv2.imread(lpath)
rImg = cv2.imread(rpath)
#undistor_output1 = cv2.undistort(test,undistort_map1, rectify_map1, None, newcameramtx)
undistor_output1 = cv2.remap(lImg, undistort_map1, rectify_map1, cv2.INTER_LINEAR)
undistor_output2 = cv2.remap(rImg, undistort_map2, rectify_map2, cv2.INTER_LINEAR)
cv2.imwrite('ss.jpg', undistor_output1)
The flow is quite standard while the output is not good.
The left image to be calibrated: http://imgur.com/8WvzTvc
The calibrated result: enter link description here
Please help to see how to get a reasonable good calibrated result. Thanks a lot!

I would say your captured photos are just not good enough... That is a too high value of rms error. Analyze carefully your pairs of photos and see if they are not blurred. Additionally capture a little more pairs of photos, from different points of view, different distances to the camera and always having examples of the chessboard on the borders of the images. A good calibration should have an error under 0.5. Notice that a bad pair of images could increase highly your error.

Related

Camera calibration with OpenCV-python for autonomous car doesn't work well

Problems
doesn't work well
When I use my code with my Image, it doesn't work well.
I only edited 'wc' and 'hc' from OpenCV DOC
import glob
import cv2 as cv
import numpy as np
wc = 7
hc = 4
# termination criteria
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((wc * hc, 3), np.float32)
objp[:, :2] = np.mgrid[0:hc, 0:wc].T.reshape(-1, 2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
images = glob. Glob('1.jpg')
for fname in images:
img = cv.imread(fname)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = cv.findChessboardCorners(gray, (hc, wc), None)
# If found, add object points, image points (after refining them)
print(ret, wc, hc)
if True:
objpoints.append(objp)
corners2 = cv.cornerSubPix(gray, corners, (20, 20), (-1, -1),
criteria) # image, corners, winSize, zeroZone, criteria
imgpoints.append(corners2)
# Draw and display the corners
cv.drawChessboardCorners(img, (hc, wc), corners2, ret)
cv.imwrite('ChessboardCorners.png', img)
cv.waitKey(0)
ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
img = cv.imread('1.jpg')
print(img.shape[:2])
h, w = img.shape[:2]
newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
# undistort
dst = cv.undistort(img, mtx, dist, None, newcameramtx)
# crop the image
x, y, w, h = roi
dst = dst[y:y + h, x:x + w]
cv.imwrite('calibresult.png', dst)
cv.waitKey(0)
mean_error = 0
for i in range(len(objpoints)):
imgpoints2, _ = cv.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
error = cv.norm(imgpoints[i], imgpoints2, cv.NORM_L2) / len(imgpoints2)
mean_error += error
print("total error: {}".format(mean_error / len(objpoints)))
print("\n\n", fname, "claer")
cv.destroyAllWindows()
exit(0)
original image - not well
ChessboardCorners - (I'm not sure that this is not well)
calibresult image - not well
works well with other images
But, when I use my code with the Image which was in the example in OpenCV DOC, it works well.
import glob
import cv2 as cv
import numpy as np
wc = 6
hc = 7
# termination criteria
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((wc * hc, 3), np.float32)
objp[:, :2] = np.mgrid[0:hc, 0:wc].T.reshape(-1, 2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
images = glob. Glob('img.png')
for fname in images:
img = cv.imread(fname)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = cv.findChessboardCorners(gray, (hc, wc), None)
# If found, add object points, image points (after refining them)
print(ret, wc, hc)
if True:
objpoints.append(objp)
corners2 = cv.cornerSubPix(gray, corners, (20, 20), (-1, -1),
criteria) # image, corners, winSize, zeroZone, criteria
imgpoints.append(corners2)
# Draw and display the corners
cv.drawChessboardCorners(img, (hc, wc), corners2, ret)
cv.imwrite('ChessboardCorners.png', img)
cv.waitKey(0)
ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
img = cv.imread('img.png')
print(img.shape[:2])
h, w = img.shape[:2]
newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
# undistort
dst = cv.undistort(img, mtx, dist, None, newcameramtx)
# crop the image
x, y, w, h = roi
dst = dst[y:y + h, x:x + w]
cv.imwrite('calibresult.png', dst)
cv.waitKey(0)
mean_error = 0
for i in range(len(objpoints)):
imgpoints2, _ = cv.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
error = cv.norm(imgpoints[i], imgpoints2, cv.NORM_L2) / len(imgpoints2)
mean_error += error
print("total error: {}".format(mean_error / len(objpoints)))
print("\n\n", fname, "claer")
cv.destroyAllWindows()
exit(0)
I removed the images because "Your question appears to be spam."
Please see the images on OpenCV DOC
Please, give me the solution to this problem.
Do I need to modify the parameters, or what should I do?
Is my chessboard wrong?
Below is what I have tried.
First, I tried to find correct numbers of 'wc' and 'hc'
I used this code to find.
import glob
import cv2 as cv
import numpy as np
for i in range(3, 50):
for j in range(i + 1, 50): # I used this code becuase I found that the order of the variables does not matter last time.
wc = i
hc = j
# termination criteria
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((wc * hc, 3), np.float32)
objp[:, :2] = np.mgrid[0:hc, 0:wc].T.reshape(-1, 2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane.
images = glob.glob('1.jpg')
for fname in images:
img = cv.imread(fname)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
# Find the chess board corners
ret, corners = cv.findChessboardCorners(gray, (hc, wc), None)
# If found, add object points, image points (after refining them)
print(ret, wc, hc)
And the result here:
False 3 4
False 3 5
False 3 6
···
False 4 5
False 4 6
**True 4 7**
False 4 8
False 4 9
···
False 47 48
False 47 49
False 48 49
Process finished with exit code 0
I also found that the Image which was in the example in OpenCV DOC has another 'wc' and 'hc', (4, 4).
And result here:
ChessboardCorners by (4, 4
calibresult by (4, 4)
So, I'm expecting that the 'wc'and 'hc' of my Image (4, 7) might be small.
Should I increase the max and do a brute-force search again?
I can't tell for sure, but it looks like you are only using a single input image for calibration. If that's true, try increasing to at least 5 images (more would be better) and see if that helps. The images should be at different angles and distances.
I notice that your lens has significant distortion. A few suggestions for getting that to work well:
Use the rational model for distortion - the basic kappa 1, kappa 2 model won't do well.
Your data set will need to include image points from all parts of the image, including near the edges and corners of the image. This can be difficult/impossible to achieve using the normal chessboard pattern (because the entire pattern must be visible in the image) - I suggest using the ChAruco calibration pattern/functions. This uses a modified chessboard pattern that includes Aruco markers embedded in the white squares, which allows for partial patterns to be used.
Note that the wc and hc parameters you are searching for are used to describe the chessboard pattern width and height. This should be known to you ahead of time and you shouldn't need to search for it.

Pixel per mm calculation of an image using camera calibration matrix and object distance is not same as pixel dimension of object in MS Paint

I want to calculate number of pixels per grid (i.e. pixels per 11 mm) of the checkerboard. I am doing this to validate that mm/pixel calculation I obtain using calibration matrix and formula (below) is same as what I will see when I open the image in MS Paint.
For the checkerboard image (1920x1080 resolution): 11 grid x 7 grid in size with each grid as 11x 11 mm, at a distance of 500 mm (picture below).
I compute a calibration matrix using code:
import cv2
import numpy as np
import pathlib
#from utils import *
import glob
from argparse import ArgumentParser
topview_image_path = 'checkerboard_top\*.png'
camera_orientation = 'topview'
if camera_orientation == 'topview':
image_path = topview_image_path
def calibrate_chessboard(folder):
# Defining the dimensions of checkerboard
CHECKERBOARD = (6,9)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Creating vector to store vectors of 3D points for each checkerboard image
objpoints = []
# Creating vector to store vectors of 2D points for each checkerboard image
imgpoints = []
# Defining the world coordinates for 3D points
objp = np.zeros((1, CHECKERBOARD[0] * CHECKERBOARD[1], 3), np.float32)
objp[0,:,:2] = np.mgrid[0:CHECKERBOARD[0], 0:CHECKERBOARD[1]].T.reshape(-1, 2)
prev_img_shape = None
# Extracting path of individual image stored in a given directory
print("image path:", image_path)
images = glob.glob(image_path)
#images = glob.glob(f'{folder}/*.png')
# if len(images) == 0:
# images = glob.glob(f'{folder}/*.jpg')
# print(images)
for fname in images:
img = cv2.imread(fname)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# Find the chess board corners
# If desired number of corners are found in the image then ret = true
ret, corners = cv2.findChessboardCorners(gray, CHECKERBOARD, cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_FAST_CHECK + cv2.CALIB_CB_NORMALIZE_IMAGE)
"""
If desired number of corner are detected,
we refine the pixel coordinates and display
them on the images of checker board
"""
if ret == True:
objpoints.append(objp)
# refining pixel coordinates for given 2d points.
corners2 = cv2.cornerSubPix(gray, corners, (11,11),(-1,-1), criteria)
imgpoints.append(corners2)
# Draw and display the corners
img = cv2.drawChessboardCorners(img, CHECKERBOARD, corners2, ret)
cv2.imshow('img',img)
cv2.waitKey(0)
cv2.destroyAllWindows()
h,w = img.shape[:2]
"""
Performing camera calibration by
passing the value of known 3D points (objpoints)
and corresponding pixel coordinates of the
detected corners (imgpoints)
"""
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
return [ret, mtx, dist, rvecs, tvecs]
if __name__ == '__main__':
parser = ArgumentParser()
parser.add_argument('--type', dest='type',type=str, default='topview',help='is the image topview or sideview?')
parser.add_argument('--folder', dest='folder',type=str, default='cal_images/checkerboard_topview',help='is the image topview or sideview?')
args = parser.parse_args()
WIDTH = 6
HEIGHT = 9
# Calibrate
ret, mtx, dist, rvecs, tvecs = calibrate_chessboard(args.folder)
print(mtx)
print(dist)
mtx_list = ["calibration matrix:\n", str(mtx),
"\ndistortion matrix:", str(dist)]
txt_file = "matrix_" + camera_orientation +".txt"
with open(txt_file, mode='wt', encoding='utf-8') as myfile:
myfile.write('\n'.join(mtx_list))
and get the calibration matrix as:
M = [[2.86276094e+03 0.00000000e+00 8.23315889e+02]
[0.00000000e+00 2.86846709e+03 5.80987675e+02]
[0.00000000e+00 0.00000000e+00 1.00000000e+00]]
This gives me the focal length (f) in pixel units (M[0][0]) i.e. 2862.
I then calculate size in pixels (X_sizepx) of the checkerboard grid 11 mm (X_sizemm) object at distance Z (in my case 500mm) using formula:
X_sizepx = (f/Z) * X_sizemm
Substituting all the values: f = 2862, Z =500, X_sizemm = 11, I get 62.94. So as per opencv calibration and the formula, 11mm should ~63 pixels.
I then open the checkerboard image in MS paint to see the pixel dimension of the square grid in pixels and it says 41 pixels (image below).
This is a big difference if I were to use camera calibration matrix and the the formula. Is there something I am doing wrong?
Note: Z can never be more than 530 mm if I were to assume that Z might be slightly off.

Segmenting products on the shelf

I am trying to detect edges from the products on a shelf using histogram projections. But I am stuck at 2 levels.
The challenges that I m facing are:
How to get the longest non shelf segment from the image i.e Detect the width of the widest product on the shelf from the available one.
How to achieve morphological reconstruction using custom markers.To eliminate
all small horizontal segments, I am generating 2 markers which can be seen in 'markers.png' (Attached). With them, I am calculating the minimum of the reconstruction outputs from both the markers.
Need assistance on this.
Thanks a lot
Below is my python code for the same.
Below is my python code
********************************************************************************
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import math
# Read the input image
img = cv.imread('C:\\Users\\672059\\Desktop\\p2.png')
# Converting from BGR to RGB. Default is BGR.
# img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)
# Resize the image to 150,150
img_resize = cv.resize(img, (150, 150))
# Get the dimensions of the image
img_h, img_w, img_c = img_resize.shape
# Split the image on channels
red = img[:, :, 0]
green = img[:, :, 1]
blue = img[:, :, 2]
# Defining a vse for erosion
vse = np.ones((img_h, img_w), dtype=np.uint8)
# Morphological Erosion for red channel
red_erode = cv.erode(red, vse, iterations=1)
grad_red = cv.subtract(red, red_erode)
# Morphological Erosion for green channel
green_erode = cv.erode(green, vse, iterations=1)
grad_green = cv.subtract(green, green_erode)
# Morphological Erosion for blue channel
blue_erode = cv.erode(blue, vse, iterations=1)
grad_blue = cv.subtract(blue, blue_erode)
# Stacking the individual channels into one processed image
grad = [grad_red, grad_green, grad_blue]
retrieved_img = np.stack(grad, axis=-1)
retrieved_img = retrieved_img.astype(np.uint8)
retrieved_img_gray = cv.cvtColor(retrieved_img, cv.COLOR_RGB2GRAY)
plt.title('Figure 1')
plt.imshow(cv.bitwise_not(retrieved_img_gray), cmap=plt.get_cmap('gray'))
plt.show()
# Hough Transform of the image to get the longest non shelf boundary from the image!
edges = cv.Canny(retrieved_img_gray, 127, 255)
minLineLength = img_w
maxLineGap = 10
lines = cv.HoughLinesP(edges, 1, np.pi/180, 127, minLineLength=1, maxLineGap=1)
temp = img.copy()
l = []
for x in range(0, len(lines)):
for x1, y1, x2, y2 in lines[x]:
cv.line(temp, (x1, y1), (x2, y2), (0, 255, 0), 2)
d = math.sqrt((x2-x1)**2 + (y2-y1)**2)
l.append(d)
# Defining a hse for erosion
hse = np.ones((1, 7), dtype=np.uint8)
opening = cv.morphologyEx(retrieved_img_gray, cv.MORPH_OPEN, hse)
plt.title('Figure 2')
plt.subplot(1, 2, 1), plt.imshow(img)
plt.subplot(1, 2, 2), plt.imshow(cv.bitwise_not(opening), 'gray')
plt.show()
# Dilation with disk shaped structuring element
horizontal_size = 7
horizontalstructure = cv.getStructuringElement(cv.MORPH_ELLIPSE, (horizontal_size, 1))
dilation = cv.dilate(opening, horizontalstructure)
plt.title('Figure 3')
plt.imshow(cv.bitwise_not(dilation), 'gray')
plt.show()
# Doing canny edge on dilated image
edge = cv.Canny(dilation, 127, 255)
plt.title('Figure 4')
plt.imshow(edges, cmap='gray')
plt.show()
h_projection = edge.sum(axis=1)
print(h_projection)
plt.title('Projection')
plt.plot(h_projection)
plt.show()
listing = []
for i in range(1, len(h_projection)-1):
if h_projection[i-1] == 0 and h_projection[i] == 0:
listing.append(dilation[i])
listing.append(dilation[i-1])
a = np.array([np.array(b) for b in l])
h = len(l)
_, contours, _ = cv.findContours(a, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv.boundingRect(contours[0])
y = y + i - h
cv.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
l.clear()
plt.imshow(img)
plt.show()
# Generating a mask
black_bg = np.ones([img_h, img_w], dtype=np.uint8)
# Clone the black bgd image
left = black_bg.copy()
right = black_bg.copy()
# Taking 10% of the image width
ten = int(0.1 * img_w)
left[:, 0:ten+1] = 0
right[:, img_w-ten:img_w+1] = 0
plt.title('Figure 4')
plt.subplot(121), plt.imshow(left, 'gray')
plt.subplot(122), plt.imshow(right, 'gray')
plt.show()
# Marker = left and right. Mask = dilation
mask = dilation
marker_left = left
marker_right = right
********************************************************************************
markers.png link: https://i.stack.imgur.com/45WJ6.png
********************************************************************************
Based on you input image, I would :
take a picture of an empty fridge
then compare the current image with the empty one.
play with morphological operations
get connected components > size N
If you can't take a empty fridge image:
segment the shelves (threshold white parts)
undo do the rotation of the image by using image moments of the shelves
for each shelve:
Threshold on saturation
Do a vertical projection
Count maxima.
Tresholded:
Erode-dilate:
Connected componens (width > 10 * height + > minsize):
And you have shelves.
Now take the average Y form each shelf and cut the original image in pieces:
Dither to 8 colors:
and threshold:
Connected components (h>1.5*w, minsize... this is hard here, I played with it :)

Difficult time trying to do shape recognition for 3D objects

I am trying to make a shape recognition classifier in which if you give an individual picture of an object (from a scene), it would be able to classify (after machine learning) the shape of an object (cylinder, cube, sphere, etc).
Original scene:
Individual objects it will classify:
I attempted to do this using cv2.approxPolyDB with an attempt to classify a cylinder. However, either my implementation isn't good or this wasn't a good choice of an algorithm to choose in the first place, the objects in the shape of cylinders were assigned a approxPolyDB value of 3 or 4.
Perhaps I can threshold and, in general, if given a value of 3 or 4, assume the object is a cylinder, but I feel like it's not the most reliable method for 3D shape classification. I feel like there is a better way to implement this and a better method as opposed to just hardcoding values. I feel like that with this method, it can easily confuse a cylinder with a cube.
Is there any way I can improve my 3D shape recognition program?
Code:
import cv2
import numpy as np
from pyimagesearch import imutils
from PIL import Image
from time import time
def invert_img(img):
img = (255-img)
return img
def threshold(im):
imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
imgray = cv2.medianBlur(imgray,9)
imgray = cv2.Canny(imgray,75,200)
return imgray
def view_all_contours(im, size_min, size_max):
main = np.array([[]])
cnt_target = im.copy()
for c in cnts:
epsilon = 0.1*cv2.arcLength(c,True)
approx = cv2.approxPolyDP(c,epsilon,True)
area = cv2.contourArea(c)
print 'area: ', area
test = im.copy()
# To weed out contours that are too small or big
if area > size_min and area < size_max:
print c[0,0]
print 'approx: ', len(approx)
max_pos = c.max(axis=0)
max_x = max_pos[0,0]
max_y = max_pos[0,1]
min_pos = c.min(axis=0)
min_x = min_pos[0,0]
min_y = min_pos[0,1]
# Load each contour onto image
cv2.drawContours(cnt_target, c, -1,(0,0,255),2)
print 'Found object'
frame_f = test[min_y:max_y , min_x:max_x]
main = np.append(main, approx[None,:][None,:])
thresh = frame_f.copy()
thresh = threshold(thresh)
contours_small, hierarchy = cv2.findContours(thresh.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts_small = sorted(contours_small, key = cv2.contourArea, reverse = True)
cv2.drawContours(frame_f, cnts_small, -1,(0,0,255),2)
cv2.imshow('Thresh', thresh)
cv2.imshow('Show Ya', frame_f)
cv2.waitKey(0)
# Uncomment in order to show all rectangles in image
print '---------------------------------------------'
#cv2.drawContours(cnt_target, cnts, -1,(0,255,0),2)
print main.shape
print main
return cnt_target
time_1 = time()
roi = cv2.imread('images/beach_trash_3.jpg')
hsv = cv2.cvtColor(roi,cv2.COLOR_BGR2HSV)
target = cv2.imread('images/beach_trash_3.jpg')
target = imutils.resize(target, height = 400)
hsvt = cv2.cvtColor(target,cv2.COLOR_BGR2HSV)
img_height = target.shape[0]
img_width = target.shape[1]
# calculating object histogram
roihist = cv2.calcHist([hsv],[0, 1], None, [180, 256], [0, 180, 0, 256] )
# normalize histogram and apply backprojection
cv2.normalize(roihist,roihist,0,255,cv2.NORM_MINMAX)
dst = cv2.calcBackProject([hsvt],[0,1],roihist,[0,180,0,256],1)
# Now convolute with circular disc
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
cv2.filter2D(dst,-1,disc,dst)
# threshold and binary AND
ret,thresh = cv2.threshold(dst,50,255,0)
thresh_one = thresh.copy()
thresh = cv2.merge((thresh,thresh,thresh))
res = cv2.bitwise_and(target,thresh)
# Implementing morphological erosion & dilation
kernel = np.ones((9,9),np.uint8) # (6,6) to get more contours (9,9) to reduce noise
thresh_one = cv2.erode(thresh_one, kernel, iterations = 3)
thresh_one = cv2.dilate(thresh_one, kernel, iterations=2)
# Invert the image
thresh_one = invert_img(thresh_one)
# To show prev img
#res = np.vstack((target,thresh,res))
#cv2.imwrite('res.jpg',res)
#cv2.waitKey(0)
#cv2.imshow('Before contours', thresh_one)
cnt_target = target.copy()
cnt_full = target.copy()
# Code to draw the contours
contours, hierarchy = cv2.findContours(thresh_one.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
print time() - time_1
size_min = 200
size_max = 5000
cnt_target = view_all_contours(target, size_min, size_max)
cv2.drawContours(cnt_full, cnts, -1,(0,0,255),2)
res = imutils.resize(thresh_one, height = 700)
cv2.imshow('Original image', target)
cv2.imshow('Preprocessed', thresh_one)
cv2.imshow('All contours', cnt_full)
cv2.imshow('Filtered contours', cnt_target)
cv2.waitKey(0)

OpenCV: Lucas Kanade applied within certain area (to detect facial features)

I am trying to preform face tracking with the Lucas Kanade algorithm with Haar Cascade Classification. The Lucas Kanade is successful and can track the user, but unfortunately, some of the good features to detect points are wasted on corners in the background. I wish to use Haar Cascade's ability to detect the fact to get coordinates of detected face and apply Lucas Kanade to only within that restricted area.
Basically, I want to use Haar Cascade to detect fact, get x, y, w, and h values, and use those coordinates to apply Lucas Kanade within that restricted area (so that none are wasted on assigning good features to the background and only facial features are detected)
The line of code that is doing the Lucas Kanade algorithm is this code:
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
How do I do that?
Code:
from matplotlib import pyplot as plt
import numpy as np
import cv2
rectangle_x = 0
face_classifier = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)
# params for ShiTomasi corner detection
feature_params = dict( maxCorners = 200,
qualityLevel = 0.01,
minDistance = 10,
blockSize = 7 )
# Parameters for lucas kanade optical flow
lk_params = dict( winSize = (15,15),
maxLevel = 2,
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
# Create some random colors
color = np.random.randint(0,255,(100,3))
# Take first frame and find corners in it
ret, old_frame = cap.read()
cv2.imshow('Old_Frame', old_frame)
cv2.waitKey(0)
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
restart = True
face = face_classifier.detectMultiScale(old_gray, 1.2, 4)
if len(face) == 0:
print "This is empty"
for (x,y,w,h) in face:
focused_face = old_frame[y: y+h, x: x+w]
cv2.imshow('Old_Frame', old_frame)
face_gray = cv2.cvtColor(old_frame,cv2.COLOR_BGR2GRAY)
gray = cv2.cvtColor(focused_face,cv2.COLOR_BGR2GRAY)
corners_t = cv2.goodFeaturesToTrack(gray, mask = None, **feature_params)
corners = np.int0(corners_t)
for i in corners:
ix,iy = i.ravel()
cv2.circle(focused_face,(ix,iy),3,255,-1)
cv2.circle(old_frame,(x+ix,y+iy),3,255,-1)
print ix, " ", iy
plt.imshow(old_frame),plt.show()
##########
#############################
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
#############################
# Create a mask image for drawing purposes
mask = np.zeros_like(old_frame)
print "X: ", x
print "Y: ", y
while(1):
ret,frame = cap.read()
frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# calculate optical flow
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
# Select good points
good_new = p1[st==1]
good_old = p0[st==1]
# draw the circles
for i,(new,old) in enumerate(zip(good_new,good_old)):
a,b = new.ravel()
c,d = old.ravel()
cv2.circle(frame,(a, b),5,color[i].tolist(),-1)
if i == 99:
break
cv2.imshow('frame',frame)
k = cv2.waitKey(30) & 0xff
if k == 27:
break
# Now update the previous frame and previous points
old_gray = frame_gray.copy()
p0 = good_new.reshape(-1,1,2)
cv2.destroyAllWindows()
cap.release()
Here is the code snippet:
p0 = np.array([[[x,y]], [[x0,y0]]], np.float32)
just replace p0 in original code and and assign x,x0... with your desired points
- make sure its a 2d array
- and the type is float 32 for single precision

Resources