Replace OpenCV face detection with MTCNN algorithm

I am using OpenCV for face detection. Sometimes, OpenCV fails to detect faces. The function is:
def detectFace(img_path):
    img = functions.detectFace(img_path)[0] # detectFace returns (1, 224, 224, 3)
    return img[:, :, ::-1] # BGR to RGB
I want the same output from the MTCNN algorithm. Here is the detectFace function code:
def detectFace(img, target_size=(224, 224), grayscale=False, enforce_detection=True):
    img_path = ""
    img = "/media/khawar/HDD_Khawar/Projects/" + img
    print(img)

    #-----------------------

    exact_image = False
    if type(img).__module__ == np.__name__:
        exact_image = True

    base64_img = False
    if len(img) > 11 and img[0:11] == "data:image/":
        base64_img = True

    #-----------------------

    opencv_path = get_opencv_path()
    face_detector_path = opencv_path + "haarcascade_frontalface_default.xml"
    eye_detector_path = opencv_path + "haarcascade_eye.xml"

    if os.path.isfile(face_detector_path) != True:
        raise ValueError("Confirm that opencv is installed on your environment! Expected path ", face_detector_path, " violated.")

    #--------------------------------

    face_detector = cv2.CascadeClassifier(face_detector_path)
    eye_detector = cv2.CascadeClassifier(eye_detector_path)

    if base64_img == True:
        img = loadBase64Img(img)
    elif exact_image != True: # image path passed as input
        if os.path.isfile(img) != True:
            raise ValueError("Confirm that ", img, " exists")
        img = cv2.imread(img)

    img_raw = img.copy()

    #--------------------------------

    faces = []

    try:
        faces = face_detector.detectMultiScale(img, 1.3, 5)
    except:
        pass

    #print("found faces in ", image_path, " is ", len(faces))

    if len(faces) > 0:
        print(faces[0])
        x, y, w, h = faces[0]
        detected_face = img[int(y):int(y+h), int(x):int(x+w)]
        detected_face_gray = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY)

        #---------------------------
        # face alignment

        eyes = eye_detector.detectMultiScale(detected_face_gray)

        if len(eyes) >= 2:
            # find the largest 2 eyes
            base_eyes = eyes[:, 2]

            items = []
            for i in range(0, len(base_eyes)):
                item = (base_eyes[i], i)
                items.append(item)

            df = pd.DataFrame(items, columns=["length", "idx"]).sort_values(by=['length'], ascending=False)
            eyes = eyes[df.idx.values[0:2]]

            #-----------------------
            # decide left and right eye

            eye_1 = eyes[0]
            eye_2 = eyes[1]

            if eye_1[0] < eye_2[0]:
                left_eye = eye_1
                right_eye = eye_2
            else:
                left_eye = eye_2
                right_eye = eye_1

            #-----------------------
            # find the center of each eye

            left_eye_center = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
            left_eye_x = left_eye_center[0]
            left_eye_y = left_eye_center[1]

            right_eye_center = (int(right_eye[0] + (right_eye[2] / 2)), int(right_eye[1] + (right_eye[3] / 2)))
            right_eye_x = right_eye_center[0]
            right_eye_y = right_eye_center[1]

            #-----------------------
            # find rotation direction

            if left_eye_y > right_eye_y:
                point_3rd = (right_eye_x, left_eye_y)
                direction = -1 # rotate clockwise
            else:
                point_3rd = (left_eye_x, right_eye_y)
                direction = 1 # rotate counter-clockwise

            #-----------------------
            # find the lengths of the triangle edges

            a = distance(left_eye_center, point_3rd)
            b = distance(right_eye_center, point_3rd)
            c = distance(right_eye_center, left_eye_center)

            #-----------------------
            # apply the cosine rule

            if b != 0 and c != 0: # a zero edge would cause division by zero in the cos_a calculation
                cos_a = (b*b + c*c - a*a) / (2*b*c)
                angle = np.arccos(cos_a) # angle in radians
                angle = (angle * 180) / math.pi # radians to degrees

                #-----------------------
                # rotate the base image

                if direction == -1:
                    angle = 90 - angle

                img = Image.fromarray(img_raw)
                img = np.array(img.rotate(direction * angle))

                # rotating the base image invalidates the earlier detection; detect again
                faces = face_detector.detectMultiScale(img, 1.3, 5)
                if len(faces) > 0:
                    x, y, w, h = faces[0]
                    detected_face = img[int(y):int(y+h), int(x):int(x+w)]

            #-----------------------
            # face alignment block end

        #---------------------------
        # the face alignment block needs color images; that's why the grayscale conversion happens here

        if grayscale == True:
            detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY)

        detected_face = cv2.resize(detected_face, target_size)

        img_pixels = image.img_to_array(detected_face)
        img_pixels = np.expand_dims(img_pixels, axis=0)

        # normalize input in [0, 1]
        img_pixels /= 255

        return img_pixels

    else:
        if (exact_image == True) or (enforce_detection != True):
            if grayscale == True:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.resize(img, target_size)
            img_pixels = image.img_to_array(img)
            img_pixels = np.expand_dims(img_pixels, axis=0)
            img_pixels /= 255
            return img_pixels
        else:
            print(img)
            raise ValueError("Face could not be detected. Please confirm that the picture is a face photo, or consider setting the enforce_detection param to False.")

Try this out.
import mtcnn
import matplotlib.pyplot as plt

# load image from file
filename = "glediston-bastos-ZtmmR9D_2tA-unsplash.jpg"
pixels = plt.imread(filename)
print("Shape of image/array:", pixels.shape)
imgplot = plt.imshow(pixels)
plt.show()

# draw an image with detected objects
def draw_facebox(filename, result_list):
    # load the image
    data = plt.imread(filename)
    # plot the image
    plt.imshow(data)
    # get the context for drawing boxes
    ax = plt.gca()
    # plot each box
    for result in result_list:
        # get coordinates
        x, y, width, height = result['box']
        # create the shape
        rect = plt.Rectangle((x, y), width, height, fill=False, color='green')
        # draw the box
        ax.add_patch(rect)
        # draw a dot for each facial keypoint
        for key, value in result['keypoints'].items():
            # create and draw dot
            dot = plt.Circle(value, radius=20, color='orange')
            ax.add_patch(dot)
    # show the plot
    plt.show()

# create the detector, using default weights
detector = mtcnn.MTCNN()
# detect faces in the image
faces = detector.detect_faces(pixels)
# display faces on the original image
draw_facebox(filename, faces)
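If you want exactly the (1, 224, 224, 3) RGB output asked about in the question, a minimal sketch along these lines should work; the target size and the [0, 1] normalization mirror the original detectFace, and detect_face_mtcnn is a hypothetical helper name:

import cv2
import numpy as np
from mtcnn import MTCNN

def detect_face_mtcnn(img_path, target_size=(224, 224)):
    # MTCNN expects an RGB image
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    detections = MTCNN().detect_faces(img)
    if len(detections) == 0:
        raise ValueError("Face could not be detected.")
    # MTCNN boxes can have slightly negative coordinates; clamp them
    x, y, w, h = detections[0]['box']
    face = img[max(y, 0):y + h, max(x, 0):x + w]
    face = cv2.resize(face, target_size)
    # add the batch dimension and normalize to [0, 1], as the original detectFace does
    return np.expand_dims(face.astype(np.float32), axis=0) / 255

Since MTCNN already works in RGB here, no BGR-to-RGB flip is needed afterwards.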

Are you using the detectFace function within deepface? It currently wraps opencv, ssd, dlib and mtcnn to detect and align faces.
def detectFace(img_path):
    backends = ['opencv', 'ssd', 'dlib', 'mtcnn']
    img = functions.detectFace(img_path, detector_backend=backends[3])[0] # detectFace returns (1, 224, 224, 3)
    return img[:, :, ::-1] # BGR to RGB
The result of the detectFace function is now detected and aligned with mtcnn.
Besides, you can run face recognition with the mtcnn backend as well.
from deepface import DeepFace
obj = DeepFace.verify("img1.jpg", "img2.jpg", detector_backend = 'mtcnn')
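For reference, a quick way to inspect the result (the 'verified' and 'distance' keys are present in recent deepface versions; treat this as a sketch, not a guaranteed schema):

# obj is a dictionary summarizing the comparison
print(obj["verified"], obj["distance"])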

Related

Why is the output of Dense Optical Flow in OpenCV not continuous?

I want to track whether a worker throws garbage into the truck manually in the video.
Q1: Is Dense Optical Flow in OpenCV a good solution for me?
Q2: I tried to code a sample, but why is the flow image not continuous? Frames 41 and 43 are good, but frame 42 is black.
Q3: Can I watch the color change in a small area to track whether garbage passes through?
A small area like this:
Here is my code. You can run my code and video from my GitHub repository:
https://github.com/Pinocchio2018/QuestionHelper/blob/main/openCV_related/dence_optical_flow_problem/test.py
import numpy as np
import cv2 as cv

def put_frame_no(image, frame_no):
    # font
    font = cv.FONT_HERSHEY_SIMPLEX
    # origin of the text
    org = (50, 450)
    # font scale
    font_scale = 2
    # red color in BGR
    color = (0, 0, 255)
    # line thickness of 2 px
    thickness = 2
    # draw the frame number with cv.putText()
    image = cv.putText(image, "frame no: " + str(frame_no), org, font,
                       font_scale, color, thickness, cv.LINE_AA)
    return image

cap = cv.VideoCapture(cv.samples.findFile("0116-sample4-edited-short-throw.mp4"))
ret, frame1 = cap.read()
prv_frame = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)
hsv = np.zeros_like(frame1)
hsv[..., 1] = 255

cv.namedWindow("flow image", cv.WINDOW_NORMAL)
cv.resizeWindow("flow image", 800, 600)

frame_no = 0
while 1:
    ret, origin_img = cap.read()
    if not ret:
        print('No frames grabbed!')
        break
    next_frame = cv.cvtColor(origin_img, cv.COLOR_BGR2GRAY)
    flow = cv.calcOpticalFlowFarneback(prv_frame, next_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv.cartToPolar(flow[..., 0], flow[..., 1])
    hsv[..., 0] = ang * 180 / np.pi / 2
    hsv[..., 2] = cv.normalize(mag, None, 0, 255, cv.NORM_MINMAX)
    flow_image = cv.cvtColor(hsv, cv.COLOR_HSV2BGR)

    flow_image = put_frame_no(flow_image, frame_no)
    origin_img = put_frame_no(origin_img, frame_no)
    frame_no += 1

    vis_frame = np.concatenate((origin_img, flow_image), axis=1)
    cv.imshow('flow image', vis_frame)
    # cv.imshow('origin', flow_image)
    k = cv.waitKey(30) & 0xff
    if k == 27:
        break
    elif k == ord('s'):
        cv.imwrite('opticalfb.png', origin_img)
        cv.imwrite('opticalhsv.png', flow_image)
    prv_frame = next_frame

cv.destroyAllWindows()
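Regarding Q3, a hedged sketch of "spying on" a small area: inside the while loop, after cv.cartToPolar, average the flow magnitude over a fixed region of interest and flag frames where it spikes. The ROI coordinates and the threshold below are placeholders, not tuned values:

# hedged sketch for Q3: watch a small region of interest for activity spikes
x0, y0, x1, y1 = 400, 100, 500, 200      # placeholder ROI over the truck opening
roi_mag = mag[y0:y1, x0:x1]              # 'mag' comes from cv.cartToPolar above
activity = float(np.mean(roi_mag))       # mean flow magnitude in the ROI
if activity > 5.0:                       # threshold found by trial and error
    print("possible throw at frame %d: activity=%.2f" % (frame_no, activity))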

Hough Line Transform in Python keeps throwing error

I'm trying to use opencv to do a simple Lane Keep Assist System but I'm getting this error from the Hough Transform function:
cv2.error: OpenCV(4.6.0) /io/opencv/modules/highgui/src/precomp.hpp:155: error: (-215:Assertion failed) src_depth != CV_16F && src_depth != CV_32S in function 'convertToShow'
This is the code I have:
import cv2
import numpy as np

def detect_line_segments(frame):
    # tuning min_threshold, minLineLength, maxLineGap is a trial and error process by hand
    rho = 1  # distance precision in pixels, i.e. 1 pixel
    angle = np.pi / 180  # angular precision in radians, i.e. 1 degree
    min_threshold = 2  # minimal number of votes
    # frame = frame.astype(np.uint8)
    line_segments = cv2.HoughLinesP(frame, rho, angle, min_threshold, minLineLength=8, maxLineGap=4)
    return line_segments

def detectEdges(frame):
    rho = 1
    angle = np.pi / 180
    min_threshold = 10
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    lower_blue = np.array([60, 40, 40])
    upper_blue = np.array([150, 255, 255])
    mask = cv2.inRange(hsv, lower_blue, upper_blue)
    mask = cv2.resize(mask, (960, 540))
    # cv2.imshow('Test', mask)
    # cv2.waitKey(0)
    edges = cv2.Canny(mask, 200, 400)
    #edgesUpdt = np.array(edges, dtype=np.uint8)
    # cv2.imshow('Test', edgesUpdt)
    # cv2.waitKey(0)
    return edges

def region_of_interest(edges):
    print(edges)
    height, width = edges.shape
    mask = np.zeros_like(edges)
    # only focus on the bottom half of the screen
    polygon = np.array([[
        (0, height * 1 / 2),
        (width, height * 1 / 2),
        (width, height),
        (0, height),
    ]], np.int32)
    cv2.fillPoly(mask, polygon, 255)
    cropped_edges = cv2.bitwise_and(edges, mask)
    cv2.imshow('Test', cropped_edges)
    cv2.waitKey(0)
    return cropped_edges

def detect_line_segments(cropped_edges):  # note: this redefines detect_line_segments above
    # cropped_edges = cropped_edges.astype(np.float32)
    cv2.imshow('Test', cropped_edges)
    cv2.waitKey(0)
    # tuning min_threshold, minLineLength, maxLineGap is a trial and error process by hand
    rho = 2  # distance precision in pixels
    angle = np.pi / 60  # angular precision in radians, i.e. 3 degrees
    min_threshold = 50  # minimal number of votes
    line_segments = cv2.HoughLinesP(cropped_edges, rho, angle, min_threshold,
                                    np.array([], dtype=np.uint8), minLineLength=40, maxLineGap=80)
    cv2.imshow('Test', line_segments)  # this call triggers the convertToShow assertion
    cv2.waitKey(0)
    return line_segments

def main():
    frame = cv2.imread(r'/home/a1ph4/Desktop/LKAS system/Media/image.jpg')
    edges = detectEdges(frame)
    # test1 = region_of_interest(edges)
    croppedEdges = detect_line_segments(edges)

if __name__ == '__main__':
    main()
And this is the image I'm using (image attached).
Please help.
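For what it's worth, this assertion comes from cv2.imshow: HoughLinesP returns an int32 array of line endpoints, not an image, and highgui's convertToShow rejects CV_32S input. A hedged sketch of the usual fix is to draw the segments onto an image before showing anything:

# render the HoughLinesP output instead of passing it to imshow directly
line_segments = cv2.HoughLinesP(cropped_edges, rho, angle, min_threshold,
                                minLineLength=40, maxLineGap=80)
line_image = cv2.cvtColor(cropped_edges, cv2.COLOR_GRAY2BGR)
if line_segments is not None:
    for segment in line_segments:
        x1, y1, x2, y2 = segment[0]
        cv2.line(line_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imshow('Test', line_image)
cv2.waitKey(0)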

HSV Range for Line Follower for various light conditions

I have the following problem: when detecting a white line under various lighting conditions, a mask (based on HSV) performs well in only one scenario at a time (very bright or very shaded areas), as seen below.
My code is as follows; I am using HSV. The upper and lower thresholds are fixed constant offsets (+x/-x).
## SHADE
shadeLower1 = np.array([127,30,117] , dtype=np.uint8)
shadeUpper1 = np.array([147,51,138], dtype=np.uint8)
## SUN
sunLower2 = np.array([4,0,184], dtype=np.uint8)
sunUpper2 = np.array([104,57,255], dtype=np.uint8)
mask1 = cv2.inRange(hsv, shadeLower1, shadeUpper1)
mask2 = cv2.inRange(hsv, sunLower2, sunUpper2)
mask = cv2.max(mask1, mask2)
For instance, it will be fine in the shaded region (the white tape is perfect), but once it reaches the sunny area, the mask window is saturated with white and I lose my white object.
Any help on what to do would be appreciated!
Shaded Area
Sunny Area
I mostly did the same thing you did for thresholding, but I used bitwise_and instead of bitwise_or (bitwise_or is the same as cv2.max). The lines are a little messy, but hopefully good enough for you to use. You might be able to clean them up more if you take the hue channel into account to exclude the red (I avoided it since white is technically all hues).
It might even be worth it to try and filter across multiple color spaces and combine the masks.
import cv2
import numpy as np

# find path and return its contour
def findPath(hsv):
    # threshold on the s and v channels
    h, s, v = cv2.split(hsv)
    mask1 = cv2.inRange(s, 0, 45)
    mask2 = cv2.inRange(v, 115, 255)
    mask3 = cv2.bitwise_and(mask1, mask2, mask=None)

    # close
    kernel = np.ones((5, 5), np.uint8)
    mask3 = cv2.dilate(mask3, kernel, iterations=1)
    mask3 = cv2.erode(mask3, kernel, iterations=1)

    # open
    mask3 = cv2.erode(mask3, kernel, iterations=1)
    mask3 = cv2.dilate(mask3, kernel, iterations=1)

    # find contours (OpenCV 3.x return signature)
    _, contours, _ = cv2.findContours(mask3, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # find the biggest contour
    biggest = None
    biggest_size = -1
    for contour in contours:
        area = cv2.contourArea(contour)
        if area > biggest_size:
            biggest = contour
            biggest_size = area
    return biggest

# skeletonize the mask
def skeleton(mask):
    # get structure
    img = mask.copy()
    size = np.size(img)
    skel = np.zeros_like(mask)
    elem = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    while True:
        # skeleton iteration
        eroded = cv2.erode(img, elem)
        temp = cv2.dilate(eroded, elem)
        temp = cv2.subtract(img, temp)
        skel = cv2.bitwise_or(skel, temp)
        # check for end condition
        img = eroded.copy()
        zeros = size - cv2.countNonZero(img)
        if zeros == size:
            break

    # connect small gaps
    kernel = np.ones((2, 2), np.uint8)
    skel = cv2.dilate(skel, kernel, iterations=1)

    # filter out little lines
    _, contours, _ = cv2.findContours(skel, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # filter contours by size
    big_cntrs = []
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        if perimeter > 50:
            big_cntrs.append(contour)
    thin_lines = np.zeros_like(skel)
    thin_lines = cv2.drawContours(thin_lines, big_cntrs, -1, 255, -1)
    skel = thin_lines

    # dilate and close to connect lines
    kernel = np.ones((3, 3), np.uint8)
    skel = cv2.dilate(skel, kernel, iterations=5)
    skel = cv2.erode(skel, kernel, iterations=4)

    return skel

# load images
imgs = []
l1 = cv2.imread("line1.png")
l2 = cv2.imread("line2.png")
imgs.append(l1)
imgs.append(l2)

# convert to HSV
hsvs = []
for img in imgs:
    hsvs.append(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

# draw contours
masks = []
for a in range(len(imgs)):
    # get contour
    contour = findPath(hsvs[a])
    # create mask
    mask = np.zeros_like(hsvs[a][:, :, 0])
    cv2.drawContours(mask, [contour], -1, (255), -1)
    mask = cv2.medianBlur(mask, 5)
    masks.append(mask)

# skeletonize
skelly_masks = []
for mask in masks:
    skelly = skeleton(mask.copy())
    skelly_masks.append(skelly)

# draw on original
for a in range(len(imgs)):
    imgs[a][np.where(masks[a] == 255)] = (155, 0, 0)  # 155 to avoid blinding people
    imgs[a][np.where(skelly_masks[a] == 255)] = (0, 0, 155)
    cv2.imshow(str(a), imgs[a])
    cv2.imwrite("img" + str(a) + ".png", imgs[a])
cv2.waitKey(0)
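As mentioned above, it might be worth filtering across multiple color spaces and combining the masks. A hedged sketch of that idea (the threshold values are illustrative placeholders, not tuned ones):

import cv2
import numpy as np

def white_mask_multi(img_bgr):
    # intersect 'low saturation' in HSV with 'high lightness' in LAB
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
    low_sat = cv2.inRange(hsv[:, :, 1], 0, 45)        # white has low saturation
    high_light = cv2.inRange(lab[:, :, 0], 140, 255)  # and high lightness
    return cv2.bitwise_and(low_sat, high_light)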

Detect Narrow Line in very noise image

I have performed preprocessing steps on a noisy acoustic image and now I need to detect narrow black lines.
Can you think of a better way to detect these lines?
My goal is to detect the line in the red box in this image.
Failed answer: this is not a perfect solution and will require further work to make it robust across various images. I noticed that there is very little noise inside the black lines, and thus Canny does not find many edges within this region. Code and results below:
import numpy as np
import cv2

gray = cv2.imread('2.png')
edges = cv2.Canny(gray, 10, 60, apertureSize=7)
cv2.imwrite('2-1-edges-10-60.jpg', edges)

kernel = np.ones((5, 5), np.uint8)
closeEdges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
cv2.imwrite('2-2-edges-10-60-dilated-1.jpg', closeEdges)

invertEdges = 255 - closeEdges
cv2.imwrite('2-3-invertedges-10-60.jpg', invertEdges)

minLineLength = 100
lines = cv2.HoughLinesP(image=invertEdges, rho=1, theta=np.pi/180, threshold=200,
                        lines=np.array([]), minLineLength=minLineLength, maxLineGap=80)
a, b, c = lines.shape
for i in range(a):
    cv2.line(gray, (lines[i][0][0], lines[i][0][1]), (lines[i][0][2], lines[i][0][3]),
             (0, 0, 255), 1, cv2.LINE_AA)
cv2.imwrite('2-4-houghlines.jpg', gray)
Using connected components on the inverse of the output image and keeping the largest elements could be helpful.
Another way of approaching this is to use the gradient image and directly find areas with a small range of gradient magnitude. This approach would be much more flexible, since it would not require the fixed threshold values (10 and 60 above): thresholds can adapt to the image gradient, or you can normalize the image gradient before applying hard-coded thresholds.
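A hedged sketch of that gradient idea, with a percentile cut standing in for an adaptive threshold (the 20th percentile is an illustrative choice):

import cv2
import numpy as np

# find low-gradient regions without hard-coded Canny thresholds
img = cv2.imread('2.png', cv2.IMREAD_GRAYSCALE)
gx = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=3)
gy = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=3)
mag = cv2.magnitude(gx, gy)

# adaptive cut: keep the smoothest 20% of pixels
cut = np.percentile(mag, 20)
smooth_regions = np.uint8(mag < cut) * 255
cv2.imwrite('low-gradient-mask.png', smooth_regions)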
Better answer (30-40% accurate):
import numpy as np
import cv2
import os

# store all images in this folder
path = 'images-1'

def autocrop(image, threshold=0):
    if len(image.shape) == 3:
        flatImage = np.max(image, 2)
    else:
        flatImage = image
    rows = np.where(np.max(flatImage, 0) > threshold)[0]
    if rows.size:
        cols = np.where(np.max(flatImage, 1) > threshold)[0]
        image = image[cols[0]: cols[-1] + 1, rows[0]: rows[-1] + 1]
    else:
        image = image[:1, :1]
    return image

def skeleton(img):
    size = np.size(img)
    skel = np.zeros(img.shape, np.uint8)
    element = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    done = False
    while not done:
        eroded = cv2.erode(img, element)
        temp = cv2.dilate(eroded, element)
        temp = cv2.subtract(img, temp)
        skel = cv2.bitwise_or(skel, temp)
        img = eroded.copy()
        zeros = size - cv2.countNonZero(img)
        if zeros == size:
            done = True
    return skel

def gamma_correction(img, correction):
    img = img / 255.0
    img = cv2.pow(img, correction)
    return np.uint8(img * 255)

def auto_canny(image, sigma=0.33):
    # compute the median of the single channel pixel intensities
    v = np.median(image)
    # apply automatic Canny edge detection using the computed median
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    edged = cv2.Canny(image, lower, upper)
    # return the edged image
    return edged

for file in os.listdir(path):
    if file.endswith(".png"):
        current = os.path.join(path, file)
        img = cv2.imread(current, 0)
        print('processing ' + current)

        img = autocrop(img, 0)
        cv2.imwrite(current + '-0-cropped.jpg', img)

        height, width = img.shape[:2]
        img = cv2.resize(img, (width, width))
        cv2.imwrite(current + '-0-resized.jpg', img)

        # cv2.imwrite(current + '-2-auto_canny_default.jpg', auto_canny(img))
        # img = cv2.medianBlur(img, 5)
        # cv2.imwrite(current + '-0-medianBlur.jpg', img)
        # th3 = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
        # cv2.imwrite(current + '-1-threshold_gaussian.jpg', th3)
        # laplacian = cv2.Laplacian(img, cv2.CV_64F)
        # cv2.imwrite(current + '-3-threshold_gaussian.jpg', laplacian)
        # img = cv2.bilateralFilter(img, 3, 3, 5)

        edges = cv2.Canny(img, 10, 20, apertureSize=5)
        cv2.imwrite(current + '-1-edges-10-60.jpg', edges)

        kernel = np.ones((3, 3), np.uint8)
        edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
        cv2.imwrite(current + '-1-edgesClosed-10-60.jpg', edges)

        edges = 255 - edges
        cv2.imwrite(current + '-2-edgesClosedInverted-10-60.jpg', edges)

        # OpenCV 3.x findContours signature
        im2, contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        imgColor = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        maxArea = 0
        for cnt in contours:
            if maxArea < cv2.contourArea(cnt):
                maxArea = cv2.contourArea(cnt)

        for cnt in contours:
            rect = cv2.minAreaRect(cnt)  # min-area rect gives a better result
            width = rect[1][0]
            height = rect[1][1]
            if cv2.contourArea(cnt) > int(maxArea / 2.5) and (width < height / 2 or height < width / 2):
                cv2.drawContours(imgColor, cnt, -1, (0, 255, 0), 1)
        cv2.imwrite(current + '-5-Contours.jpg', imgColor)

        # edges = skeleton(255 - edges)
        # cv2.imwrite(current + '-2-skeleton.jpg', edges)
        # edges = 255 - edges
        # minLineLength = int(width / 4)
        # threshold = 20
        # maxLineGap = 1
        # rho = 1
        # lines = cv2.HoughLinesP(image=edges, rho=rho, theta=np.pi/180, threshold=threshold,
        #                         lines=np.array([]), minLineLength=minLineLength, maxLineGap=maxLineGap)
        # if lines is not None:
        #     a, b, c = lines.shape
        #     for i in range(a):
        #         cv2.line(img, (lines[i][0][0], lines[i][0][1]), (lines[i][0][2], lines[i][0][3]), (0, 0, 255), 1, cv2.LINE_AA)
        #         cv2.line(edges, (lines[i][0][0], lines[i][0][1]), (lines[i][0][2], lines[i][0][3]), (0, 0, 255), 1, cv2.LINE_AA)
        #     cv2.imwrite(current + '-5-houghlines.jpg', img)
        #     cv2.imwrite(current + '-6-houghlines.jpg', edges)
        #     print('cool')
        # else:
        #     cv2.imwrite(current + '-5-houghlines.jpg', img)
Also, check the following links:
Detection of Continuous, Smooth and Thin Edges in Noisy Images Using Constrained Particle Swarm Optimisation
http://www.imagemagick.org/discourse-server/viewtopic.php?t=14491
http://answers.opencv.org/question/3454/detecting-thick-edges/

Difficult time trying to do shape recognition for 3D objects

I am trying to build a shape recognition classifier that, given an individual picture of an object (from a scene), can classify (after machine learning) the shape of the object (cylinder, cube, sphere, etc.).
Original scene:
Individual objects it will classify:
I attempted to do this using cv2.approxPolyDP in an attempt to classify a cylinder. However, either my implementation isn't good or this wasn't a good choice of algorithm in the first place: objects in the shape of cylinders were assigned an approxPolyDP vertex count of 3 or 4.
Perhaps I can threshold and, in general, assume that a value of 3 or 4 means the object is a cylinder, but I feel that is not the most reliable method for 3D shape classification. I feel there is a better way to implement this than just hardcoding values; with this method, a cylinder can easily be confused with a cube.
Is there any way I can improve my 3D shape recognition program?
Code:
import cv2
import numpy as np
from pyimagesearch import imutils
from PIL import Image
from time import time

def invert_img(img):
    img = (255 - img)
    return img

def threshold(im):
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    imgray = cv2.medianBlur(imgray, 9)
    imgray = cv2.Canny(imgray, 75, 200)
    return imgray

def view_all_contours(im, size_min, size_max):
    main = np.array([[]])
    cnt_target = im.copy()
    for c in cnts:
        epsilon = 0.1 * cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, epsilon, True)
        area = cv2.contourArea(c)
        print('area: ', area)
        test = im.copy()
        # to weed out contours that are too small or big
        if area > size_min and area < size_max:
            print(c[0, 0])
            print('approx: ', len(approx))
            max_pos = c.max(axis=0)
            max_x = max_pos[0, 0]
            max_y = max_pos[0, 1]
            min_pos = c.min(axis=0)
            min_x = min_pos[0, 0]
            min_y = min_pos[0, 1]
            # load each contour onto the image
            cv2.drawContours(cnt_target, c, -1, (0, 0, 255), 2)
            print('Found object')
            frame_f = test[min_y:max_y, min_x:max_x]
            main = np.append(main, approx[None, :][None, :])
            thresh = frame_f.copy()
            thresh = threshold(thresh)
            contours_small, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            cnts_small = sorted(contours_small, key=cv2.contourArea, reverse=True)
            cv2.drawContours(frame_f, cnts_small, -1, (0, 0, 255), 2)
            cv2.imshow('Thresh', thresh)
            cv2.imshow('Show Ya', frame_f)
            cv2.waitKey(0)
    # uncomment in order to show all rectangles in image
    print('---------------------------------------------')
    #cv2.drawContours(cnt_target, cnts, -1, (0, 255, 0), 2)
    print(main.shape)
    print(main)
    return cnt_target

time_1 = time()

roi = cv2.imread('images/beach_trash_3.jpg')
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
target = cv2.imread('images/beach_trash_3.jpg')
target = imutils.resize(target, height=400)
hsvt = cv2.cvtColor(target, cv2.COLOR_BGR2HSV)
img_height = target.shape[0]
img_width = target.shape[1]

# calculating object histogram
roihist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])

# normalize histogram and apply backprojection
cv2.normalize(roihist, roihist, 0, 255, cv2.NORM_MINMAX)
dst = cv2.calcBackProject([hsvt], [0, 1], roihist, [0, 180, 0, 256], 1)

# now convolve with a circular disc
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
cv2.filter2D(dst, -1, disc, dst)

# threshold and binary AND
ret, thresh = cv2.threshold(dst, 50, 255, 0)
thresh_one = thresh.copy()
thresh = cv2.merge((thresh, thresh, thresh))
res = cv2.bitwise_and(target, thresh)

# morphological erosion & dilation
kernel = np.ones((9, 9), np.uint8)  # (6,6) to get more contours, (9,9) to reduce noise
thresh_one = cv2.erode(thresh_one, kernel, iterations=3)
thresh_one = cv2.dilate(thresh_one, kernel, iterations=2)

# invert the image
thresh_one = invert_img(thresh_one)

# to show the previous image
#res = np.vstack((target, thresh, res))
#cv2.imwrite('res.jpg', res)
#cv2.waitKey(0)
#cv2.imshow('Before contours', thresh_one)

cnt_target = target.copy()
cnt_full = target.copy()

# draw the contours (OpenCV 2.4 findContours signature)
contours, hierarchy = cv2.findContours(thresh_one.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key=cv2.contourArea, reverse=True)
print(time() - time_1)

size_min = 200
size_max = 5000
cnt_target = view_all_contours(target, size_min, size_max)
cv2.drawContours(cnt_full, cnts, -1, (0, 0, 255), 2)
res = imutils.resize(thresh_one, height=700)
cv2.imshow('Original image', target)
cv2.imshow('Preprocessed', thresh_one)
cv2.imshow('All contours', cnt_full)
cv2.imshow('Filtered contours', cnt_target)
cv2.waitKey(0)
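As a hedged illustration of features that tend to be less brittle than the raw approxPolyDP vertex count (the epsilon parameter makes that count very sensitive), per-contour circularity, aspect ratio and solidity can feed the machine-learning step instead of hardcoded cutoffs. shape_features is a hypothetical helper; the formulas are standard contour descriptors:

import cv2
import numpy as np

def shape_features(contour):
    # geometric descriptors that are steadier than a vertex count
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)
    # circularity is 1.0 for a perfect circle, lower for elongated shapes
    circularity = 4 * np.pi * area / (perimeter * perimeter) if perimeter > 0 else 0
    x, y, w, h = cv2.boundingRect(contour)
    aspect_ratio = w / float(h) if h > 0 else 0
    hull_area = cv2.contourArea(cv2.convexHull(contour))
    solidity = area / hull_area if hull_area > 0 else 0
    return circularity, aspect_ratio, solidity

A sphere's silhouette, for example, has circularity near 1.0, while a cylinder seen from the side is elongated; classifying on such per-contour features avoids pinning everything on a single approxPolyDP value.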
