Trying to detect faces using OpenCv's inbuilt face detector - opencv

Face Detection using OpenCV's inbuilt face detector.
What does the code between the asterisks mean?
A caffe model is being used here.
# loop over the detections
for i in range(0, **detections.shape[2]**):
# extract the confidence (i.e., probability) associated with the
# prediction
confidence = **detections[0, 0, i, 2]**
# filter out weak detections by ensuring the `confidence` is
# greater than the minimum confidence
if confidence > args["confidence"]:
# compute the (x, y)-coordinates of the bounding box for the
# object
**box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")**
# draw the bounding box of the face along with the associated
# probability
text = "{:.2f}%".format(confidence * 100)
**y = startY - 10 if startY - 10 > 10 else startY + 10
cv2.rectangle(image, (startX, startY), (endX, endY),
(0, 0, 255), 2)**
cv2.putText(image, text, (startX, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)

Related

Cannot open display in WSL 2, py-qt5

How can I display the application on Windows?
Code for Reference:
from tkinter import N
import numpy as np
from keras.preprocessing.image import img_to_array
import cv2
import imutils
from keras.models import load_model
import numpy as np
# parameters for loading data and images
detection_model_path = 'ER_Project//haar-cascade-files-master/haarcascade_frontalface_default.xml'
emotion_model_path = 'ER_Project/_mini_XCEPTION.102-0.66.hdf5'
# hyper-parameters for bounding boxes shape
# loading models
face_detection = cv2.CascadeClassifier(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
EMOTIONS = ["angry", "disgust", "scared", "happy", "sad", "surprised",
"neutral"]
#feelings_faces = []
# for index, emotion in enumerate(EMOTIONS):
# feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))
# starting video streaming
cv2.namedWindow('your_face')
camera = cv2.VideoCapture(0)
try:
    while True:
        print("Hello")
        # reading the frame; `grabbed` is False when the camera delivers
        # nothing, in which case there is no image to process
        grabbed, frame = camera.read()
        if not grabbed or frame is None:
            break
        frame = imutils.resize(frame, width=300)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_detection.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE)
        canvas = np.zeros((250, 300, 3), dtype="uint8")
        frameClone = frame.copy()
        if len(faces) > 0:
            # each detection is unpacked below as (x, y, w, h), so the
            # area of a box is w * h; keep only the largest face
            (fX, fY, fW, fH) = max(faces, key=lambda r: r[2] * r[3])
            # Extract the ROI of the face from the grayscale image, resize
            # it to a fixed 64x64 pixels, and then prepare the ROI for
            # classification via the CNN
            roi = gray[fY:fY + fH, fX:fX + fW]
            roi = cv2.resize(roi, (64, 64))
            roi = roi.astype("float") / 255.0
            roi = img_to_array(roi)
            roi = np.expand_dims(roi, axis=0)
            preds = emotion_classifier.predict(roi)[0]
            emotion_probability = np.max(preds)
            label = EMOTIONS[preds.argmax()]
            for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
                # construct the label text
                text = "{}: {:.2f}%".format(emotion, prob * 100)
                # draw the label + probability bar on the canvas
                # emoji_face = feelings_faces[np.argmax(preds)]
                w = int(prob * 300)
                cv2.rectangle(canvas, (7, (i * 35) + 5),
                              (w, (i * 35) + 35), (0, 0, 255), -1)
                cv2.putText(canvas, text, (10, (i * 35) + 23),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                            (255, 255, 255), 2)
            # annotate the frame once with the winning label and face box
            cv2.putText(frameClone, label, (fX, fY - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
            cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
                          (0, 0, 255), 2)
        # for c in range(0, 3):
        # frame[200:320, 10:130, c] = emoji_face[:, :, c] * \
        # (emoji_face[:, :, 3] / 255.0) + frame[200:320,
        # 10:130, c] * (1.0 - emoji_face[:, :, 3] / 255.0)
        # Always refresh both windows and poll the keyboard, even when no
        # face was found; otherwise the display freezes and 'q' is ignored
        # for as long as nobody is in view.
        cv2.imshow('your_face', frameClone)
        cv2.imshow("Probabilities", canvas)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # release the camera and close the windows even if a frame fails
    camera.release()
    cv2.destroyAllWindows()
NUMA SUPPORT:
2022-04-20 04:36:21.181568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-20 04:36:21.181664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3951 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
I need to run this OpenCV GUI app on Windows.

Opencv, combination of chroma key correction and contour detection gives unexpected result

I am doing contour detection on a chroma-key-corrected image. Everything works fine when I filter out just the blues, but when I try to get a better chroma correction by also filtering the reds, my contours suddenly cannot be detected anymore. Does anyone have any suggestions?
WITH BLUE FILTER:
img = cv2.imread('yellowcropped.jpg', 1)
lower_blue = np.array([0, 0, 15]) ##[R value, G value, B value]
upper_blue = np.array([255, 255, 60])
mask = cv2.inRange(image_copy, lower_blue, upper_blue)
WITH BLUE AND RED FILTER:
lower_blue = np.array([180, 0, 15]) ##[R value, G value, B value]
upper_blue = np.array([255, 255, 60])
(Notice that the top-left image gets much crisper, but NO CONTOURS are detected anymore.)
BELOW MY CONTOUR FINDING CODE:
imgContour = image_original.copy()
imgBlur = cv2.GaussianBlur(img, (7, 7), 1)
imgGray = imgBlur
imgCanny = cv2.Canny(imgGray,threshold1,threshold2)
kernel = np.ones((5, 5))
imgDil = cv2.dilate(imgCanny, kernel, iterations=1)
getContours(imgDil,imgContour)
def getContours(img,imgContour):
    """Find contours in ``img``, draw them on ``imgContour`` and return their boxes.

    Only contours whose area is strictly greater than the current value of
    the "Area" trackbar in the "Parameters" window and strictly below 5000
    are kept. For each kept contour the outline, the centre of its bounding
    rectangle, the rectangle itself and some text labels are drawn onto
    ``imgContour`` in place.

    Args:
        img: image passed to ``cv2.findContours``.
        imgContour: image that receives the drawings (modified in place).

    Returns:
        A list of ``[x, y, w, h]`` lists, one per kept contour.
    """
    contour_list = []
    # Take the last two return values so the unpacking also works on
    # OpenCV 3.x, where findContours returns (image, contours, hierarchy).
    contours, hierarchy = cv2.findContours(
        img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
    # print('contours:', contours)
    # The trackbar value does not change while this loop runs: read it once.
    areaMin = cv2.getTrackbarPos("Area", "Parameters")
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area > areaMin and area < 5000:
            cv2.drawContours(imgContour, cnt, -1, (255, 0, 0), 7)
            peri = cv2.arcLength(cnt, True)
            approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
            # print(len(approx))
            x, y, w, h = cv2.boundingRect(approx)
            print('contour bounding:', x,y,w,h)
            center_x = int(x + w/2)
            center_y = int(y + h/2)
            cv2.circle(imgContour, (center_x, center_y), 5, (0, 0, 255), 5)
            cv2.rectangle(imgContour, (x, y), (x + w, y + h), (0, 255, 0), 5)
            cv2.putText(imgContour, "Points: " + str(len(approx)),
                        (x + w + 20, y + 20), cv2.FONT_HERSHEY_COMPLEX, .7,
                        (0, 255, 0), 2)
            cv2.putText(imgContour, "Area: " + str(int(area)),
                        (x + w + 20, y + 45), cv2.FONT_HERSHEY_COMPLEX, 0.7,
                        (0, 255, 0), 2)
            if area < 3500:
                cv2.putText(imgContour, "THIS IS A SMALL PART",
                            (x + w + 20, y + 70), cv2.FONT_HERSHEY_COMPLEX, 0.7,
                            (0, 255, 0), 2)
            contour_list.append([x,y,w,h])
    return contour_list
So I still do not entirely know what went wrong here, but I found a solution for anyone in the future looking to first chroma-key correct (remove the background) and then do contour detection:
I dropped the Gaussian filter, the dilation and Canny, and instead just inverted the image's colours (contour detection only detects white parts on a black background) using:
mask = cv2.bitwise_not(mask)
I then changed the contour detection from cv2.RETR_EXTERNAL to cv2.RETR_LIST
Somehow that fixed it, the result is now really good.

openCV Cascade Classifier rectangle box and its 4 coordinates

I am trying to figure out the rectangle coordinates (4 points) that come up when an object or face is detected using a Cascade Classifier.
I am using Python, and the code is based on the widely available sample:
import cv2
custom_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
def find_face(img):
    """Detect objects with ``custom_cascade`` and mark each one on ``img``.

    Every detection gets a rectangle and a small circle at its top-left
    corner, both drawn onto ``img`` in place.

    Args:
        img: image passed to ``cv2.cvtColor`` with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The ``(x, y, w, h)`` tuple of the last detection that was drawn, or
        ``None`` when nothing was detected (the bare ``return x, y, w, h``
        would otherwise fail on unbound names in that case).
    """
    to_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    object_detected = custom_cascade.detectMultiScale(to_gray, 1.4, 4)
    last_box = None
    for (x, y, w, h) in object_detected:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 1)
        # NOTE
        # Circle coordinate shows start of the corner (very top left)
        # Need to find all four coordinate of the rectangle
        cv2.circle(img, (x, y), 2, (0, 255, 0), 2)
        last_box = (x, y, w, h)
    return last_box
How can I get the pixel coordinates and manipulate them? Are the other corners simply given by x + w and y + h, or is it the other way around? And what if I need to find the centroid of an object?
(x, y) are the top-left coordinates of the rectangle drawn by cv2.rectangle; w is the width and h is the height.
You can manipulate those values to get the midpoint,
or, alternatively, you can use cv2.boundingRect(cnt).
If you want to find the centroid, change the line cv2.circle(img, (x, y), 2, (0, 255, 0), 2) to this:
cent_x = int(x + w/2)
cent_y = int(y + h/2)
cv2.circle(img, (cent_x, cent_y), 2, (0, 255, 0), 2)
OK guys,
After brushing up on my OpenCV and maths a bit and going through some docs, I found that this works:
center_point_x = abs(x + 0.5 * w)
center_point_y = abs(y + 0.5 * h)
cv2.circle(img, (int(center_point_x), int(center_point_y)), 2, (255, 0, 255), 2)
It is possible to use numpy for this as well, but I did a quick version to check the operation. Hopefully this will help others.
Thank you all.

How to test OpenCV DNN module accuracy? It does not predict correct detections for YOLOv3. Whereas Darknet detector detects correctly

OpenCV DNN module does not predict correct detections for YOLOv3. Whereas the Darknet detector detects correctly.
System information (version)
OpenCV => 4.2.1 and 4.4.x
Operating System / Platform => Ubuntu 18.04 64Bit
I tested results with compiled OpenCV from source code and I tried with pre-built opencv-python also but OpenCV DNN detects wrong objects.
Whereas Darknet detector detects correctly.
Correct detection with darknet detector:
Wrong detection with OpenCV DNN module:
YOLOv3 network and model weights are from https://github.com/AlexeyAB/darknet
modelWeights: yolov3.weights
modelConfiguration: yolov3.cfg
ClassesFile: coco.names
Detailed description
Please see the output images at the link appended below. (correct detection with darknet detector)
compared with the wrong detection (with OpenCV DNN)
Output images available in this Google Drive link.
The above link includes test-images also for steps to test
# The following code is partial to demonstrate steps
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
layerNames = net.getLayerNames()
layerNames = [layerNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# construct a blob from the input frame and then perform a forward pass of the YOLO object detector,
# giving us our bounding boxes and associated probabilities
blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
swapRB=True, crop=False)
net.setInput(blob)
layerOutputs = net.forward(layerNames)
# initialize our lists of detected bounding boxes, confidences,
# and class IDs, respectively
boxes = []
confidences = []
classIDs = []
# loop over each of the layer outputs
for output in layerOutputs:
# loop over each of the detections
for detection in output:
# extract the class ID and confidence (i.e., probability)
# of the current object detection
scores = detection[5:]
classID = np.argmax(scores)
confidence = scores[classID]
# filter out weak predictions by ensuring the detected
# probability is greater than the minimum probability
if confidence > args["confidence"]:
# scale the bounding box coordinates back relative to
# the size of the image, keeping in mind that YOLO
# actually returns the center (x, y)-coordinates of
# the bounding box followed by the boxes' width and
# height
box = detection[0:4] * np.array([W, H, W, H])
(centerX, centerY, width, height) = box.astype("int")
# use the center (x, y)-coordinates to derive the top
# and and left corner of the bounding box
x = int(centerX - (width / 2))
y = int(centerY - (height / 2))
# update our list of bounding box coordinates,
# confidences, and class IDs
boxes.append([x, y, int(width), int(height)])
confidences.append(float(confidence))
classIDs.append(classID)
# apply non-maxima suppression to suppress weak, overlapping
# bounding boxes
idxs = cv2.dnn.NMSBoxes(boxes, confidences, args["confidence"], args["threshold"])
dets = []
if len(idxs) > 0:
# loop over the indexes we are keeping
for i in idxs.flatten():
(x, y) = (boxes[i][0], boxes[i][1])
(w, h) = (boxes[i][2], boxes[i][3])
dets.append([x, y, x+w, y+h, confidences[i]])
if len(boxes) > 0:
i = int(0)
for box in boxes:
# extract the bounding box coordinates
(x, y) = (int(box[0]), int(box[1]))
(w, h) = (int(box[2]), int(box[3]))
# draw a bounding box rectangle and label on the image
# color = [int(c) for c in COLORS[classIDs[i]]]
# cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
cv2.rectangle(frame, (x, y), (w, h), color, 2)
cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)# 1.0 0.5, color, 2)
i += 1
cv2.imwrite("detection-output.jpg", frame)
I think your detection is correct, since all of your labels are "car"; the problem is the text you draw in this line:
cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2)
You should put the class name in the text, but I can't find where `text` is defined. Your code should look like this:
cv2.putText(frame, classes[class_ids[index]], (x + 5, y + 20), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, colors,2)
In my experience, though, Darknet gives better detections than the OpenCV DNN module.

'numpy.ndarray' object has no attribute 'split'

I have read "AttributeError: 'numpy.ndarray' object has no attribute 'split'" and "Why is pytesseract causing AttributeError: 'NoneType' object has no attribute 'bands'?" but found no solution to my problem.
I am following this article: https://www.pyimagesearch.com/2018/09/17/opencv-ocr-and-text-recognition-with-tesseract/#comment-481113. It shows how to detect and extract text from images using Tesseract. I have installed everything on my system, but when I run the code I receive the following error:
Traceback (most recent call last):
File "text_recognition.py", line 157, in <module>
text = pytesseract.image_to_string(roi, config=config)
File "C:\Users\prince.bhatia\AppData\Local\Programs\Python\Python36\lib\site-p
ackages\pytesseract\pytesseract.py", line 104, in image_to_string
if len(image.split()) == 4:
AttributeError: 'numpy.ndarray' object has no attribute 'split'
below is my complete code:
# USAGE
# python text_recognition.py --east frozen_east_text_detection.pb --image images/example_01.jpg
# python text_recognition.py --east frozen_east_text_detection.pb --image images/example_04.jpg --padding 0.05
# import the necessary packages
from imutils.object_detection import non_max_suppression
import numpy as np
import pytesseract
import argparse
import cv2
def decode_predictions(scores, geometry, min_confidence=None):
    """Turn the detector's score and geometry volumes into candidate boxes.

    Args:
        scores: array indexed as ``scores[0, 0, y][x]``; its shape entries
            2 and 3 give the number of rows and columns.
        geometry: array indexed as ``geometry[0, k, y][x]`` for ``k`` in
            0..4; channels 0-3 hold the box distances and channel 4 the
            rotation angle.
        min_confidence: cells scoring below this value are skipped. When
            ``None`` (the default) the value is read from the module-level
            ``args["min_confidence"]``.

    Returns:
        A tuple ``(rects, confidences)``: ``rects`` is a list of
        ``(startX, startY, endX, endY)`` integer tuples and ``confidences``
        holds the matching score for each rectangle.
    """
    if min_confidence is None:
        # fall back to the command-line setting of the surrounding script
        min_confidence = args["min_confidence"]
    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []
    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the
        # geometrical data used to derive potential bounding box
        # coordinates that surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability,
            # ignore it
            if scoresData[x] < min_confidence:
                continue
            # compute the offset factor as our resulting feature
            # maps will be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and
            # then compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geometry volume to derive the width and height
            # of the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # compute both the starting and ending (x, y)-coordinates
            # for the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            # add the bounding box coordinates and probability score
            # to our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
    # return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", type=str,
help="path to input image")
ap.add_argument("-east", "--east", type=str,
help="path to input EAST text detector")
ap.add_argument("-c", "--min-confidence", type=float, default=0.5,
help="minimum probability required to inspect a region")
ap.add_argument("-w", "--width", type=int, default=320,
help="nearest multiple of 32 for resized width")
ap.add_argument("-e", "--height", type=int, default=320,
help="nearest multiple of 32 for resized height")
ap.add_argument("-p", "--padding", type=float, default=0.0,
help="amount of padding to add to each border of ROI")
args = vars(ap.parse_args())
# load the input image and grab the image dimensions
image = cv2.imread(args["image"])
orig = image.copy()
(origH, origW) = image.shape[:2]
# set the new width and height and then determine the ratio in change
# for both the width and height
(newW, newH) = (args["width"], args["height"])
rW = origW / float(newW)
rH = origH / float(newH)
# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]
# define the two output layer names for the EAST detector model that
# we are interested -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = [
"feature_fusion/Conv_7/Sigmoid",
"feature_fusion/concat_3"]
# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])
# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
(123.68, 116.78, 103.94), swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
# decode the predictions, then apply non-maxima suppression to
# suppress weak, overlapping bounding boxes
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)
# initialize the list of results
results = []
# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:
# scale the bounding box coordinates based on the respective
# ratios
startX = int(startX * rW)
startY = int(startY * rH)
endX = int(endX * rW)
endY = int(endY * rH)
# in order to obtain a better OCR of the text we can potentially
# apply a bit of padding surrounding the bounding box -- here we
# are computing the deltas in both the x and y directions
dX = int((endX - startX) * args["padding"])
dY = int((endY - startY) * args["padding"])
# apply padding to each side of the bounding box, respectively
startX = max(0, startX - dX)
startY = max(0, startY - dY)
endX = min(origW, endX + (dX * 2))
endY = min(origH, endY + (dY * 2))
# extract the actual padded ROI
roi = orig[startY:endY, startX:endX]
print(roi)
# in order to apply Tesseract v4 to OCR text we must supply
# (1) a language, (2) an OEM flag of 4, indicating that the we
# wish to use the LSTM neural net model for OCR, and finally
# (3) an OEM value, in this case, 7 which implies that we are
# treating the ROI as a single line of text
config = ("-l eng --oem 1 --psm 7")
text = pytesseract.image_to_string(roi, config=config)#here i cam receiving error
# add the bounding box coordinates and OCR'd text to the list
# of results
results.append(((startX, startY, endX, endY), text))
# sort the results bounding box coordinates from top to bottom
results = sorted(results, key=lambda r:r[0][1])
# loop over the results
for ((startX, startY, endX, endY), text) in results:
# display the text OCR'd by Tesseract
print("OCR TEXT")
print("========")
print("{}\n".format(text))
# strip out non-ASCII text so we can draw the text on the image
# using OpenCV, then draw the text and a bounding box surrounding
# the text region of the input image
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
output = orig.copy()
cv2.rectangle(output, (startX, startY), (endX, endY),
(0, 0, 255), 2)
cv2.putText(output, text, (startX, startY - 20),
cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
# show the output image
cv2.imshow("Text Detection", output)
cv2.waitKey(0)

Resources