Cannot open display in WSL 2 (PyQt5 / OpenCV)

How can I display the application window on Windows?
Code for reference:
from tkinter import N
import numpy as np
from keras.preprocessing.image import img_to_array
import cv2
import imutils
from keras.models import load_model

# parameters for loading data and images
detection_model_path = 'ER_Project//haar-cascade-files-master/haarcascade_frontalface_default.xml'
emotion_model_path = 'ER_Project/_mini_XCEPTION.102-0.66.hdf5'

# hyper-parameters for bounding boxes shape
# loading models
face_detection = cv2.CascadeClassifier(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
EMOTIONS = ["angry", "disgust", "scared", "happy", "sad", "surprised",
            "neutral"]

# feelings_faces = []
# for index, emotion in enumerate(EMOTIONS):
#     feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))

# starting video streaming
cv2.namedWindow('your_face')
camera = cv2.VideoCapture(0)
while True:
    print("Hello")
    # reading the frame
    frame = camera.read()[1]
    frame = imutils.resize(frame, width=300)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_detection.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30),
        flags=cv2.CASCADE_SCALE_IMAGE)

    canvas = np.zeros((250, 300, 3), dtype="uint8")
    frameClone = frame.copy()
    if len(faces) > 0:
        faces = sorted(faces, reverse=True,
                       key=lambda x: (x[2] - x[0]) * (x[3] - x[1]))[0]
        (fX, fY, fW, fH) = faces
        # Extract the ROI of the face from the grayscale image, resize it
        # to a fixed 64x64 pixels, and then prepare the ROI for
        # classification via the CNN
        roi = gray[fY:fY + fH, fX:fX + fW]
        roi = cv2.resize(roi, (64, 64))
        roi = roi.astype("float") / 255.0
        roi = img_to_array(roi)
        roi = np.expand_dims(roi, axis=0)
        preds = emotion_classifier.predict(roi)[0]
        emotion_probability = np.max(preds)
        label = EMOTIONS[preds.argmax()]
    else:
        continue

    for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
        # construct the label text
        text = "{}: {:.2f}%".format(emotion, prob * 100)
        # draw the label + probability bar on the canvas
        # emoji_face = feelings_faces[np.argmax(preds)]
        w = int(prob * 300)
        cv2.rectangle(canvas, (7, (i * 35) + 5),
                      (w, (i * 35) + 35), (0, 0, 255), -1)
        cv2.putText(canvas, text, (10, (i * 35) + 23),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                    (255, 255, 255), 2)
        cv2.putText(frameClone, label, (fX, fY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
        cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
                      (0, 0, 255), 2)

    # for c in range(0, 3):
    #     frame[200:320, 10:130, c] = emoji_face[:, :, c] * \
    #         (emoji_face[:, :, 3] / 255.0) + frame[200:320, 10:130, c] * \
    #         (1.0 - emoji_face[:, :, 3] / 255.0)

    cv2.imshow('your_face', frameClone)
    cv2.imshow("Probabilities", canvas)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()
NUMA support log:
2022-04-20 04:36:21.181568: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:922] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-20 04:36:21.181664: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3951 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
I need to run this OpenCV GUI app on Windows.
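As background, the NUMA lines above are informational; a "cannot open display" failure means cv2.imshow has no X server to talk to. On WSL 2 builds without WSLg, a common approach is to run an X server on Windows (for example VcXsrv, with access control disabled) and point DISPLAY at the Windows host. Below is a minimal sketch of that idea, assuming such a server is already running; WSL 2 lists the host IP as the nameserver in /etc/resolv.conf by default.

import os

def set_wsl2_display(screen=":0"):
    # Point DISPLAY at the Windows host's X server (assumes one is running,
    # e.g. VcXsrv). WSL 2 writes the host IP into /etc/resolv.conf by default.
    if "DISPLAY" not in os.environ:
        with open("/etc/resolv.conf") as f:
            for line in f:
                if line.startswith("nameserver"):
                    os.environ["DISPLAY"] = line.split()[1] + screen
                    break

set_wsl2_display()  # call this before any cv2.namedWindow / cv2.imshow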

Related

Why is the output of Dense Optical Flow in OpenCV not continuous?

I want to track whether a worker manually throws garbage into the truck in the video.
Q1: Is Dense Optical Flow in OpenCV a good solution for me?
Q2: I tried to code a sample. But why is the flow image not continuous? Frames 41 and 43 are good, but frame 42 is black.
Q3: Can I watch the color change in a small area to track whether garbage passes through?
A small area like this:
Here is my code. You can run my code and video from my GitHub repository:
https://github.com/Pinocchio2018/QuestionHelper/blob/main/openCV_related/dence_optical_flow_problem/test.py
import numpy as np
import cv2 as cv

def put_frame_no(image, frame_no):
    # font
    font = cv.FONT_HERSHEY_SIMPLEX
    # org
    org = (50, 450)
    # fontScale
    font_scale = 2
    # Red color in BGR
    color = (0, 0, 255)
    # Line thickness of 2 px
    thickness = 2
    # Using cv2.putText() method
    image = cv.putText(image, "frame no: " + str(frame_no), org, font,
                       font_scale, color, thickness, cv.LINE_AA)
    return image

cap = cv.VideoCapture(cv.samples.findFile("0116-sample4-edited-short-throw.mp4"))
ret, frame1 = cap.read()
prv_frame = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)
hsv = np.zeros_like(frame1)
hsv[..., 1] = 255

cv.namedWindow("flow image", cv.WINDOW_NORMAL)
cv.resizeWindow("flow image", 800, 600)

frame_no = 0
while 1:
    ret, origin_img = cap.read()
    if not ret:
        print('No frames grabbed!')
        break

    next_frame = cv.cvtColor(origin_img, cv.COLOR_BGR2GRAY)
    flow = cv.calcOpticalFlowFarneback(prv_frame, next_frame, None,
                                       0.5, 3, 15, 3, 5, 1.2, 0)
    mag, ang = cv.cartToPolar(flow[..., 0], flow[..., 1])
    hsv[..., 0] = ang * 180 / np.pi / 2
    hsv[..., 2] = cv.normalize(mag, None, 0, 255, cv.NORM_MINMAX)
    flow_image = cv.cvtColor(hsv, cv.COLOR_HSV2BGR)

    flow_image = put_frame_no(flow_image, frame_no)
    origin_img = put_frame_no(origin_img, frame_no)
    frame_no += 1

    vis_frame = np.concatenate((origin_img, flow_image), axis=1)
    cv.imshow('flow image', vis_frame)
    # cv.imshow('origin', flow_image)

    k = cv.waitKey(30) & 0xff
    if k == 27:
        break
    elif k == ord('s'):
        cv.imwrite('opticalfb.png', origin_img)
        cv.imwrite('opticalhsv.png', flow_image)
    prv_frame = next_frame

cv.destroyAllWindows()
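A hedged side note on Q2: if two consecutive frames in the video are identical (duplicated frames are common in re-encoded clips), the flow is all zeros and the min-max normalization renders the frame black, which would explain a single black frame between two good ones. For Q3, here is a minimal sketch of scoring motion in a small area; the ROI coordinates and the threshold are placeholders to tune on the actual video:

import numpy as np
import cv2 as cv

def roi_motion_score(flow, roi):
    # Mean Farneback flow magnitude inside roi = (x, y, w, h).
    x, y, w, h = roi
    patch = flow[y:y + h, x:x + w]
    mag, _ = cv.cartToPolar(patch[..., 0], patch[..., 1])
    return float(mag.mean())

# Inside the while-loop above, after computing `flow`, one could do e.g.:
# if roi_motion_score(flow, (100, 100, 80, 80)) > 5.0:  # placeholder ROI and threshold
#     print("motion spike in ROI at frame", frame_no)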

How to use an ONNX file for real-time semantic segmentation with a deep neural network

I have a problem in my code, as shown below:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

labels = ['Background', 'Korosi', 'Tanah', 'Tanaman']
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

net = cv2.dnn.readNetFromONNX('anomali_model1.onnx')
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

capture = cv2.VideoCapture(0)
while True:
    re, img = capture.read()
    # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
    # height, width, channels = img.shape
    # blob = cv2.dnn.blobFromImage(img, 0.00392, (256, 256),
    #                              swapRB=True, crop=False)
    blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
    font = cv2.FONT_HERSHEY_PLAIN
    colors = np.random.uniform(0, 255, size=(len(classes), 3))
    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            color = colors[class_ids[i]]
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label, (x, y + 30), font, 2, color, 3)

    cv2.imshow("Image", cv2.resize(img, (800, 600)))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

capture.release()
cv2.destroyAllWindows()
And I get an error like this:
error                                     Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_10644\3517982312.py in <module>
      9 blob = cv2.dnn.blobFromImage(img, swapRB=True, crop=False)
     10 net.setInput(blob)
---> 11 outs = net.forward(output_layers)
     12
     13 class_ids = []

error: OpenCV(3.4.17) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\convolution_layer.cpp:331: error: (-2:Unspecified error) Number of input channels should be multiple of 3 but got 640 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'
Can you help me solve this problem? I've looked into various sources but did not find a solution.
Library versions for this code:
Python 3.7
TensorFlow 2.0
OpenCV 3.4.17
I hope you can solve this problem and share the solution with me.
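One likely lead, judging from the commented-out call in the code: a Keras-exported ONNX model usually expects a fixed input size (for example 256×256), while blobFromImage without an explicit size keeps the webcam frame's own 640×480, and 640 is exactly the number in the error. Below is a minimal sketch of that fix, assuming the model input really is 256×256 and a 1/255 scale (both taken from the commented-out call; the real input shape should be verified, e.g. with a viewer such as Netron):

# Sketch of a possible fix: give the blob the spatial size the network expects.
# The (256, 256) size and 1/255 scale are assumptions taken from the
# commented-out call in the question; verify the real input shape of the ONNX file.
blob = cv2.dnn.blobFromImage(img, scalefactor=1.0 / 255.0, size=(256, 256),
                             swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)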

Real-time OCR video streaming lags when running text recognition in real time

I'm done installing packages and debugging. This code is from https://github.com/LaggyHammer/real-time-OCR. When I run it, my camera or the video stream lags or is delayed on my end.
Here's the code I executed through the CLI.
# coding: utf-8
# =====================================================================
# Filename: video_text_detection.py
#
# py Ver: python 3.6 or later
#
# Description: Recognizes regions of text in a given video or through the webcam feed
#
# Usage: python real_time_ocr.py --east frozen_east_text_detection.pb
# or
# python real_time_ocr.py --east frozen_east_text_detection.pb --video test.avi
#
# Note: Requires opencv 3.4.2 or later
# For more in-script documentation, look at video_text_detection_modular.py
#
# Author: Ankit Saxena (ankch24#gmail.com)
# =====================================================================

from imutils.video import VideoStream
from imutils.video import FPS
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse
import imutils
import time
import cv2
import pytesseract

# setting up tesseract path
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


def box_extractor(scores, geometry, min_confidence):
    num_rows, num_cols = scores.shape[2:4]
    rectangles = []
    confidences = []

    for y in range(num_rows):
        scores_data = scores[0, 0, y]
        x_data0 = geometry[0, 0, y]
        x_data1 = geometry[0, 1, y]
        x_data2 = geometry[0, 2, y]
        x_data3 = geometry[0, 3, y]
        angles_data = geometry[0, 4, y]

        for x in range(num_cols):
            if scores_data[x] < min_confidence:
                continue

            offset_x, offset_y = x * 4.0, y * 4.0

            angle = angles_data[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            box_h = x_data0[x] + x_data2[x]
            box_w = x_data1[x] + x_data3[x]

            end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
            end_y = int(offset_y + (cos * x_data2[x]) - (sin * x_data1[x]))
            start_x = int(end_x - box_w)
            start_y = int(end_y - box_h)

            rectangles.append((start_x, start_y, end_x, end_y))
            confidences.append(scores_data[x])

    return rectangles, confidences


def get_arguments():
    ap = argparse.ArgumentParser()
    ap.add_argument('-v', '--video', type=str,
                    help='path to optional video file')
    ap.add_argument('-east', '--east', type=str, required=True,
                    help='path to EAST text detection model')
    ap.add_argument('-c', '--min_confidence', type=float, default=0.5,
                    help='minimum confidence to process a region')
    ap.add_argument('-w', '--width', type=int, default=320,
                    help='resized image width (multiple of 32)')
    ap.add_argument('-e', '--height', type=int, default=320,
                    help='resized image height (multiple of 32)')
    ap.add_argument('-p', '--padding', type=float, default=0.0,
                    help='padding on each ROI border')
    arguments = vars(ap.parse_args())
    return arguments


if __name__ == '__main__':

    args = get_arguments()

    w, h = None, None
    new_w, new_h = args['width'], args['height']
    ratio_w, ratio_h = None, None

    layer_names = ['feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3']

    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(args["east"])

    if not args.get('video', False):
        print("[INFO] starting video stream...")
        vs = VideoStream(src=0).start()
        time.sleep(0)
    else:
        vs = cv2.VideoCapture(args['video'])

    fps = FPS().start()

    while True:
        frame = vs.read()
        frame = frame[1] if args.get('video', False) else frame

        if frame is None:
            break

        frame = imutils.resize(frame, width=500)
        orig = frame.copy()
        orig_h, orig_w = orig.shape[:2]

        if w is None or h is None:
            h, w = frame.shape[:2]
            ratio_w = w / float(new_w)
            ratio_h = h / float(new_h)

        frame = cv2.resize(frame, (new_w, new_h))

        blob = cv2.dnn.blobFromImage(frame, 1.0, (new_w, new_h), (123.68, 116.78, 103.94),
                                     swapRB=True, crop=False)
        net.setInput(blob)
        scores, geometry = net.forward(layer_names)

        rectangles, confidences = box_extractor(scores, geometry,
                                                min_confidence=args['min_confidence'])
        boxes = non_max_suppression(np.array(rectangles), probs=confidences)

        for (start_x, start_y, end_x, end_y) in boxes:
            start_x = int(start_x * ratio_w)
            start_y = int(start_y * ratio_h)
            end_x = int(end_x * ratio_w)
            end_y = int(end_y * ratio_h)

            dx = int((end_x - start_x) * args['padding'])
            dy = int((end_y - start_y) * args['padding'])

            start_x = max(0, start_x - dx)
            start_y = max(0, start_y - dy)
            end_x = min(orig_w, end_x + (dx * 2))
            end_y = min(orig_h, end_y + (dy * 2))

            # ROI to be recognized
            roi = orig[start_y:end_y, start_x:end_x]

            # recognizing text
            config = '-l eng --oem 1 --psm 7'
            text = pytesseract.image_to_string(roi, config=config)

            cv2.rectangle(orig, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
            cv2.putText(orig, text, (start_x, start_y - 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

        fps.update()

        cv2.imshow("Detection", orig)

        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    fps.stop()
    print(f"[INFO] elapsed time {round(fps.elapsed(), 2)}")
    print(f"[INFO] approx. FPS : {round(fps.fps(), 2)}")

    if not args.get('video', False):
        vs.stop()
    else:
        vs.release()

    cv2.destroyAllWindows()
Is there an easy way to make the video streaming smoother with this code?
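The pytesseract.image_to_string call is almost certainly the slow step: it runs once per detected box on every single frame. One common way to smooth the preview is to run the heavy detection + OCR only every N-th frame and redraw cached results in between. A minimal sketch of the idea follows; detect_and_ocr here is a hypothetical helper standing in for the EAST + pytesseract code above:

import cv2

OCR_EVERY = 10          # assumed interval; tune for your hardware
frame_count = 0
cached_boxes = []       # list of ((start_x, start_y, end_x, end_y), text)

def annotate(frame, detect_and_ocr):
    # detect_and_ocr is a placeholder for the EAST + pytesseract pipeline above,
    # returning ((start_x, start_y, end_x, end_y), text) pairs.
    global frame_count, cached_boxes
    frame_count += 1
    if frame_count % OCR_EVERY == 1:      # heavy work on every N-th frame only
        cached_boxes = detect_and_ocr(frame)
    for (start_x, start_y, end_x, end_y), text in cached_boxes:
        cv2.rectangle(frame, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
        cv2.putText(frame, text, (start_x, start_y - 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
    return frame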

Playing card detection with a custom YOLO in OpenCV: how to know the inputs and outputs from the custom YOLO .cfg file

I want to detect playing cards and found .cfg and .weights files for it. The classes file has 52 card names. The following code gives an index-out-of-range error. I don't understand the outputs of YOLO or how to get the detected labels. I am new to this and have been trying to understand. Can someone please help?
import cv2
import numpy as np

# Load Yolo
net = cv2.dnn.readNet("yolocards_608.weights", "yolocards.cfg")
classes = []
with open("cards.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Loading image
img = cv2.imread("playing_cards_image.jpg")
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape

# Detecting objects
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)

# Showing informations on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
    print(out.shape)
    for detection in out:
        scores = detection[:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Object detected
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            # Rectangle coordinates
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
font = cv2.FONT_HERSHEY_PLAIN
for j in range(len(boxes)):
    if i in indexes:
        x, y, w, h = boxes[i]
        print(class_ids[i])
        label = str(classes[class_ids[i]])
        print(label)
        color = colors[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
error:
0
Ah
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-46-adaf82305ab8> in <module>
      6         label = str(classes[class_ids[i]])
      7         print(label)
----> 8         color = colors[i]
      9         cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
     10         cv2.putText(img, label, (x, y + 30), font, 3, color, 3)

IndexError: index 52 is out of bounds for axis 0 with size 52
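For reference, each Darknet-style YOLO detection row is laid out as [center_x, center_y, w, h, objectness, class_0 ... class_51], so the class scores are detection[5:] rather than detection[:]. The final loop also iterates over j but tests and indexes with i (left over from earlier), and colors should be indexed by class id, not by box index, which is exactly what the IndexError shows. A sketch of the corrected drawing loop, reusing the question's variables:

# Corrected drawing loop (variables as defined in the question's code).
for i in range(len(boxes)):
    if i in indexes:                      # keep only boxes that survived NMS
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        color = colors[class_ids[i]]      # one color per class, 52 entries
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
        cv2.putText(img, label, (x, y + 30), font, 3, color, 3)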

Moroccan License Plate Recognition (LPR) using OpenCV and Tesseract

I'm working on a project about recognizing Moroccan license plates, which look like this image:
Moroccan License Plate
How can I use OpenCV to cut the license plate out and Tesseract to read the numbers and the Arabic letter in the middle?
I have looked into this research paper: https://www.researchgate.net/publication/323808469_Moroccan_License_Plate_recognition_using_a_hybrid_method_and_license_plate_features
I have installed OpenCV and Tesseract for Python on Windows 10. When I run Tesseract on the text-only part of the license plate using the "fra" language, I get 7714315l Bv. How can I separate the data?
Edit:
The Arabic letters we use in Morocco are:
أ ب ت ج ح د هـ
The expected result is: 77143 د 6
The vertical lines are irrelevant as data, but I have to use them to split the image and read each part separately.
Thanks in advance!
Since the two vertical lines are irrelevant as data, you can use the Hough transform to find them and crop the image on them:
import numpy as np
import cv2

image = cv2.imread("lines.jpg")
grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
dst = cv2.Canny(grayImage, 0, 150)
cv2.imwrite("canny.jpg", dst)

lines = cv2.HoughLinesP(dst, 1, np.pi / 180, 50, None, 60, 20)
lines_x = []
# Get height and width to constrain detected lines
height, width, channels = image.shape
for i in range(0, len(lines)):
    l = lines[i][0]
    # Check if the lines are vertical or not
    angle = np.arctan2(l[3] - l[1], l[2] - l[0]) * 180.0 / np.pi
    if (l[2] > width / 4) and (l[0] > width / 4) and (70 < angle < 100):
        lines_x.append(l[2])
        # To draw the detected lines
        # cv2.line(image, (l[0], l[1]), (l[2], l[3]), (0, 0, 255), 3, cv2.LINE_AA)
        # cv2.imwrite("lines_found.jpg", image)

# Sorting to get the line with the maximum x-coordinate for proper cropping
lines_x.sort(reverse=True)
crop_image = "cropped_lines"
for i in range(0, len(lines_x)):
    if i == 0:
        # Cropping to the end
        img = image[0:height, lines_x[i]:width]
    else:
        # Cropping from the start
        img = image[0:height, 0:lines_x[i]]
    cv2.imwrite(crop_image + str(i) + ".jpg", img)
I am sure you know now how to get the middle part ;)
Hope it helps!
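For completeness, a minimal sketch of that middle crop, assuming both vertical lines ended up in lines_x (sorted in descending order as above):

# The middle part of the plate lies between the two detected vertical lines.
if len(lines_x) >= 2:
    middle = image[0:height, lines_x[1]:lines_x[0]]
    cv2.imwrite("cropped_middle.jpg", middle)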
EDIT:
Using some morphological operations, you can also extract the characters individually:
import numpy as np
import cv2

image = cv2.imread("lines.jpg")
grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
dst = cv2.Canny(grayImage, 50, 100)
dst = cv2.morphologyEx(dst, cv2.MORPH_RECT, np.zeros((5, 5), np.uint8),
                       iterations=1)
cv2.imwrite("canny.jpg", dst)

im2, contours, hierarchy = cv2.findContours(dst, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_NONE)

for i in range(0, len(contours)):
    if cv2.contourArea(contours[i]) > 200:
        x, y, w, h = cv2.boundingRect(contours[i])
        # The w constraint to remove the vertical lines
        if w > 10:
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 1)

cv2.imwrite("contour.jpg", image)
Result:
This is what I have achieved so far...
The detection on the second image was done using the code found here: License plate detection with OpenCV and Python
The full code (which works from the third image on) is this:
import cv2
import numpy as np
import tesserocr as tr
from PIL import Image

image = cv2.imread("cropped.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', image)

thresh = cv2.adaptiveThreshold(gray, 250, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 255, 1)
cv2.imshow('thresh', thresh)

kernel = np.ones((1, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)

im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_TREE,
                                   cv2.CHAIN_APPROX_SIMPLE)
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

clean_plate = 255 * np.ones_like(img_dilation)

for i, ctr in enumerate(sorted_ctrs):
    x, y, w, h = cv2.boundingRect(ctr)
    roi = img_dilation[y:y + h, x:x + w]

    # these are very specific values made for this image only - not general-purpose code
    if h > 70 and w > 100:
        rect = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        clean_plate[y:y + h, x:x + w] = roi
        cv2.imshow('ROI', rect)
        cv2.imwrite('roi.png', roi)

img = cv2.imread("roi.png")
blur = cv2.medianBlur(img, 1)
cv2.imshow('4 - blur', blur)

pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

api = tr.PyTessBaseAPI()
try:
    api.SetImage(pil_img)
    boxes = api.GetComponentImages(tr.RIL.TEXTLINE, True)
    text = api.GetUTF8Text()
finally:
    api.End()

# clean the string a bit
text = str(text).strip()

# 77143-1916 ---> NNNNN|symbol|N
firstSection = text[:5]
# the arabic symbol is easy because it's nearly impossible for the OCR to
# misread the last 2 digits, so the symbol is always the third char from
# the end (right to left)
symbol = text[-3]
lastChar = text[-1]
plate = firstSection + "[" + symbol + "]" + lastChar

print(plate)
cv2.waitKey(0)
For Arabic symbols you should install additional languages for Tesseract OCR (and possibly use version 4 of it).
Output: 77143[9]6
The number between brackets is the (undetected) Arabic symbol.
Hope I helped you.
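As a rough sketch of that last step, assuming the ara traineddata has been installed and the middle symbol has been cropped to its own image (the filename below is a placeholder):

import tesserocr as tr
from PIL import Image

# Hedged sketch: assumes the 'ara' traineddata is installed and that
# symbol_crop.png holds just the middle (Arabic) part of the plate.
api = tr.PyTessBaseAPI(lang='ara', psm=tr.PSM.SINGLE_CHAR)
try:
    api.SetImage(Image.open("symbol_crop.png"))  # placeholder filename
    arabic_symbol = api.GetUTF8Text().strip()
finally:
    api.End()
print(arabic_symbol)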
