I want to add some text displayed over the video from my webcam, but I can't seem to get it working. I've added text to an image before with OpenCV, but the method seems different for videos, so how would I go about doing that? This is my webcam script:
import cv2
import numpy as np

# Create a VideoCapture object and read from input file
# If the input is the camera, pass 0 instead of the video file name
cap = cv2.VideoCapture(0)

# Check if camera opened successfully
if cap.isOpened() == False:
    print("Error opening video stream or file")

# Read until video is completed
while cap.isOpened():
    # Capture frame-by-frame
    ret, frame = cap.read()
    if ret == True:
        # Display the resulting frame
        cv2.imshow('Frame', frame)
        # Press Q on keyboard to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # Break the loop
    else:
        break

# When everything done, release the video capture object
cap.release()
# Closes all the frames
cv2.destroyAllWindows()
Have a look at OpenCV's docs on putText. Here's a quick hack I did to display some bounding box labels:
def __draw_label(img, text, pos, bg_color):
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    scale = 0.4
    color = (0, 0, 0)
    thickness = cv2.FILLED
    margin = 2

    txt_size = cv2.getTextSize(text, font_face, scale, thickness)

    end_x = pos[0] + txt_size[0][0] + margin
    end_y = pos[1] - txt_size[0][1] - margin

    cv2.rectangle(img, pos, (end_x, end_y), bg_color, thickness)
    cv2.putText(img, text, pos, font_face, scale, color, 1, cv2.LINE_AA)
In your code something like this should do:
if ret == True:
    # draw the label into the frame
    __draw_label(frame, 'Hello World', (20, 20), (255, 0, 0))

    # Display the resulting frame
    cv2.imshow('Frame', frame)
Did you perhaps do the drawing after you called imshow? I don't see any reason why videos should behave differently from still images.
I am getting this image in raw Y16 format (pointing 3 fingers in front of the sensor) after some processing of the actual data using opencv-python and sensor calibration. The image comes out as a shadow only (as seen in the image), and I am trying to make it more visible, like a normal raw grayscale (IR) image. Any suggestion/solution to improve the image quality so it is easier to see?
The actual frame data is in YUV2 format (16-bit).
- video capture
- video set to Y16 (raw data), convert_rgb -> 0, prop_format -> -1
- frame read
- some operations on the frame data:
  - reshape frame to (rows, cols*2)
  - convert to unsigned int (.astype(np.uint16))
  - convert to big-endian format (bit shift)
  - view as unsigned int (np.uint16)
- crop frame_roi
- apply medianBlur
- bit-shift frame_roi (to improve visibility)
- apply normalization
- apply CLAHE
- show image
import numpy as np
import cv2

cap = cv2.VideoCapture(0, cv2.CAP_MSMF)
cols, rows = 340, 240

cap.set(cv2.CAP_PROP_FRAME_WIDTH, cols)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, rows)
cap.set(cv2.CAP_PROP_FPS, 30)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter.fourcc('Y', '1', '6', ' '))
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0)
cap.set(cv2.CAP_PROP_FORMAT, -1)

clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    frame = frame.reshape(rows, cols*2)
    frame = frame.astype(np.uint16)
    frame = (frame[:, 0::2] << 8) + frame[:, 1::2]
    frame = frame.view(np.uint16)
    frame_roi = frame[:, 10:-10]
    frame_roi = cv2.medianBlur(frame_roi, 3)
    frame_roi = frame_roi << 3
    normed = cv2.normalize(frame_roi, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)
    cl1 = clahe.apply(normed)
    cv2.imshow('preview', cl1)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
I have a video and want to know when a person enters and stays in a specific area of the video, then record the time in the video (video time) when he enters and when he leaves, so I can use these times later for cutting the video.
I'm only a little experienced with OpenCV and currently have no experience with TensorFlow or Keras.
This is for a video analysis.
I have tried some things like BackgroundSubtractorMOG, using another resolution, etc.
https://s18.directupload.net/images/190517/wym8r59b.png
https://s18.directupload.net/images/190517/pi52vgv7.png
def calc_accum_avg(frame, accumulated_weight):
    global background
    if background is None:
        background = frame.copy().astype("float")
        return None
    cv2.accumulateWeighted(frame, background, accumulated_weight)

def segment(frame, threshold=25):
    global background
    diff = cv2.absdiff(background.astype("uint8"), frame)
    _, thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresholded.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return None
    else:
        move_segment = max(contours, key=cv2.contourArea)
        return (thresholded, move_segment)

def main():
    video = cv2.VideoCapture("/home/felix/Schreibtisch/OpenCVPython/large_video.mp4")
    video.set(3, 1920)
    video.set(4, 1080)
    length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    print(length)
    num_frames = 0
    fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()

    while True:
        ret, frame = video.read()
        fgmask = fgbg.apply(frame)
        if frame is None:
            return

        frame_copy = fgmask.copy()
        #frame2_copy = frame.copy()

        roi_visualiser = frame[roi_visualiser_top:roi_visualiser_bottom, roi_visualiser_right:roi_visualiser_left]
        roi_board = frame[roi_board_top:roi_board_bottom, roi_board_right:roi_board_left]

        gray = cv2.cvtColor(roi_visualiser, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (9, 9), 0)
        #gray = cv2.cvtColor(roi_board, cv2.COLOR_BGR2GRAY)
        #gray = cv2.GaussianBlur(gray, (9, 9), 0)

        if num_frames < 2:
            calc_accum_avg(gray, accumulated_weight)
            #calc_accum_avg(gray2, accumulated_weight)
            if num_frames <= 1:
                cv2.imshow("Finger Count", frame_copy)
        else:
            hand = segment(gray)
            if hand is not None:
                thresholded, move_segment = hand
                cv2.drawContours(frame_copy, [move_segment + (roi_visualiser_right, roi_visualiser_top)], -1, (255, 0, 0), 1)
                #cv2.drawContours(frame_copy2, [move_segment + (roi_board_right, roi_board_top)], -1, (255, 0, 0), 1)
                fingers = count_moves(thresholded, move_segment)
                if fingers > 0:
                    print("ja")  # test works
                else:
                    print("Nein")
                cv2.putText(frame_copy, str(fingers), (70, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)  # no need
                cv2.imshow("Thresholded", thresholded)  # no need

        cv2.rectangle(frame_copy, (roi_visualiser_left, roi_visualiser_top), (roi_visualiser_right, roi_visualiser_bottom), (255, 0, 0), 1)
        cv2.rectangle(frame_copy, (roi_board_left, roi_board_top), (roi_board_right, roi_board_bottom), (255, 0, 0), 1)

        num_frames += 1
        cv2.imshow("Finger Count", frame_copy)
I get no error messages and everything runs fine, but I don't get the correct result I need.
[1]: https://i.stack.imgur.com/dQbQi.png
[2]: https://i.stack.imgur.com/MqOAc.png
Have you tried BackgroundSubtractorMOG2? It can distinguish shadows, which you can use to prevent false positives.
To make the processing more efficient, first create a subimage of the area where the person enters and leaves, and apply the background subtraction to that subimage. Also, if the frames are noisy, applying a blur before background subtraction can improve the result.
Then check the resulting mask for white objects of significant size. When one is detected, store the frame number using video.get(cv2.CAP_PROP_POS_FRAMES) in an array, and stop recording frame numbers until the mask is fully black again.
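A minimal sketch of that idea, assuming a hypothetical entry/exit region (x, y, w, h), an arbitrary area threshold, and a placeholder video path; all of these would need tuning for your footage:

import cv2

video = cv2.VideoCapture("input.mp4")           # placeholder path
subtractor = cv2.createBackgroundSubtractorMOG2(detectShadows=True)

x, y, w, h = 100, 100, 300, 300                 # hypothetical region where the person enters/leaves
min_area = 500                                  # assumed minimum number of foreground pixels
person_inside = False
events = []                                     # list of (frame_number, "enter"/"leave")

while True:
    ret, frame = video.read()
    if not ret:
        break
    roi = frame[y:y+h, x:x+w]                   # only process the subimage
    roi = cv2.GaussianBlur(roi, (5, 5), 0)      # reduce noise before subtraction
    mask = subtractor.apply(roi)
    mask[mask == 127] = 0                       # MOG2 marks shadows as 127; ignore them
    foreground_pixels = cv2.countNonZero(mask)

    frame_no = int(video.get(cv2.CAP_PROP_POS_FRAMES))
    if foreground_pixels > min_area and not person_inside:
        person_inside = True
        events.append((frame_no, "enter"))
    elif foreground_pixels <= min_area and person_inside:
        person_inside = False
        events.append((frame_no, "leave"))

video.release()
print(events)

The recorded frame numbers can then be converted to timestamps by dividing by the video's FPS when cutting the video.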
I'm building a script to overlay bounding boxes onto my video via a CSV file. Each frame has n bounding boxes, so I iterate over the bounding boxes in each frame and draw a cv2.rectangle on the frame. As a result, I am writing to a frame many times, for all frames.
While my VideoWriter constructor takes 23.97 FPS as a parameter, the resulting FPS is much lower. Is there an algorithm or a way in which I can set a proper FPS to compensate for the FPS drop after writing to the video?
Below is my code-snippet:
avg_fws = counter_written / float(total_frames - 1)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('data/bounding_videos/%s.avi' % video_file, fourcc, fps * avg_fws, (width, height))
counter = 1
print(counter_written)

while video.isOpened():
    ret, frame = video.read()
    if ret == True:
        if len(frames_dict) != 0:
            for i in frames_dict[counter].keys():
                box_dim = frames_dict[counter][i]
                x, y, w, h = box_dim
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 255), 3)
                out.write(frame)
        else:
            out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
    counter += 1

video.release()
out.release()
cv2.destroyAllWindows()
The counters are just for me to keep track of and access the frames, and avg_fws is the average number of frames written per second, which is basically total_num_of_frames_written / total_num_of_frames_in_video.
The problem with your code is that you are writing multiple frames in your for loop, one for each rectangle drawn. What you need to do is draw all rectangles on the frame first and then write it only once. That can be done by moving out.write(frame) out of your for loop.
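A minimal sketch of the corrected loop, keeping the names from your snippet (frames_dict, counter, out) and assuming the rest of the setup stays the same:

while video.isOpened():
    ret, frame = video.read()
    if not ret:
        break
    # Draw every box for this frame first...
    if len(frames_dict) != 0:
        for i in frames_dict[counter].keys():
            x, y, w, h = frames_dict[counter][i]
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 255), 3)
    # ...then write the fully annotated frame exactly once.
    out.write(frame)
    counter += 1

With one write per source frame, passing the original fps to the VideoWriter (rather than fps * avg_fws) should presumably give the expected playback speed.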
I am converting a video into video frames using the given code.
Converting video into frames:
import cv2
import numpy as np
import os

# Playing video from file:
cap = cv2.VideoCapture('/home/administrator/Desktop/Projects/gait-recognition/video/p008-n05.avi')
frames = []

try:
    if not os.path.exists('data'):
        os.makedirs('data')
except OSError:
    print('Error: Creating directory of data')

currentFrame = 0
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Saves image of the current frame in jpg file
    name = './data/frame' + str(currentFrame) + '.jpg'
    print('Creating...' + name)
    cv2.imwrite(name, frame)
    frames.append(frame)

    # To stop duplicate images
    currentFrame += 1
    if ret == False:
        break

# When everything done, release the capture
cap.release()
imgs = np.array(frames)
which gives me the output as images and as a single multi-dimensional array. This is required here as:

Create features from input frames in shape (TIME, HEIGHT, WIDTH, CHANNELS)

spatial_features = net_pose.feed_forward_features(imgs)

This gives the error: setting an array element with a sequence.
How should I modify the OpenCV code to get the desired shape of the tensor of images?
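One likely cause (an assumption, since the traceback isn't shown): the loop appends frame before checking ret, so the final None returned by cap.read() ends up in frames, and np.array() then cannot build a uniform (TIME, HEIGHT, WIDTH, CHANNELS) array. A minimal sketch of the read loop that only keeps valid frames:

frames = []
currentFrame = 0
while True:
    ret, frame = cap.read()
    if not ret:                 # stop before appending an empty/None frame
        break
    name = './data/frame' + str(currentFrame) + '.jpg'
    cv2.imwrite(name, frame)
    frames.append(frame)
    currentFrame += 1

cap.release()
imgs = np.array(frames)         # all frames now share the same shape
print(imgs.shape)               # expected: (TIME, HEIGHT, WIDTH, CHANNELS)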
I've been developing a face detection app integrated with ROS to be used with drones (a Parrot Bebop, to be exact). The code itself can be found on Google, and what it does is, basically, show every face that appears on screen. My problem is: I want the drone to follow my face (and only mine), but as I said before, the code can detect multiple faces at a time.
Here's the code:
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
# some people can just use the haarcascade_frontalface_default.xml without specifying the path
test = face_cascade.load('../../../../opencv/data/haarcascades/haarcascade_frontalface_default.xml')

# start the video capture
video_capture = cv2.VideoCapture(0)

# while-loop to detect faces on the webcam until you press 'q'
while not rospy.is_shutdown():
    # Capture frame-by-frame
    ret, frame = video_capture.read()
    frame = imutils.resize(frame, width=600)

    # convert the frame (of the webcam) to gray
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    mask = cv2.erode(gray, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)

    # detecting the faces
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE)

    # Draw a rectangle around the faces
    for (x, y, w, h) in faces:
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        x_imagem = x + (w/2)
        y_imagem = y + (h/2)
        cv2.circle(frame, (x+(w/2), y+(h/2)), 5, (0, 0, 255), -1)
        # frame is 600 x 450
        if x_imagem > 200 and x_imagem < 400:
            rospy.loginfo("CENTRO")
        elif x_imagem < 200:  # face on the left, so move right
            rospy.loginfo("ROSTO NA ESQUERDA")
            pub_face.publish("esq")
        elif x_imagem > 400:  # face on the right, so move left
            rospy.loginfo("ROSTO NA DIREITA")
            pub_face.publish("dir")

    # Display the resulting frame
    cv2.imshow('Video', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        rospy.loginfo("FIM DO PROGRAMA DE DETECCAO DE ROSTOS")
        break

# When everything is done, release the capture
video_capture.release()
cv2.destroyAllWindows()
As you can see, we have an array of faces at:
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE)
The code draws a rectangle where each face is. Is there a way I can make it show only the biggest rectangle (in that case, my face)?
I hope I've made it clear!
Is there a way that I can make it show only the biggest rectangle (in that case, my face)?
Iterate through the list of faces, calculate the area of each region of interest (ROI), and grab the one with the largest area.
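A minimal sketch of that idea, replacing the for loop over faces in your code (variable names are kept from your snippet):

if len(faces) > 0:
    # pick the detection with the largest area (w * h), assumed to be the closest face
    x, y, w, h = max(faces, key=lambda rect: rect[2] * rect[3])

    cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
    x_imagem = x + w // 2
    y_imagem = y + h // 2
    cv2.circle(frame, (x_imagem, y_imagem), 5, (0, 0, 255), -1)
    # ...the CENTRO / esq / dir logic stays the same, but now runs for a single face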