OpenCV append frame to existing video (.avi) file?

I have a small question: is it possible to add (append) new frames to an existing .avi video file using OpenCV, without rewriting the whole file? I am using OpenCV 2.4.2 on Windows 7, with Qt.

If you want to use OpenCV, you have to read and re-write the entire content of the file. The script below concatenates a set of clips into a single output:
import cv2
import os

# load the input videos; in this case they are named cut1.avi, cut2.avi, ..., cut15.avi
videofiles = [n for n in os.listdir('.') if n[0] == 'c' and n[-4:] == '.avi']
videofiles = sorted(videofiles, key=lambda item: int(item.partition('.')[0][3:]))

video_index = 0
cap = cv2.VideoCapture(videofiles[0])

# open the output writer; codec, fps and frame size must match your data
# (for an .mp4 output you could instead use cv2.VideoWriter_fourcc(*'MP4V'))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('cutout.avi', fourcc, 20.0, (640, 480))

while cap.isOpened():
    ret, frame = cap.read()
    if frame is None:
        print("end of video " + str(video_index) + " .. next one now")
        video_index += 1
        if video_index >= len(videofiles):
            break
        cap = cv2.VideoCapture(videofiles[video_index])
        ret, frame = cap.read()
    cv2.imshow('frame', frame)
    out.write(frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
print("end.")

Have a look at the VideoWriter class.
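To make the "appending" concrete: since OpenCV cannot open a container for appending, the usual workaround is to copy every frame of the existing file into a new VideoWriter and then write the new frames after it. A minimal sketch of that idea (the filenames, and the assumption that the new frames match the original resolution, are mine):
import cv2

cap = cv2.VideoCapture('existing.avi')   # hypothetical input file
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('rewritten.avi', fourcc, fps, (width, height))

# copy all existing frames into the new file
while True:
    ret, frame = cap.read()
    if not ret:
        break
    out.write(frame)
cap.release()

# ...then write the new frames; they must match (width, height)
# for frame in new_frames:
#     out.write(frame)
out.release()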

Related

Getting a (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'

I am getting a (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor' error while doing face detection with OpenCV. Here is my code:
import cv2
import numpy as np

# Load HAAR face classifier
face_classifier = cv2.CascadeClassifier('Haarcascades/haarcascade_frontalface_default.xml')

# Load functions
def face_extractor(img):
    # Function detects faces and returns the cropped face
    # If no face detected, it returns the input image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_classifier.detectMultiScale(gray, 1.3, 5)
    if faces is ():
        return None
    # Crop all faces found
    for (x, y, w, h) in faces:
        cropped_face = img[y:y+h, x:x+w]
    return cropped_face

# Initialize Webcam
cap = cv2.VideoCapture(0)
count = 0

# Collect 100 samples of your face from webcam input
while True:
    ret, frame = cap.read()
    if face_extractor(frame) is not None:
        count += 1
        face = cv2.resize(face_extractor(frame), (200, 200))
        face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        # Save file in specified directory with unique name
        file_name_path = './faces/user/' + str(count) + '.jpg'
        cv2.imwrite(file_name_path, face)
        # Put count on images and display live count
        cv2.putText(face, str(count), (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Cropper', face)
    else:
        print("Face not found")
        pass
    if cv2.waitKey(1) == 13 or count == 100:  # 13 is the Enter Key
        break

cap.release()
cv2.destroyAllWindows()
print("Collecting Samples Complete")
and here is the output, which is an error:
error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'
I tried converting / to \ in the path, but that resulted in another error: SyntaxError: EOL while scanning string literal.
Please help me solve this issue.
You need to specify the full path to your haarcascade_frontalface_default.xml file.
Use double backslashes (\\) in the path, as I did.
I modified your code; I hope it helps:
# importing libraries
import cv2
import numpy as np

# Load HAAR face classifier (full path, with double backslashes)
face_classifier = cv2.CascadeClassifier(
    'D:\\OpenCV\\opencv\\build\\etc\\Haarcascades\\haarcascade_frontalface_default.xml')

def face_extractor(img):
    # Detects faces and returns the cropped face;
    # if no face is detected, returns None
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_classifier.detectMultiScale(gray, 1.3, 5)
    if len(faces) == 0:
        return None
    # Crop all faces found
    for (x, y, w, h) in faces:
        cropped_face = img[y:y+h, x:x+w]
    return cropped_face

# Ask for the total number of users
num = int(input("Enter no of users you want to add "))
user = 1

# Initialize webcam
cap = cv2.VideoCapture(0)

# count the total number of faces
count = 0
print("Start capturing the input data set")
d = input('Enter face name')

# Collect 100 samples per user from webcam input
while True:
    ret, frame = cap.read()
    print(type(frame))
    # check whether a face is in the frame or not
    if face_extractor(frame) is not None:
        count += 1
        face = cv2.resize(face_extractor(frame), (200, 200))
        face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        # Save file in specified directory with unique name
        file_name_path = './faces/user/' + str(d) + '-' + str(count) + '.jpg'
        cv2.imwrite(file_name_path, face)
        # Put count on images and display live count
        cv2.putText(face, str(count), (50, 50), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow('Face Cropper', face)
    else:
        print("Face not found")
        pass
    # check whether 100 images have been captured for the current user
    if (count % 100) == 0 and count != num * 100 and count != 0:
        print("Place the new user signature")
        cv2.waitKey()
    # check whether all images have been captured
    if cv2.waitKey(1) == 13 or count == num * 100:  # 13 is the Enter Key
        break

# close all windows
cap.release()
cv2.destroyAllWindows()
print("Collecting Samples Complete")

detect object in image with almost similar background

I have to detect mice in a cage; input images look like the following:
At the moment I am using cv.createBackgroundSubtractorMOG2() on the video stream to find the area containing the mice, and afterwards a Canny edge detector to extract their contours.
However, this is not working that well. The more the mice move, the better it works, but I guess there could be a better approach to detect them.
Does anyone have a different idea for how to detect the mice?
Thanks in advance.
After subtracting the background, you could use a threshold to remove noise. Try saving the subtracted image and seeing what it looks like. Here's a script I use to tweak filter parameters (run it with the subtracted image):
import cv2
import numpy as np

screenshot_path = 'screenshot.bmp'

def nothing(x):
    pass

# Creating windows for later use
cv2.namedWindow('result', cv2.WINDOW_NORMAL)
cv2.namedWindow('trackbar', cv2.WINDOW_NORMAL)

# Starting with 100's to prevent error while masking
h, s, v = 100, 100, 100

# Creating track bars
cv2.createTrackbar('h', 'trackbar', 0, 180, nothing)
cv2.createTrackbar('s', 'trackbar', 0, 255, nothing)
cv2.createTrackbar('v', 'trackbar', 164, 255, nothing)
cv2.createTrackbar('h2', 'trackbar', 120, 180, nothing)
cv2.createTrackbar('s2', 'trackbar', 12, 255, nothing)
cv2.createTrackbar('v2', 'trackbar', 253, 255, nothing)

frame = cv2.imread(screenshot_path)

# converting to HSV
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

while True:
    # get info from track bars and apply to result
    h = cv2.getTrackbarPos('h', 'trackbar')
    s = cv2.getTrackbarPos('s', 'trackbar')
    v = cv2.getTrackbarPos('v', 'trackbar')
    h2 = cv2.getTrackbarPos('h2', 'trackbar')
    s2 = cv2.getTrackbarPos('s2', 'trackbar')
    v2 = cv2.getTrackbarPos('v2', 'trackbar')

    # Normal masking algorithm
    lower = np.array([h, s, v])
    upper = np.array([h2, s2, v2])
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(frame, frame, mask=mask)
    cv2.imshow('result', result)
    print(h, s, v, h2, s2, v2)

    k = cv2.waitKey(5) & 0xFF
    if k == 27:  # Esc to quit
        break

cv2.destroyAllWindows()
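Once the range looks right, a fixed threshold plus a morphological open/close usually removes the remaining speckle before contour extraction. A sketch of that cleanup step (the filename, the threshold of 40 and the minimum blob area of 500 px are placeholder values; note that findContours returns three values on OpenCV 3.x):
import cv2

# hypothetical input: the saved background-subtracted frame
fg = cv2.imread('subtracted.png', cv2.IMREAD_GRAYSCALE)

# threshold away low-intensity noise
_, mask = cv2.threshold(fg, 40, 255, cv2.THRESH_BINARY)

# opening removes isolated specks, closing fills small holes in the blobs
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

# keep only sufficiently large blobs (presumably the mice)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
mice = [c for c in contours if cv2.contourArea(c) > 500]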
If that doesn't work, I would use an object tracker API like CSRT
# USAGE:
# python opencv_object_tracking.py
# python opencv_object_tracking.py --video dashcam_boston.mp4 --tracker csrt

# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import argparse
import imutils
import time
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", type=str,
    help="path to input video file")
ap.add_argument("-t", "--tracker", type=str, default="kcf",
    help="OpenCV object tracker type")
args = vars(ap.parse_args())

# extract the OpenCV version info
(major, minor) = cv2.__version__.split(".")[:2]

# if we are using OpenCV 3.2 OR BEFORE, we can use a special factory
# function to create our object tracker
if int(major) == 3 and int(minor) < 3:
    tracker = cv2.Tracker_create(args["tracker"].upper())

# otherwise, for OpenCV 3.3 OR NEWER, we need to explicitly call the
# appropriate object tracker constructor:
else:
    # initialize a dictionary that maps strings to their corresponding
    # OpenCV object tracker implementations
    OPENCV_OBJECT_TRACKERS = {
        "csrt": cv2.TrackerCSRT_create,
        "kcf": cv2.TrackerKCF_create,
        "boosting": cv2.TrackerBoosting_create,
        "mil": cv2.TrackerMIL_create,
        "tld": cv2.TrackerTLD_create,
        "medianflow": cv2.TrackerMedianFlow_create,
        "mosse": cv2.TrackerMOSSE_create
    }

    # grab the appropriate object tracker using our dictionary of
    # OpenCV object tracker objects
    tracker = OPENCV_OBJECT_TRACKERS[args["tracker"]]()

# initialize the bounding box coordinates of the object we are going
# to track
initBB = None

# if a video path was not supplied, grab the reference to the web cam
if not args.get("video", False):
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0).start()
    time.sleep(1.0)

# otherwise, grab a reference to the video file
else:
    vs = cv2.VideoCapture(args["video"])

# initialize the FPS throughput estimator
fps = None

# loop over frames from the video stream
while True:
    # grab the current frame, then handle if we are using a
    # VideoStream or VideoCapture object
    frame = vs.read()
    frame = frame[1] if args.get("video", False) else frame

    # check to see if we have reached the end of the stream
    if frame is None:
        break

    # resize the frame (so we can process it faster) and grab the
    # frame dimensions
    frame = imutils.resize(frame, width=500)
    (H, W) = frame.shape[:2]

    # check to see if we are currently tracking an object
    if initBB is not None:
        # grab the new bounding box coordinates of the object
        (success, box) = tracker.update(frame)

        # check to see if the tracking was a success
        if success:
            (x, y, w, h) = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h),
                (0, 255, 0), 2)

        # update the FPS counter
        fps.update()
        fps.stop()

        # initialize the set of information we'll be displaying on
        # the frame
        info = [
            ("Tracker", args["tracker"]),
            ("Success", "Yes" if success else "No"),
            ("FPS", "{:.2f}".format(fps.fps())),
        ]

        # loop over the info tuples and draw them on our frame
        for (i, (k, v)) in enumerate(info):
            text = "{}: {}".format(k, v)
            cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the 's' key is selected, we are going to "select" a bounding
    # box to track
    if key == ord("s"):
        # select the bounding box of the object we want to track (make
        # sure you press ENTER or SPACE after selecting the ROI)
        initBB = cv2.selectROI("Frame", frame, fromCenter=False,
            showCrosshair=True)

        # start OpenCV object tracker using the supplied bounding box
        # coordinates, then start the FPS throughput estimator as well
        tracker.init(frame, initBB)
        fps = FPS().start()

    # if the `q` key was pressed, break from the loop
    elif key == ord("q"):
        break

# if we are using a webcam, release the pointer
if not args.get("video", False):
    vs.stop()

# otherwise, release the file pointer
else:
    vs.release()

# close all windows
cv2.destroyAllWindows()

Putting my pictures over a black background not working in OpenCV

So this is what I have now:
As you can see, the neural style transfer is only applied over the area the detection box covers. I am trying to put the transformed picture (which will always be smaller than 1200 x 900, because the detection box is at most 1200 x 900) onto a black 1200 x 900 background so that I can save the video file.
My box is measured with startX, endX, startY, and endY. The way I am currently pasting the picture over the background is black_background[startY:endY, startX:endX] = output, where output also has the size (endY - startY, endX - startX).
This is not working; any insights? Also, when I do black_background[startY:endY, startX:endX] = output, there is often a few-pixels-off broadcasting issue, e.g. it can't assign a (859, 100, 3) array into a (860, 100, 3) slice. Is there a non-buggy solution to the black background issue? Manually assigning black_background[startY:endY, startX:endX] = output feels fragile.
Here's my full code; I marked the if block that actually matters with -----. Thank you!
from __future__ import print_function
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
from imutils import paths
import itertools

# We need to input model prototxt
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
    help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
    help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
    help="minimum probability to filter weak detections")
ap.add_argument("-nm", "--neuralmodels", required=True,
    help="path to directory containing neural style transfer models")
args = vars(ap.parse_args())

# we should identify the class first, and then transfer that block
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
    "sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
DetectionNet = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# grab the paths to all neural style transfer models in our 'models'
# directory, provided all models end with the '.t7' file extension
modelPaths = paths.list_files(args["neuralmodels"], validExts=(".t7",))
modelPaths = sorted(list(modelPaths))

# generate unique IDs for each of the model paths, then combine the
# two lists together
models = list(zip(range(0, len(modelPaths)), (modelPaths)))

# use the cycle function of itertools that can loop over all model
# paths, and then when the end is reached, restart again
modelIter = itertools.cycle(models)
(modelID, modelPath) = next(modelIter)
NTSnet = cv2.dnn.readNetFromTorch(modelPath)

# initialize the video stream, allow the camera sensor to warm up,
# and initialize the FPS counter
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
fps = FPS().start()

fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter('output.avi', fourcc, 20.0, (1200, 900))

while True:
    # grab the frame from the threaded video stream and resize it
    frame = vs.read()
    frame = imutils.resize(frame, width=1200, height=900)

    # grab the frame dimensions and convert it to a blob
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
        0.007843, (300, 300), 127.5)

    # pass the blob through the network and obtain the detections and
    # predictions
    DetectionNet.setInput(blob)
    detections = DetectionNet.forward()

    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence
        if confidence > args["confidence"]:
            # extract the index of the class label from the
            # `detections`, then compute the (x, y)-coordinates of
            # the bounding box for the object
            idx = int(detections[0, 0, i, 1])
            if CLASSES[idx] == "person" and confidence > .90:
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")

                # draw the prediction on the frame
                label = "{}: {:.2f}%".format("PERSON", confidence * 100)
                cv2.rectangle(frame, (startX, startY), (endX, endY),
                    COLORS[idx], 2)
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

                # grab the box area and load the backgrounds
                newimage = frame[startY:endY, startX:endX]
                (h, w) = newimage.shape[:2]
                noise_picture = cv2.imread('white_noise.jpg')
                black_background = cv2.imread('black.png')

                # ----- the if block that actually matters -----
                if h > 0 and w > 0:
                    # paste the detection box area into the noise picture
                    # and resize it for the style network
                    (height_orig, width_orig) = noise_picture.shape[:2]
                    noise_picture[startY:endY, startX:endX] = newimage
                    noise_picture = imutils.resize(noise_picture, height=450)

                    # run it through the network; output is the image
                    (h, w) = noise_picture.shape[:2]
                    blob2 = cv2.dnn.blobFromImage(noise_picture, 1.0, (w, h),
                        (103.939, 116.779, 123.680), swapRB=False, crop=False)
                    NTSnet.setInput(blob2)
                    output = NTSnet.forward()
                    output = output.reshape((3, output.shape[2], output.shape[3]))
                    output[0] += 103.939
                    output[1] += 116.779
                    output[2] += 123.680
                    output /= 255.0
                    output = output.transpose(1, 2, 0)

                    # set the 600 x 450 back to the original size
                    black_background = imutils.resize(black_background, width=1200, height=900)
                    output = imutils.resize(output, width=1200)
                    output = output[startY:endY, startX:endX]
                    (h2, w2) = output.shape[:2]
                    if h2 > 0 and w2 > 0:
                        cv2.imshow('hmm', output)
                        black_background[startY:endY, startX:endX] = output
                        cv2.imshow("uh", black_background)
                        #output_video.write(black_background)
                # ----- end of the marked block -----

    # show the output frame, which is the whole thing
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

    # update the FPS counter
    fps.update()

# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
Oh man, second time I've made this mistake. You have to multiply your output picture by 255 before adding it to your background. The confusing part is that cv2.imshow happily displays a float image with values in [0, 1], but once you paste it into a uint8 image, the [0, 1] values are treated as being on the [0, 255] scale, so the result shows up as (nearly) black. Don't take my word on the exact internals, though.
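A sketch of what that fix looks like in the snippet above (names taken from the question; the explicit uint8 cast and the shape-based slice are my additions, to also sidestep the one-pixel broadcast error):
import numpy as np

# scale the float [0, 1] style-transfer output up to [0, 255] and cast,
# so it matches the uint8 background
output_u8 = np.clip(output * 255.0, 0, 255).astype(np.uint8)

# slice the destination by the output's own shape instead of the box,
# avoiding mismatches like (859, 100, 3) vs (860, 100, 3)
h2, w2 = output_u8.shape[:2]
black_background[startY:startY + h2, startX:startX + w2] = output_u8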

Saving multiple videos at the same time with opencv

For my project I have multiple cameras and I need to save the video feed to multiple output files at the same time. Here's my code:
import cv2

def one_cam_is_open(list_of_cams):
    for cam in list_of_cams:
        if cam.isOpened():
            return True
    return False

cam_list = []

# Capture video from the cameras
for i in range(0, 2):
    cap = cv2.VideoCapture(i)
    if cap != -1:
        cam_list.append(cv2.VideoCapture(i))

# the width and height of all cameras will be 1900x1080
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) + 0.5)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + 0.5)
dim = (1900, 1080)

# Define the codec and create a VideoWriter object per camera
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Be sure to use the lower case
video_writers = {}
for i, cam in enumerate(cam_list):
    video_writers[cam] = cv2.VideoWriter("output_" + str(i) + ".mp4", fourcc, 20.0, dim)

while one_cam_is_open(cam_list):
    for (cam, out) in video_writers:  # TypeError: cannot unpack non-iterable cv2.VideoCapture object
        ret, frame = cam.read()
        if ret == True:
            frame = cv2.flip(frame, 0)
            # write the flipped frame
            out.write(frame)
            cv2.imshow('frame', frame)
            if (cv2.waitKey(1) & 0xFF) == ord('q'):  # Hit `q` to exit
                break
        else:
            break

# Release everything if the job is finished
for (cam, out) in video_writers:
    out.release()
    cam.release()
cv2.destroyAllWindows()
As noted in the code, I get the error TypeError: cannot unpack non-iterable cv2.VideoCapture object when I try to unpack the references to the VideoWriter objects for each camera.
Is there a way to write to multiple video files using VideoWriter?
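For what it's worth, the unpack fails because iterating a dict directly yields only its keys (here the VideoCapture objects); iterating video_writers.items() yields (capture, writer) pairs instead. A sketch of the corrected loop, under the question's own setup:
while one_cam_is_open(cam_list):
    for cam, out in video_writers.items():
        ret, frame = cam.read()
        if ret:
            frame = cv2.flip(frame, 0)
            out.write(frame)
    if (cv2.waitKey(1) & 0xFF) == ord('q'):
        break

for cam, out in video_writers.items():
    out.release()
    cam.release()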

Cropping A Detected Object On A Video With Tensorflow Api And Opencv

-Python 3.6
-TensorFlow 1.11 with GPU support
-OpenCV 3.4.2
I am working with the TensorFlow Object Detection API and I have already trained my dataset; it works fine. But I have to crop the detected object and do some preprocessing on it. This seems easy, because TensorFlow already draws the detected object with a green box. When I try to find the coordinates of the object, it gives me numbers in the range 0 to 1. To crop with OpenCV I have to multiply those coordinates by the picture's height and width, but it crops the wrong region.
Tensorflow.org says that I can use the "tf.image.crop_and_resize" function, but I can't get it to run in my own code.
This is my run_inference_for_single_image function, which returns output_dict:
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        #with tf.Session() as sess:
        # Get handles to input and output tensors
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)
        if 'detection_masks' in tensor_dict:
            # The following processing is only for a single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to
            # image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image.shape[0], image.shape[1])
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
        image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

        # Run inference
        output_dict = sess.run(tensor_dict,
                               feed_dict={image_tensor: np.expand_dims(image, 0)})

        # all outputs are float32 numpy arrays, so convert types as appropriate
        output_dict['num_detections'] = int(output_dict['num_detections'][0])
        output_dict['detection_classes'] = output_dict[
            'detection_classes'][0].astype(np.uint8)
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        if 'detection_masks' in output_dict:
            output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
This is my video capture function; it crops the wrong coordinates:
video = cv2.VideoCapture(0)
ret = video.set(3, 1080)
ret = video.set(4, 720)

while True:
    # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
    # i.e. a single-column array, where each item in the column has the pixel RGB value
    ret, frame = video.read()
    frame = cv2.flip(frame, 1)
    frame_expanded = np.expand_dims(frame, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.50)

    # Draw the results of the detection (aka 'visualize the results')
    output_dict = run_inference_for_single_image(frame, detection_graph)
    max_boxes_to_draw = output_dict['detection_boxes'].shape[0]
    for i in range(min(max_boxes_to_draw, output_dict['detection_boxes'].shape[0])):
        if output_dict['detection_scores'][i] > 0.95:
            if output_dict['detection_classes'][i] in category_index.keys():
                class_name = category_index[output_dict['detection_classes'][i]]['name']
                print(output_dict['detection_boxes'][i])
                crop_img = frame[int((output_dict['detection_boxes'][i][0]) * 720):int(
                    (output_dict['detection_boxes'][i][2]) * 720),
                    int((output_dict['detection_boxes'][i][1]) * 1080):int(
                    (output_dict['detection_boxes'][i][3]) * 1080)]
                cv2.imshow("asdasd", crop_img)
                print(class_name)

    cv2.imshow('Object detector', frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break
It might be something about output_dict.
class_name = category_index[output_dict['detection_classes'][i]]['name'] => this line gives me the name of the class, and it works well.
I found an answer to my question. This is the solution code:
while True:
    # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
    # i.e. a single-column array, where each item in the column has the pixel RGB value
    ret, frame = video.read()
    frame = cv2.flip(frame, 1)
    frame_expanded = np.expand_dims(frame, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.50)

    # Draw the results of the detection (aka 'visualize the results')
    output_dict = run_inference_for_single_image(frame, detection_graph)
    max_boxes_to_draw = output_dict['detection_boxes'].shape[0]
    for i in range(min(max_boxes_to_draw, output_dict['detection_boxes'].shape[0])):
        if output_dict['detection_scores'][i] > 0.80:
            if output_dict['detection_classes'][i] in category_index.keys():
                class_name = category_index[output_dict['detection_classes'][i]]['name']
                #print(output_dict['detection_boxes'][i])
                # boxes are normalized (ymin, xmin, ymax, xmax)
                ymin = boxes[0, i, 0]
                xmin = boxes[0, i, 1]
                ymax = boxes[0, i, 2]
                xmax = boxes[0, i, 3]
                im_width = 1280
                im_height = 720
                (xminn, xmaxx, yminn, ymaxx) = (xmin * im_width, xmax * im_width,
                                                ymin * im_height, ymax * im_height)
                # (tf.image.crop_to_bounding_box would also work here, but the
                # plain NumPy slice below is enough)
                crop_img = frame[int(yminn):int(ymaxx), int(xminn):int(xmaxx)]
                cv2.imshow("asdsda", crop_img)
                #print(class_name)

    cv2.imshow('Object detector', frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break
im_width = 1280 means nothing to me, but it works on my project. Thanks for the help.
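For what it's worth, the hard-coded numbers work because the Object Detection API returns boxes normalized to [0, 1] as (ymin, xmin, ymax, xmax), so they have to be scaled by the actual frame size; 1280 x 720 presumably just matches the capture resolution here. A sketch that reads the size from the frame itself instead of hardcoding it (my own suggestion, not part of the original answer):
# boxes are normalized (ymin, xmin, ymax, xmax); scale by the real frame size
im_height, im_width = frame.shape[:2]
ymin, xmin, ymax, xmax = boxes[0, i]
crop_img = frame[int(ymin * im_height):int(ymax * im_height),
                 int(xmin * im_width):int(xmax * im_width)]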
