I need to detect mice in a cage; the input images look like the following:
At the moment I am using cv2.createBackgroundSubtractorMOG2() on the video stream to find the area containing the mice, and afterwards a Canny edge detector to extract their contours.
However, this is not working that well: the more the mice move, the better it works, but I suspect there is a better approach.
Does anyone have a different idea for detecting the mice?
Thanks in advance.
After subtracting the background, you could use a threshold to remove noise. Try saving the subtracted image and seeing what it looks like. Here's a script I use to tweak filter parameters (run it with the subtracted image):
import cv2
import numpy as np
screenshot_path = 'screenshot.bmp'
def nothing(x):
    pass
# Creating a window for later use
cv2.namedWindow('mask', cv2.WINDOW_NORMAL)
cv2.namedWindow('trackbar', cv2.WINDOW_NORMAL)
# Starting with 100's to prevent error while masking
h, s, v = 100, 100, 100
# Creating track bar
cv2.createTrackbar('h', 'trackbar', 0, 180, nothing)
cv2.createTrackbar('s', 'trackbar', 0, 255, nothing)
cv2.createTrackbar('v', 'trackbar', 164, 255, nothing)
cv2.createTrackbar('h2', 'trackbar', 120, 180, nothing)
cv2.createTrackbar('s2', 'trackbar', 12, 255, nothing)
cv2.createTrackbar('v2', 'trackbar', 253, 255, nothing)
frame = cv2.imread(screenshot_path)
# converting to HSV
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
while True:
    # get info from the track bars and apply it to the result
    h = cv2.getTrackbarPos('h', 'trackbar')
    s = cv2.getTrackbarPos('s', 'trackbar')
    v = cv2.getTrackbarPos('v', 'trackbar')
    h2 = cv2.getTrackbarPos('h2', 'trackbar')
    s2 = cv2.getTrackbarPos('s2', 'trackbar')
    v2 = cv2.getTrackbarPos('v2', 'trackbar')
    # Normal masking algorithm
    lower = np.array([h, s, v])
    upper = np.array([h2, s2, v2])
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(frame, frame, mask=mask)
    cv2.imshow('result', result)
    print(h, s, v, h2, s2, v2)
    k = cv2.waitKey(5) & 0xFF
    if k == 27:
        break
cv2.destroyAllWindows()
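As a rough sketch of that first suggestion (the file name, threshold value, kernel size, and minimum area are assumptions I have not tuned on your footage): threshold the MOG2 foreground mask, clean it up with morphology, and take the largest remaining contour as the mouse.
import cv2

cap = cv2.VideoCapture('mice.mp4')  # assumed input video
backsub = cv2.createBackgroundSubtractorMOG2(history=500, detectShadows=False)
while True:
    ok, frame = cap.read()
    if not ok:
        break
    fgmask = backsub.apply(frame)
    # keep only confident foreground pixels
    _, fgmask = cv2.threshold(fgmask, 200, 255, cv2.THRESH_BINARY)
    # opening removes speckle noise, closing fills small holes in the blob
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, kernel)
    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_CLOSE, kernel)
    # [-2] works for both the 2-value (OpenCV 4) and 3-value (OpenCV 3) return
    contours = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if contours:
        mouse = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(mouse)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('frame', frame)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()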
If that doesn't work, I would use an object-tracking API such as CSRT:
# python opencv_object_tracking.py
# python opencv_object_tracking.py --video dashcam_boston.mp4 --tracker csrt
# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import argparse
import imutils
import time
import cv2
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video", type=str,
help="path to input video file")
ap.add_argument("-t", "--tracker", type=str, default="kcf",
help="OpenCV object tracker type")
args = vars(ap.parse_args())
# extract the OpenCV version info
(major, minor) = cv2.__version__.split(".")[:2]
# if we are using OpenCV 3.2 OR BEFORE, we can use a special factory
# function to create our object tracker
if int(major) == 3 and int(minor) < 3:
    tracker = cv2.Tracker_create(args["tracker"].upper())
# otherwise, for OpenCV 3.3 OR NEWER, we need to explicitly call the
# appropriate object tracker constructor:
else:
    # initialize a dictionary that maps strings to their corresponding
    # OpenCV object tracker implementations
    OPENCV_OBJECT_TRACKERS = {
        "csrt": cv2.TrackerCSRT_create,
        "kcf": cv2.TrackerKCF_create,
        "boosting": cv2.TrackerBoosting_create,
        "mil": cv2.TrackerMIL_create,
        "tld": cv2.TrackerTLD_create,
        "medianflow": cv2.TrackerMedianFlow_create,
        "mosse": cv2.TrackerMOSSE_create
    }
    # grab the appropriate object tracker using our dictionary of
    # OpenCV object tracker objects
    tracker = OPENCV_OBJECT_TRACKERS[args["tracker"]]()
# initialize the bounding box coordinates of the object we are going
# to track
initBB = None
# if a video path was not supplied, grab the reference to the web cam
if not args.get("video", False):
    print("[INFO] starting video stream...")
    vs = VideoStream(src=0).start()
    time.sleep(1.0)
# otherwise, grab a reference to the video file
else:
    vs = cv2.VideoCapture(args["video"])
# initialize the FPS throughput estimator
fps = None
# loop over frames from the video stream
while True:
    # grab the current frame, then handle if we are using a
    # VideoStream or VideoCapture object
    frame = vs.read()
    frame = frame[1] if args.get("video", False) else frame
    # check to see if we have reached the end of the stream
    if frame is None:
        break
    # resize the frame (so we can process it faster) and grab the
    # frame dimensions
    frame = imutils.resize(frame, width=500)
    (H, W) = frame.shape[:2]
    # check to see if we are currently tracking an object
    if initBB is not None:
        # grab the new bounding box coordinates of the object
        (success, box) = tracker.update(frame)
        # check to see if the tracking was a success
        if success:
            (x, y, w, h) = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h),
                (0, 255, 0), 2)
        # update the FPS counter
        fps.update()
        fps.stop()
        # initialize the set of information we'll be displaying on
        # the frame
        info = [
            ("Tracker", args["tracker"]),
            ("Success", "Yes" if success else "No"),
            ("FPS", "{:.2f}".format(fps.fps())),
        ]
        # loop over the info tuples and draw them on our frame
        for (i, (k, v)) in enumerate(info):
            text = "{}: {}".format(k, v)
            cv2.putText(frame, text, (10, H - ((i * 20) + 20)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # if the 's' key is selected, we are going to "select" a bounding
    # box to track
    if key == ord("s"):
        # select the bounding box of the object we want to track (make
        # sure you press ENTER or SPACE after selecting the ROI)
        initBB = cv2.selectROI("Frame", frame, fromCenter=False,
            showCrosshair=True)
        # start OpenCV object tracker using the supplied bounding box
        # coordinates, then start the FPS throughput estimator as well
        tracker.init(frame, initBB)
        fps = FPS().start()
    # if the `q` key was pressed, break from the loop
    elif key == ord("q"):
        break
# if we are using a webcam, release the pointer
if not args.get("video", False):
    vs.stop()
# otherwise, release the file pointer
else:
    vs.release()
# close all windows
cv2.destroyAllWindows()
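If you want to tie the two suggestions together, here is an untested sketch (the file name and the area threshold are assumptions): use the background-subtraction blob once to initialize the CSRT tracker, then let the tracker follow the mouse even when it stops moving.
import cv2

cap = cv2.VideoCapture('mice.mp4')  # assumed input video
backsub = cv2.createBackgroundSubtractorMOG2()
tracker = None
while True:
    ok, frame = cap.read()
    if not ok:
        break
    if tracker is None:
        # look for a moving blob large enough to be the mouse
        mask = backsub.apply(frame)
        _, mask = cv2.threshold(mask, 200, 255, cv2.THRESH_BINARY)
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        big = [c for c in contours if cv2.contourArea(c) > 500]  # assumed minimum area
        if big:
            box = cv2.boundingRect(max(big, key=cv2.contourArea))
            tracker = cv2.TrackerCSRT_create()
            tracker.init(frame, box)
    else:
        success, box = tracker.update(frame)
        if success:
            x, y, w, h = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        else:
            # fall back to motion detection if the track is lost
            tracker = None
    cv2.imshow('frame', frame)
    if cv2.waitKey(30) & 0xFF == 27:
        break
cap.release()
cv2.destroyAllWindows()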
TL;DR I'm using:
adaptive thresholding
segmenting by keys (width/height ratio) - see green boxes in image result
psm 10 to treat each key as a character
but it fails to recognize some keys, falsely identifies others, or reads two characters for one key (see the L character in the image result: it is detected as both L and P), etc.
Note: I cropped the image and re-ran the pipeline so the result fits on this site, but before cropping it did slightly better (recognized more keys, fewer false positives, etc.).
I just want it to recognize the alphabet keys. Ultimately I want it to work on real-time video.
config:
'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
I've tried scaling the image differently, scaling the individual key segments, using opening/closing/etc but it doesn't recognize all the keys.
original image
image result
Update: with the new results, if I make the image straighter (bird's-eye view) and remove the whitelisting, it manages to detect almost everything (although it thinks the O is a 0 and the I is a |, which is understandable). Why is this, and how could I make the pipeline robust enough for dynamic video when it is this sensitive to such conditions?
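For reference, "making the image straighter" is essentially a perspective warp like the sketch below; the corner coordinates are ones I picked by hand, and for video they would have to be found automatically per frame, which is part of what I am asking about.
import cv2
import numpy as np

img = cv2.imread('keyboard.jpg')
# hand-picked (hypothetical) keyboard corners in the source image: TL, TR, BR, BL
src = np.float32([[120, 340], [1180, 300], [1260, 820], [60, 860]])
# target rectangle for the bird's-eye view
w, h = 1200, 450
dst = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
M = cv2.getPerspectiveTransform(src, dst)
birds_eye = cv2.warpPerspective(img, M, (w, h))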
Code:
import pytesseract
import numpy as np
try:
    from PIL import Image
except ImportError:
    import Image
import cv2
from tqdm import tqdm
from collections import defaultdict

def get_missing_chars(dict):
    capital_alphabet = [chr(ascii) for ascii in range(65, 91)]
    return [let for let in capital_alphabet if let not in dict]

def draw_box_and_char(img, contour_dims, c, box_col, text_col):
    x, y, w, h = contour_dims
    top_left = (x, y)
    bot_right = (x + w, y + h)
    font_offset = 3
    text_pos = (x + h//2 + 12, y + h - font_offset)
    img_copy = img.copy()
    cv2.rectangle(img_copy, top_left, bot_right, box_col, 2)
    cv2.putText(img_copy, c, text_pos, cv2.FONT_HERSHEY_SIMPLEX, fontScale=.5, color=text_col, thickness=1, lineType=cv2.LINE_AA)
    return img_copy
def detect_keys(img):
    scaling = .25
    img = cv2.resize(img, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_AREA)
    print("img shape", img.shape)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ratio_min = 0.7
    area_min = 1000
    nbrhood_size = 1001
    bias = 2
    # adapt to different lighting
    bin_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, nbrhood_size, bias)
    items = cv2.findContours(bin_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = items[0] if len(items) == 2 else items[1]
    key_contours = []
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        ratio = h/w
        area = cv2.contourArea(c)
        # square-like ratio, try to get character
        if ratio > ratio_min and area > area_min:
            key_contours.append(c)
    detected = defaultdict(int)
    n_kept = 0
    img_copy = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2RGB)
    let_to_contour = {}
    n_contours = len(key_contours)
    # offset to get smaller square within the key segment for easier char recognition
    offset = 10
    show_each_char = False
    for _, c in tqdm(enumerate(key_contours), total=n_contours):
        x, y, w, h = cv2.boundingRect(c)
        ratio = h/w
        area = cv2.contourArea(c)
        base = np.zeros(bin_img.shape, dtype=np.uint8)
        base.fill(255)
        n_kept += 1
        new_y = y + offset
        new_x = x + offset
        new_h = h - 2*offset
        new_w = w - 2*offset
        base[new_y:new_y+new_h, new_x:new_x+new_w] = bin_img[new_y:new_y+new_h, new_x:new_x+new_w]
        segment = cv2.bitwise_not(base)
        # try scaling up individual keys
        # scaling = 2
        # segment = cv2.resize(segment, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_CUBIC)
        # psm 10: treats the segment as a single character
        custom_config = r'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
        d = pytesseract.image_to_data(segment, config=custom_config, output_type='dict')
        conf = d['conf']
        c = d['text'][-1]
        if c:
            # sometimes recognizes multiple keys even though there is only 1
            for sub_c in c:
                # save character and contour to draw on image and show bounds/detection
                if sub_c not in let_to_contour or (sub_c in let_to_contour and conf > let_to_contour[sub_c]['conf']):
                    let_to_contour[sub_c] = {'conf': conf, 'cont': (new_x, new_y, new_w, new_h)}
        else:
            c = "?"
            text_col = (0, 0, 255)
        if show_each_char:
            contour_dims = (new_x, new_y, new_w, new_h)
            box_col = (0, 255, 0)
            text_col = (0, 0, 0)
            segment_with_boxes = draw_box_and_char(segment, contour_dims, c, box_col, text_col)
            cv2.imshow('segment', segment_with_boxes)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
    # draw boxes around recognized keys
    for c, data in let_to_contour.items():
        box_col = (0, 255, 0)
        text_col = (0, 0, 0)
        img_copy = draw_box_and_char(img_copy, data['cont'], c, box_col, text_col)
    detected = {k: 1 for k in let_to_contour}
    for det in let_to_contour:
        print(det, let_to_contour[det])
    print("total detected: ", let_to_contour.keys())
    missing = get_missing_chars(detected)
    print(f"n_missing: {len(missing)}")
    print(f"chars missing: {missing}")
    return img_copy
if __name__ == "__main__":
    img_file = "keyboard.jpg"
    img = cv2.imread(img_file)
    img_with_detected_keys = detect_keys(img)
    cv2.imshow("detected", img_with_detected_keys)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
I have some old code that used to run fine under Python 2.7 a while ago. I updated it to run under Python 3.8, but when I execute it with Python 3.8 and OpenCV 3.4 I get a resize error and a warning (below).
Here is the link to the two TIFF images that are required to run this code.
It's worth noting that both TIFF images are in the same folder as the Python code.
import cv2
import matplotlib.pyplot as plt
import numpy as np
## Code for C_preferred Mask and C_images##
## There are three outputs to this code:
#"Block_order_C.PNG"
#"Out_img.PNG"
#"Output_C.txt"
## Change the image name here
filename_image = '2.tif'
filename_mask = '1.tif'
## OpenCV verison Checking
#print 'OpenCV version used', cv2.__version__
filename = open("Output_C.txt","w")
filename.write("Processing Image : " + str(filename_image) + '\n\n')
## Function to sort the contours. Parameters that you can tune: tolerance_factor and the size of the image. Here I have used a fixed size of
## (800, 800).
def get_contour_precedence(contour, cols):
    tolerance_factor = 10
    origin = cv2.boundingRect(contour)
    return ((origin[1] // tolerance_factor) * tolerance_factor) * cols + origin[0]
## Loading the colored mask, resizing it to (800,800) and converting it from RGB to HSV space, so that the color values are emphasized
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
# Loading the original Image
b_image_1 = cv2.resize(cv2.imread(filename_image),(800,800));
cv2.imshow("c_mask_preferred",p_mask_c)
cv2.waitKey();
# convert the target color to HSV, As our target mask portion to be considered is green. So I have chosen target color to be green
b = 0;
g = 255;
r = 0;
# Converting target color to HSV space
target_color = np.uint8([[[b, g, r]]])
target_color_hsv = cv2.cvtColor(target_color, cv2.COLOR_BGR2HSV)
# boundaries for Hue define the proper color boundaries, saturation and values can vary a lot
target_color_h = target_color_hsv[0,0,0]
tolerance = 20
lower_hsv = np.array([max(0, target_color_h - tolerance), 10, 10])
upper_hsv = np.array([min(179, target_color_h + tolerance), 250, 250])
# apply threshold on hsv image
mask = cv2.inRange(p_mask_c, lower_hsv, upper_hsv)
cv2.imshow("mask",mask)
cv2.waitKey()
# Eroding the binary mask, so that every white portion (grid) is separated from the others, to avoid overlapping and mixing of
# adjacent grids
b_mask = mask;
kernel = np.ones((5,5))
#kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
sharp = cv2.erode(b_mask,kernel, iterations=2)
# Finding all the grids (from binary image)
contours, hierarchy = cv2.findContours(sharp,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
print (' Number of contours', len(contours))
# Sorting contours
contours.sort(key=lambda x:get_contour_precedence(x, np.shape(b_mask)[0]))
#cv2.drawContours(b_image_1, contours, -1, (0,255,0), 1)
# Label variable for each grid/panel
label = 1;
b_image = b_image_1.copy();
temp =np.zeros(np.shape(b_image_1),np.uint8)
print (' size of temp',np.shape(temp), np.shape(b_image))
out_img = b_image_1.copy()
# Processing in each contour/label one by one
for cnt in contours:
    cv2.drawContours(b_image_1,[cnt],0,(255,255,0), 1)
    ## Just to draw labels in the center of each grid
    ((x, y), r) = cv2.minEnclosingCircle(cnt)
    x = int(x)
    y = int(y)
    r = int(r)
    cv2.putText(b_image_1, "#{}".format(label), (int(x) - 10, int(y)),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    ##
    cv2.drawContours(temp,[cnt],0,(255,255,255), -1)
    #crop_img = np.bitwise_and(b_image,temp)
    r = cv2.boundingRect(cnt)
    crop_img = b_image[r[1]:r[1]+r[3], r[0]:r[0]+r[2]]
    mean = cv2.mean(crop_img);
    mean = np.array(mean).reshape(-1,1)
    print (' Mean color', mean, np.shape(mean))
    if mean[1] < 50:
        cv2.putText(out_img, "M", (int(x) - 10, int(y)),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
        filename.write("Block number #"+ str(label)+ ' is : ' + 'Magenta'+'\n');
    else:
        cv2.putText(out_img, "G", (int(x) - 10, int(y)),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
        filename.write("Block number #"+ str(label)+ ' is : ' +'Gray'+'\n');
    label = label+1;
cv2.imwrite("Block_order_C.PNG",b_image_1)
cv2.imwrite("Out_img.PNG",out_img)
filename.close()
cv2.imshow("preferred",b_image_1)
cv2.waitKey()
Error
[ WARN:0] global C:\projects\opencv-python\opencv\modules\imgcodecs\src\grfmt_tiff.cpp (449) cv::TiffDecoder::readData OpenCV TIFF: TIFFRGBAImageOK: Sorry, can not handle images with IEEE floating-point samples
Traceback (most recent call last):
File "Processing_C_preferred.py", line 32, in
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
cv2.error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'
When you read in the image, pass cv2.IMREAD_ANYDEPTH (which equals 2) as the second parameter to cv2.imread().
Changing your lines to
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2),(800,800)),cv2.COLOR_RGB2HSV);
and
b_image_1 = cv2.resize(cv2.imread(filename_image, 2),(800,800));
removes the resize error you're seeing.
But then you get another error during the color conversion, since your TIFF image apparently has only one channel, so cv2.COLOR_RGB2HSV won't work.
You could also combine multiple flags, e.g. with cv2.IMREAD_COLOR (which equals 1),
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2 | 1),(800,800)),cv2.COLOR_BGR2HSV);
to read in a color image. But you get a different error. Perhaps you understand this image better than I do and can solve the problem from here on out.
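Untested, but roughly how I would combine the flags and guard against the single-channel / floating-point case (the normalization step is my assumption, prompted by the warning about IEEE floating-point samples):
import cv2

filename_mask = '1.tif'  # path taken from the question
# IMREAD_ANYDEPTH | IMREAD_COLOR keeps the original bit depth while asking for 3 channels
img = cv2.imread(filename_mask, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
if img is None:
    raise IOError("Could not read " + filename_mask)
# floating-point samples need to be brought into 8-bit range before cvtColor
if img.dtype != 'uint8':
    img = cv2.normalize(img, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
# if the file still decodes to a single channel, replicate it to 3 channels first
if img.ndim == 2:
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
p_mask_c = cv2.cvtColor(cv2.resize(img, (800, 800)), cv2.COLOR_BGR2HSV)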
I am working on a VCS (vehicle counting system) project. The scope of the project is to classify and count vehicles. I have built a custom model using Faster R-CNN with the TensorFlow Object Detection API; the model contains 7 classes, such as car, motorbike, bicycle, etc. The detection works perfectly, but the problem is the counting: it is very hard to count vehicles across video frames. I researched this on the internet and tried a lot, but I could not find any useful information. There are some projects on GitHub that use tracking methods.
What I want is the following: draw a horizontal line in the frame, and when a vehicle touches it, the count should be incremented. I don't know the algorithm behind this, but I have heard that centroid tracking would help.
My question is how to count vehicles when they touch the horizontal line. I have linked a sample image below.
Sample_Image
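To make the idea concrete, this is the kind of logic I imagine (the line position and band size are made-up numbers; the boxes are the normalized [ymin, xmin, ymax, xmax] rows my detector returns). I suspect the missing piece is assigning each vehicle an ID so it is counted only once, which is what centroid tracking would provide.
LINE_Y = 600  # assumed y-coordinate of a horizontal counting line
BAND = 10     # tolerance band (in pixels) around the line

def centroids_from_boxes(boxes, im_width, im_height):
    """boxes: normalized [ymin, xmin, ymax, xmax] rows from the detector."""
    cents = []
    for ymin, xmin, ymax, xmax in boxes:
        cx = int((xmin + xmax) / 2 * im_width)
        cy = int((ymin + ymax) / 2 * im_height)
        cents.append((cx, cy))
    return cents

def touches_line(centroid):
    # without per-vehicle IDs the same vehicle is counted on several consecutive
    # frames, which is exactly what a centroid tracker is meant to prevent
    _, cy = centroid
    return LINE_Y - BAND < cy < LINE_Y + BAND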
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# Import utilites
from utils import label_map_util
from utils import visualization_utils as vis_util
# Name of the directory containing the object detection module we're using
MODEL_NAME = 'inference_graph'
VIDEO_NAME = 'Video_105.mp4'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
# Path to video
PATH_TO_VIDEO = os.path.join(CWD_PATH,VIDEO_NAME)
# Number of classes the object detector can identify
NUM_CLASSES = 7
# Load the label map.
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `king`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection classifier
# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Open video file
video = cv2.VideoCapture(PATH_TO_VIDEO)
while video.isOpened():
    # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
    # i.e. a single-column array, where each item in the column has the pixel RGB value
    ret, frame = video.read()
    frame_expanded = np.expand_dims(frame, axis=0)
    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})
    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.90)
    # All the results have been drawn on the frame, so it's time to display it.
    final_score = np.squeeze(scores)
    count = 0
    cv2.line(frame, (1144, 568), (1723, 664), (0, 0, 255), 2)  # Line
    for i in range(100):
        if scores is None or final_score[i] > 0.90:
            min_score_thresh = 0.90
            bboxes = boxes[scores > min_score_thresh]
            im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
            im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
            final_box = []
            for box in bboxes:
                ymin, xmin, ymax, xmax = box
                print("Ymin:{}:Xmin:{}:Ymax:{}Xmax{}".format(ymin*im_width, xmin*im_width, ymax*im_width, xmax*im_width))
                final_box.append([xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height])
    #print(final_box)
    cv2.imshow('Object detector', frame)
    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break
# Clean up
video.release()
cv2.destroyAllWindows()
# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import argparse
import imutils
import time
import cv2
tracker = cv2.TrackerCSRT_create()
vs = cv2.VideoCapture("Video.mp4")
initBB = None
detec = []
def pega_centro(x, y, w, h):
    x1 = int(w / 2)
    y1 = int(h / 2)
    cx = x + x1
    cy = y + y1
    return cx, cy
roi = 480
counter = 0
offset = 6
# loop over frames from the video stream
while vs.isOpened():
    ret, frame = vs.read()
    cv2.line(frame, (769, roi), (1298, roi), (255, 0, 0), 3)
    # check to see if we are currently tracking an object
    if initBB is not None:
        # grab the new bounding box coordinates of the object
        (success, box) = tracker.update(frame)
        # check to see if the tracking was a success
        if success:
            (x, y, w, h) = [int(v) for v in box]
            cv2.rectangle(frame, (x, y), (x + w, y + h),
                (0, 255, 0), 2)
            cX = int((x + x + w) / 2.0)
            cY = int((y + y + h) / 2.0)
            cv2.circle(frame, (cX, cY), 3, (0, 0, 255), -1)
            c = pega_centro(x, y, w, h)
            detec.append(c)
    for (x, y) in detec:
        if y < (roi + offset) and y > (roi - offset):
            counter += 1
            print(counter)
            cv2.line(frame, (769, roi), (1298, roi), (0, 0, 255), 3)
            detec.remove((x, y))
    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    if key == ord("s"):
        # select the bounding box of the object we want to track (make
        # sure you press ENTER or SPACE after selecting the ROI)
        initBB = cv2.selectROI("Frame", frame, fromCenter=False,
            showCrosshair=True)
        # start OpenCV object tracker using the supplied bounding box
        # coordinates, then start the FPS throughput estimator as well
        tracker.init(frame, initBB)
        fps = FPS().start()
    # if the `q` key was pressed, break from the loop
    elif key == ord("q"):
        break
else:
    vs.release()
cv2.destroyAllWindows()
So this is what I have now:
As you can see, the neural style transfer is only applied to the area covered by the detection box. I am trying to put the transformed picture (which will always be smaller than 1200 x 900, since the detection box comes from a 1200 x 900 frame) onto a black picture with dimensions 1200 x 900 so that I can save the video file.
My box is described by startX, endX, startY, and endY. The way I am trying to put the stylized picture over the background right now is black_background[startY:endY, startX:endX] = output, where output also has the size (endY - startY, endX - startX).
My way is not working, any insights? Also, for some reason, when I do black_background[startY:endY, startX:endX] = output, there is often an off-by-a-few-pixels broadcasting issue, e.g. it cannot assign a (859, 100, 3) array into a (860, 100, 3) slice. Is there a non-buggy solution to the black-background issue? Manually doing black_background[startY:endY, startX:endX] = output feels fragile.
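One workaround I have been experimenting with (not sure it is the proper fix) is to slice the background by the output's actual shape, so the assignment can never be off by a pixel:
# sketch: paste `output` using its own shape so rounding differences cannot break broadcasting
h2, w2 = output.shape[:2]
y0 = min(startY, black_background.shape[0] - h2)
x0 = min(startX, black_background.shape[1] - w2)
black_background[y0:y0 + h2, x0:x0 + w2] = output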
Here's my full code; I marked the if block that actually matters with -----. Thank you!
from __future__ import print_function
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import time
import cv2
from imutils import paths
import itertools
# We need to input model prototxt
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=True,
help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
help="minimum probability to filter weak detections")
ap.add_argument("-nm", "--neuralmodels", required=True,
help="path to directory containing neural style transfer models")
args = vars(ap.parse_args())
# we should identify the class first, and then transfer that block
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
# load our serialized model from disk
print("[INFO] loading model...")
DetectionNet = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
# grab the paths to all neural style transfer models in our 'models'
# directory, provided all models end with the '.t7' file extension
modelPaths = paths.list_files(args["neuralmodels"], validExts=(".t7",))
modelPaths = sorted(list(modelPaths))
# generate unique IDs for each of the model paths, then combine the
# two lists together
models = list(zip(range(0, len(modelPaths)), (modelPaths)))
# use the cycle function of itertools that can loop over all model
# paths, and then when the end is reached, restart again
modelIter = itertools.cycle(models)
(modelID, modelPath) = next(modelIter)
NTSnet = cv2.dnn.readNetFromTorch(modelPath)
# initialize the video stream, allow the camera sensor to warm up,
# and initialize the FPS counter
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
fps = FPS().start()
fourcc = cv2.VideoWriter_fourcc(*'XVID')
output_video = cv2.VideoWriter('output.avi', fourcc, 20.0, (1200, 900))
while True:
    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    frame = vs.read()
    frame = imutils.resize(frame, width=1200, height=900)
    # grab the frame dimensions and convert it to a blob
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)),
        0.007843, (300, 300), 127.5)
    # pass the blob through the network and obtain the detections and
    # predictions
    DetectionNet.setInput(blob)
    detections = DetectionNet.forward()
    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence
        if confidence > args["confidence"]:
            # extract the index of the class label from the
            # `detections`, then compute the (x, y)-coordinates of
            # the bounding box for the object
            idx = int(detections[0, 0, i, 1])
            if CLASSES[idx] == "person" and confidence > .90:
                box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                (startX, startY, endX, endY) = box.astype("int")
                # draw the prediction on the frame
                label = "{}: {:.2f}%".format("PERSON",
                    confidence * 100)
                cv2.rectangle(frame, (startX, startY), (endX, endY),
                    COLORS[idx], 2)
                y = startY - 15 if startY - 15 > 15 else startY + 15
                cv2.putText(frame, label, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
                # print box area in background
                newimage = frame[startY:endY, startX:endX]
                (h, w) = newimage.shape[:2]
                #print(h,w)
                #print(startX, endX, startY, endY)
                noise_picture = cv2.imread('white_noise.jpg')
                black_background = cv2.imread('black.png')
                # -------------------------------------------------------------------
                if h > 0 and w > 0:
                    # to_be_transformed is the detection box area
                    # resize that area for MobileNetSSD
                    #to_be_transformed = imutils.resize(to_be_transformed, height=450)
                    (height_orig, width_orig) = noise_picture.shape[:2]
                    noise_picture[startY:endY, startX:endX] = newimage
                    noise_picture = imutils.resize(noise_picture, height=450)
                    # run it through the network, output is the image
                    (h, w) = noise_picture.shape[:2]
                    # print(h, w)
                    blob2 = cv2.dnn.blobFromImage(noise_picture, 1.0, (w, h), (103.939, 116.779, 123.680), swapRB=False, crop=False)
                    NTSnet.setInput(blob2)
                    output = NTSnet.forward()
                    output = output.reshape((3, output.shape[2], output.shape[3]))
                    output[0] += 103.939
                    output[1] += 116.779
                    output[2] += 123.680
                    output /= 255.0
                    output = output.transpose(1, 2, 0)
                    # set the 600 x 450 back to the original size
                    black_background = imutils.resize(black_background, width=1200, height=900)
                    output = imutils.resize(output, width=1200)
                    #black_background[startY:endY, startX:endX] = output[startY:endY, startX:endX]
                    output = output[startY:endY, startX:endX]
                    (h2, w2) = output.shape[:2]
                    if h2 > 0 and w2 > 0:
                        cv2.imshow('hmm', output)
                        black_background[startY:endY, startX:endX] = output
                        cv2.imshow("uh", black_background)
                    #output_video.write(black_background)
                    #output_video.write(background)
                # ---------------------------------------------------------------
    # show the output frame, which is the whole thing
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF
    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
    # update the FPS counter
    fps.update()
# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
Oh man, second time I've made this mistake. You have to multiply by 255 when you add your output picture to your background. This is really subtle: it seems like things work fine as long as all the values are in [0, 1], but once a value goes over 1 the image is treated as being in the [0, 255] range (don't take my word on the exact rule, though).
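In other words (my reading of it, assuming the network output stays roughly in [0, 1]): scale and cast the stylized patch before pasting it into the uint8 background, something like:
import numpy as np

# convert the float [0, 1] style-transfer output to uint8 before compositing
patch = (np.clip(output, 0.0, 1.0) * 255).astype(np.uint8)
black_background[startY:startY + patch.shape[0], startX:startX + patch.shape[1]] = patch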
For my project I have multiple cameras and I need to save the video feed to multiple output files at the same time. Here's my code:
import cv2
def one_cam_is_open(list_of_cams):
    for cam in list_of_cams:
        if cam.isOpened():
            return True
    return False
cam_list = []
# Capture video from camera
for i in range(0, 2):
    cap = cv2.VideoCapture(i)
    if cap != -1:
        cam_list.append(cv2.VideoCapture(i))
# the width and height of all cameras will be 1900x1080
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) + 0.5)
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + 0.5)
dim = (1900, 1080)
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Be sure to use the lower case
video_writers = {}
for i, cam in enumerate(cam_list):
    video_writers[cam] = cv2.VideoWriter("output_" + str(i) + ".mp4", fourcc, 20.0, dim)
while one_cam_is_open(cam_list):
    for (cam, out) in video_writers:  # TypeError: cannot unpack non-iterable cv2.VideoCapture object
        ret, frame = cam.read()
        if ret == True:
            frame = cv2.flip(frame, 0)
            # write the flipped frame
            out.write(frame)
            cv2.imshow('frame', frame)
            if (cv2.waitKey(1) & 0xFF) == ord('q'):  # Hit `q` to exit
                break
        else:
            break
# Release everything if job is finished
for (cam, out) in video_writers:
    out.release()
    cam.release()
cv2.destroyAllWindows()
As noted in the code, I get the error TypeError: cannot unpack non-iterable cv2.VideoCapture object when I try to unpack the references to the VideoWriter objects for each camera.
Is there a way to write to multiple video files using VideoWriter?
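Based on the error, I suspect the loop has to iterate video_writers.items(), since iterating a dict directly yields only its keys. Here is a sketch of what I think it should look like; it is untested with multiple physical cameras, and the resize to dim is my addition so the frames match the size the writers were created with:
while one_cam_is_open(cam_list):
    for cam, out in video_writers.items():
        ret, frame = cam.read()
        if not ret:
            continue
        frame = cv2.flip(frame, 0)
        frame = cv2.resize(frame, dim)  # match the VideoWriter frame size
        out.write(frame)
        cv2.imshow('frame', frame)
    if (cv2.waitKey(1) & 0xFF) == ord('q'):  # hit `q` to exit
        break

# release everything when the job is finished
for cam, out in video_writers.items():
    out.release()
    cam.release()
cv2.destroyAllWindows()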