How to detect a person's movement in a RoI - opencv

I have a video and want to know when a person enters and stays in a specific area of it, then record the time in the video (video time) when he enters and when he leaves, so I can use these times later for cutting the video.
I am only a little experienced with OpenCV and currently have no experience with TensorFlow or Keras.
This is for a video analysis task.
I have tried a few things, like BackgroundSubtractorMOG and using a different resolution.
https://s18.directupload.net/images/190517/wym8r59b.png
https://s18.directupload.net/images/190517/pi52vgv7.png
import cv2

background = None  # global background model used by calc_accum_avg / segment

def calc_accum_avg(frame, accumulated_weight):
    global background
    if background is None:
        background = frame.copy().astype("float")
        return None
    cv2.accumulateWeighted(frame, background, accumulated_weight)

def segment(frame, threshold=25):
    global background
    diff = cv2.absdiff(background.astype("uint8"), frame)
    _, thresholded = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
    contours, hierarchy = cv2.findContours(thresholded.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        return None
    else:
        move_segment = max(contours, key=cv2.contourArea)
        return (thresholded, move_segment)

def main():
    video = cv2.VideoCapture("/home/felix/Schreibtisch/OpenCVPython/large_video.mp4")
    video.set(3, 1920)
    video.set(4, 1080)
    length = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    print(length)
    num_frames = 0
    fgbg = cv2.bgsegm.createBackgroundSubtractorMOG()
    while True:
        ret, frame = video.read()
        if frame is None:  # check before apply(), which fails on an empty frame
            return
        fgmask = fgbg.apply(frame)
        frame_copy = fgmask.copy()
        #frame2_copy = frame.copy()
        # ROI coordinates, accumulated_weight and count_moves are defined elsewhere in the full script
        roi_visualiser = frame[roi_visualiser_top:roi_visualiser_bottom, roi_visualiser_right:roi_visualiser_left]
        roi_board = frame[roi_board_top:roi_board_bottom, roi_board_right:roi_board_left]
        gray = cv2.cvtColor(roi_visualiser, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (9, 9), 0)
        #gray = cv2.cvtColor(roi_board, cv2.COLOR_BGR2GRAY)
        #gray = cv2.GaussianBlur(gray, (9, 9), 0)
        if num_frames < 2:
            calc_accum_avg(gray, accumulated_weight)
            #calc_accum_avg(gray2, accumulated_weight)
            if num_frames <= 1:
                cv2.imshow("Finger Count", frame_copy)
        else:
            hand = segment(gray)
            if hand is not None:
                thresholded, move_segment = hand
                cv2.drawContours(frame_copy, [move_segment + (roi_visualiser_right, roi_visualiser_top)], -1, (255, 0, 0), 1)
                #cv2.drawContours(frame_copy2, [move_segment + (roi_board_right, roi_board_top)], -1, (255, 0, 0), 1)
                fingers = count_moves(thresholded, move_segment)
                if fingers > 0:
                    print("ja")  # test: works
                else:
                    print("Nein")
                cv2.putText(frame_copy, str(fingers), (70, 45), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)  # no need
                cv2.imshow("Thresholded", thresholded)  # no need
        cv2.rectangle(frame_copy, (roi_visualiser_left, roi_visualiser_top), (roi_visualiser_right, roi_visualiser_bottom), (255, 0, 0), 1)
        cv2.rectangle(frame_copy, (roi_board_left, roi_board_top), (roi_board_right, roi_board_bottom), (255, 0, 0), 1)
        num_frames += 1
        cv2.imshow("Finger Count", frame_copy)
I get no error messages and everything runs fine, but I don't get the result I need.
[1]: https://i.stack.imgur.com/dQbQi.png
[2]: https://i.stack.imgur.com/MqOAc.png

Have you tried BackgroundSubtractorMOG2? It can distinguish shadows, and you can use that to prevent false positives.
To make the processing more efficient, first create a subimage of the area where the person enters and leaves, and apply the background subtraction to that subimage. Also, if the frames are noisy, applying a blur before the background subtraction can improve the result.
Check the resulting mask for white objects of significant size. If one is detected, store the frame number using video.get(cv2.CAP_PROP_POS_FRAMES) in an array, and stop recording frame numbers until the mask is fully black again.
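A minimal sketch of that pipeline might look like this; the file name, ROI coordinates, and minimum-area threshold are placeholder assumptions you would need to adapt:

import cv2

# hypothetical ROI and threshold values - adapt to your video
ROI_TOP, ROI_BOTTOM, ROI_LEFT, ROI_RIGHT = 100, 400, 200, 600
MIN_AREA = 1500  # minimum foreground area (px) to count as a person

video = cv2.VideoCapture("large_video.mp4")
fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=True)
fps = video.get(cv2.CAP_PROP_FPS)

events = []          # list of (enter_frame, leave_frame) pairs
inside = False
enter_frame = None

while True:
    ret, frame = video.read()
    if not ret:
        break
    roi = frame[ROI_TOP:ROI_BOTTOM, ROI_LEFT:ROI_RIGHT]
    roi = cv2.GaussianBlur(roi, (5, 5), 0)   # reduce noise before subtraction
    mask = fgbg.apply(roi)
    mask[mask == 127] = 0                    # drop shadow pixels (marked as 127)
    occupied = cv2.countNonZero(mask) > MIN_AREA
    frame_no = int(video.get(cv2.CAP_PROP_POS_FRAMES))
    if occupied and not inside:              # person entered the ROI
        inside = True
        enter_frame = frame_no
    elif not occupied and inside:            # ROI is empty again
        inside = False
        events.append((enter_frame, frame_no))

video.release()
for enter, leave in events:
    print("enter: %.2fs  leave: %.2fs" % (enter / fps, leave / fps))

Dividing the stored frame numbers by the FPS gives the video times you can later use for cutting.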

Related

How to separate handwriting from background

I want to separate the handwriting from the background as perfectly as possible in images like the following:
It first looks like one can separate the pixels by color, but plotting the pixels by brightness and relative blue content does not give a clear separation:
Using the above separating lines to keep only the pixels in the upper-left area (setting the other pixels to white), we get the following result:
The handwriting is not fully extracted, but the (printed) numbers start to appear. So an improvement of the separating lines does not seem to be possible. Any other possibilities to improve the result?
This is as far as I could get with simple techniques. I'm using thresholding to get the letters plus bits of numbers and contours to filter out the little number bits. I also end up losing the dots on the i's doing this. If you have control over the handwriting, it'd be a lot easier and cleaner to separate out red ink since the black numbers have some blue in them.
kmeans clustering might get you better results, but I've forgotten how to do that in OpenCV :p
import cv2
import numpy as np

# load image
img = cv2.imread("writing.png")

# convert to LAB colorspace
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
l, a, b = cv2.split(lab)

# threshold on b channel
done = False
low = 0
high = 124  # [0, 124, 8] b-channel
size = 8
while not done:
    # copy image
    copy = b.copy()
    # threshold
    thresh = cv2.inRange(copy, low, high)
    # contours (OpenCV 3.x returns three values; in OpenCV 4.x drop the leading underscore)
    _, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # filter contours by size
    big_cntrs = []
    marked = img.copy()
    for contour in contours:
        area = cv2.contourArea(contour)
        if area > size:
            big_cntrs.append(contour)
    cv2.drawContours(marked, big_cntrs, -1, (0, 255, 0), 3)
    # show
    cv2.imshow("original", img)
    cv2.imshow("marked", marked)
    cv2.imshow("thresh", thresh)
    key = cv2.waitKey(1)
    # check keypress
    done = key == ord('z')
    if key == ord('d'):
        high += 1
    if key == ord('a'):
        high -= 1
    if key == ord('w'):
        low += 1
    if key == ord('s'):
        low -= 1
    if key == ord('e'):
        size += 1
    if key == ord('q'):
        size -= 1
    print([low, high, size])

# create a mask of the contoured image
mask = np.zeros_like(thresh)
mask = cv2.drawContours(mask, big_cntrs, -1, 255, -1)
cv2.imshow("Mask", mask)
cv2.waitKey(0)
cv2.imwrite("masked.png", mask)

OpenCV - Extracting lines on a graph

I would like to create a program that is able to extract lines from a graph.
For example, if a graph like this is inputted, I would just want the red line to be outputted.
Below, I have tried to do this using a Hough line transform; however, I do not get very promising results.
import cv2
import numpy as np

graph_img = cv2.imread("/Users/2020shatgiskessell/Desktop/Graph1.png")
gray = cv2.cvtColor(graph_img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
#blur the grayscale image
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
#Canny edge detection
edges = cv2.Canny(blur_gray, 50, 150)
#Hough line transform
#distance resolution of hough grid (pixels)
rho = 1
#angular resolution of hough grid (radians)
theta = np.pi/180
#minimum number of votes
threshold = 15
#play around with these
min_line_length = 25
max_line_gap = 20
#make new image
line_image = np.copy(graph_img)
#returns array of lines
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
                        min_line_length, max_line_gap)
for line in lines:
    for x1, y1, x2, y2 in line:
        cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 2)
lines_edges = cv2.addWeighted(graph_img, 0.8, line_image, 1, 0)
cv2.imshow("denoised image", edges)
if cv2.waitKey(0) & 0xff == 27:
    cv2.destroyAllWindows()
This produces the output image below, which does not accurately recognize the graph line. How might I go about doing this?
Note: For now, I am not concerned about the graph titles or any other text.
I would also like the code to work for other graph images as well, such as:
etc.
If the graph does not have much noise around it (like your example), I would suggest thresholding your image with Otsu's threshold instead of looking for edges. Then you simply search the contours, select the biggest one (the graph), and draw it on a blank mask. After that you can perform a bitwise operation on the image with the mask, and you will get a black image with the graph. If you like the white background better, simply change all black pixels to white. The steps are written in the example. Hope it helps a bit. Cheers!
Example:
import numpy as np
import cv2
# Read the image and create a blank mask
img = cv2.imread('graph.png')
h,w = img.shape[:2]
mask = np.zeros((h,w), np.uint8)
# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
# Search for contours and select the biggest one and draw it on mask
_, contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours, key=cv2.contourArea)
cv2.drawContours(mask, [cnt], 0, 255, -1)
# Perform a bitwise operation
res = cv2.bitwise_and(img, img, mask=mask)
# Convert black pixels back to white
black = np.where(res==0)
res[black[0], black[1], :] = [255, 255, 255]
# Display the image
cv2.imshow('img', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
EDIT:
For noisier pictures you could try this code. Note that different graphs have different noise, so this may not work on every graph image, since the denoising process is specific to each case. For different noise you can use different ways to remove it, for example histogram equalization, erosion, or blurring. This code works well for all 3 graphs. The steps are written in the comments. Hope it helps. Cheers!
import numpy as np
import cv2

# Read the image and create a blank mask
img = cv2.imread('graph.png')
h, w = img.shape[:2]
mask = np.zeros((h, w), np.uint8)

# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Perform opening on the thresholded image (erosion followed by dilation)
kernel = np.ones((2, 2), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Search for contours, select the biggest one and draw it on the mask
_, contours, hierarchy = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnt = max(contours, key=cv2.contourArea)
cv2.drawContours(mask, [cnt], 0, 255, -1)

# Perform a bitwise operation
res = cv2.bitwise_and(img, img, mask=mask)

# Threshold the image again
gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Find the pixels the inverted threshold marked (the black background)
non_zero = cv2.findNonZero(thresh)

# Turn those pixels white
for i in range(0, len(non_zero)):
    first_x = non_zero[i][0][0]
    first_y = non_zero[i][0][1]
    res[first_y, first_x] = 255

# Display the image
cv2.imshow('img', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
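As a side note, the per-pixel loop at the end can be replaced by a single vectorized NumPy assignment, which is much faster on large images:

# equivalent to the loop above: paint every masked pixel white at once
res[thresh == 255] = 255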

OpenCV - VideoWriter FPS

I'm building a script to overlay bounding boxes onto my video via a CSV file. Each frame has some number of bounding boxes, so I am just iterating over the bounding boxes in each frame and drawing a cv2.rectangle on the frame. As a result, I am writing to a frame many times, for all frames.
While my VideoWriter constructor takes 23.97 FPS as a parameter, the resulting FPS is much lower. Is there an algorithm or a way in which I can set a proper FPS to compensate for the FPS drop after writing to the video?
Below is my code snippet:
avg_fws = counter_written / float(total_frames - 1)
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = video.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('data/bounding_videos/%s.avi' % video_file, fourcc, fps * avg_fws, (width, height))
counter = 1
print(counter_written)
while video.isOpened():
    ret, frame = video.read()
    if ret == True:
        if len(frames_dict) != 0:
            for i in frames_dict[counter].keys():
                box_dim = frames_dict[counter][i]
                x, y, w, h = box_dim
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 3)
                out.write(frame)
        else:
            out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
    counter += 1
video.release()
out.release()
cv2.destroyAllWindows()
The counters are just there for me to keep track of / access the frames, and avg_fws is the average frames written per second, which is basically total_num_of_frames_written / total_num_of_frames_in_video.
The problem with your code is that you are writing multiple frames in your for loop, one for each rectangle drawn. What you need to do is draw all the rectangles on a frame first and write it only once. This can be done by moving out.write(frame) out of your for loop.
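A sketch of the corrected loop, reusing the variable names from the question, with out.write moved after the rectangle loop:

while video.isOpened():
    ret, frame = video.read()
    if not ret:
        break
    if len(frames_dict) != 0:
        # draw every box for this frame first...
        for i in frames_dict[counter].keys():
            x, y, w, h = frames_dict[counter][i]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 255, 255), 3)
    # ...then write the finished frame exactly once
    out.write(frame)
    counter += 1

With one write per source frame, the original fps value can be passed to the VideoWriter unchanged.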

How to detect document from a picture in opencv?

I am trying to design an app similar to CamScanner. For that, I have to take an image and then find the document in it. I started off with the code described here - http://opencvpython.blogspot.in/2012/06/sudoku-solver-part-2.html
I found the contours, and the rectangular contour with max area should be the required document. For every contour, I am finding an approximate closed polygon with approxPolyDP. Of all the polygons with 4 vertices, the one with max area should be the required document. However, this method is not working.
The input image for the process is this
I tried to print the contour with max area and this resulted in this (Contour inside letter 'C')
Code:
img = cv2.imread('bounce.jpeg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.adaptiveThreshold(gray, 255, 1, 1, 11, 2)
_, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

def biggestRectangle(contours):
    biggest = None
    max_area = 0
    indexReturn = -1
    for index in range(len(contours)):
        i = contours[index]
        area = cv2.contourArea(i)
        if area > 100:
            peri = cv2.arcLength(i, True)
            approx = cv2.approxPolyDP(i, 0.1 * peri, True)
            if area > max_area:  # and len(approx)==4:
                biggest = approx
                max_area = area
                indexReturn = index
    return indexReturn

indexReturn = biggestRectangle(contours)
cv2.imwrite('hola.png', cv2.drawContours(img, contours, indexReturn, (0, 255, 0)))
What is going wrong here? Is there any other method by which I can capture the document in this picture?
Try this:
output image
import cv2
import numpy as np

img = cv2.imread('bounce.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# build a gamma-correction lookup table
invGamma = 1.0 / 0.3
table = np.array([((i / 255.0) ** invGamma) * 255
                  for i in np.arange(0, 256)]).astype("uint8")
# apply gamma correction using the lookup table
gray = cv2.LUT(gray, table)

ret, thresh1 = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY)
#thresh = cv2.adaptiveThreshold(gray,255,1,1,11,2)
_, contours, hierarchy = cv2.findContours(thresh1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

def biggestRectangle(contours):
    biggest = None
    max_area = 0
    indexReturn = -1
    for index in range(len(contours)):
        i = contours[index]
        area = cv2.contourArea(i)
        if area > 100:
            peri = cv2.arcLength(i, True)
            approx = cv2.approxPolyDP(i, 0.1 * peri, True)
            if area > max_area:  # and len(approx)==4:
                biggest = approx
                max_area = area
                indexReturn = index
    return indexReturn

indexReturn = biggestRectangle(contours)
hull = cv2.convexHull(contours[indexReturn])
cv2.imwrite('hola.png', cv2.drawContours(img, [hull], 0, (0, 255, 0), 3))
#cv2.imwrite('hola.png',thresh1)
I would do it like this:
1. Do preprocessing like blur / Canny.
2. Extract all lines from the image using the Hough line transform (see the OpenCV docs).
3. Use the 4 strongest lines.
4. Try to construct the contour of the document using the four lines.
Right now I do not have OpenCV installed, so I cannot try this approach, but maybe it leads you in the right direction; a rough sketch is given below.
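A minimal, untested sketch of steps 1-3; the input file name and the vote threshold are assumptions to tune, and the step of intersecting the four lines into document corners is left out:

import cv2
import numpy as np

img = cv2.imread('document.jpg')  # hypothetical input file
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
edges = cv2.Canny(blur, 50, 150)

# standard Hough transform; in OpenCV's implementation the lines come
# back sorted by vote count, so the first entries are the strongest
# candidates for the document edges
lines = cv2.HoughLines(edges, 1, np.pi / 180, 150)  # vote threshold is a guess
if lines is not None:
    for rho, theta in lines[:4].reshape(-1, 2):
        a, b = np.cos(theta), np.sin(theta)
        x0, y0 = a * rho, b * rho
        p1 = (int(x0 + 2000 * (-b)), int(y0 + 2000 * a))
        p2 = (int(x0 - 2000 * (-b)), int(y0 - 2000 * a))
        cv2.line(img, p1, p2, (0, 255, 0), 2)

cv2.imshow('strongest lines', img)
cv2.waitKey(0)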

Difficult time trying to do shape recognition for 3D objects

I am trying to make a shape recognition classifier in which, if you give it an individual picture of an object (from a scene), it would be able to classify (after machine learning) the shape of the object (cylinder, cube, sphere, etc.).
Original scene:
Individual objects it will classify:
I attempted to do this using cv2.approxPolyDP in an attempt to classify a cylinder. However, either my implementation isn't good or this wasn't a good choice of algorithm in the first place: the objects in the shape of cylinders were assigned an approxPolyDP vertex count of 3 or 4.
Perhaps I can threshold and, in general, assume the object is a cylinder whenever the value is 3 or 4, but I feel like that's not the most reliable method for 3D shape classification. I feel there is a better way to implement this, rather than just hardcoding values, since with this method a cylinder can easily be confused with a cube.
Is there any way I can improve my 3D shape recognition program?
Code:
import cv2
import numpy as np
from pyimagesearch import imutils
from PIL import Image
from time import time

def invert_img(img):
    img = (255 - img)
    return img

def threshold(im):
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    imgray = cv2.medianBlur(imgray, 9)
    imgray = cv2.Canny(imgray, 75, 200)
    return imgray

def view_all_contours(im, size_min, size_max):
    main = np.array([[]])
    cnt_target = im.copy()
    for c in cnts:
        epsilon = 0.1 * cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, epsilon, True)
        area = cv2.contourArea(c)
        print('area: ', area)
        test = im.copy()
        # To weed out contours that are too small or big
        if area > size_min and area < size_max:
            print(c[0, 0])
            print('approx: ', len(approx))
            max_pos = c.max(axis=0)
            max_x = max_pos[0, 0]
            max_y = max_pos[0, 1]
            min_pos = c.min(axis=0)
            min_x = min_pos[0, 0]
            min_y = min_pos[0, 1]
            # Load each contour onto image
            cv2.drawContours(cnt_target, c, -1, (0, 0, 255), 2)
            print('Found object')
            frame_f = test[min_y:max_y, min_x:max_x]
            main = np.append(main, approx[None, :][None, :])
            thresh = frame_f.copy()
            thresh = threshold(thresh)
            contours_small, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            cnts_small = sorted(contours_small, key=cv2.contourArea, reverse=True)
            cv2.drawContours(frame_f, cnts_small, -1, (0, 0, 255), 2)
            cv2.imshow('Thresh', thresh)
            cv2.imshow('Show Ya', frame_f)
            cv2.waitKey(0)
    # Uncomment in order to show all rectangles in image
    print('---------------------------------------------')
    #cv2.drawContours(cnt_target, cnts, -1,(0,255,0),2)
    print(main.shape)
    print(main)
    return cnt_target

time_1 = time()
roi = cv2.imread('images/beach_trash_3.jpg')
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
target = cv2.imread('images/beach_trash_3.jpg')
target = imutils.resize(target, height=400)
hsvt = cv2.cvtColor(target, cv2.COLOR_BGR2HSV)
img_height = target.shape[0]
img_width = target.shape[1]

# calculating object histogram
roihist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])

# normalize histogram and apply backprojection
cv2.normalize(roihist, roihist, 0, 255, cv2.NORM_MINMAX)
dst = cv2.calcBackProject([hsvt], [0, 1], roihist, [0, 180, 0, 256], 1)

# Now convolve with a circular disc
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
cv2.filter2D(dst, -1, disc, dst)

# threshold and binary AND
ret, thresh = cv2.threshold(dst, 50, 255, 0)
thresh_one = thresh.copy()
thresh = cv2.merge((thresh, thresh, thresh))
res = cv2.bitwise_and(target, thresh)

# Implementing morphological erosion & dilation
kernel = np.ones((9, 9), np.uint8)  # (6,6) to get more contours, (9,9) to reduce noise
thresh_one = cv2.erode(thresh_one, kernel, iterations=3)
thresh_one = cv2.dilate(thresh_one, kernel, iterations=2)

# Invert the image
thresh_one = invert_img(thresh_one)

# To show prev img
#res = np.vstack((target,thresh,res))
#cv2.imwrite('res.jpg',res)
#cv2.waitKey(0)
#cv2.imshow('Before contours', thresh_one)

cnt_target = target.copy()
cnt_full = target.copy()

# Code to draw the contours
contours, hierarchy = cv2.findContours(thresh_one.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key=cv2.contourArea, reverse=True)
print(time() - time_1)
size_min = 200
size_max = 5000
cnt_target = view_all_contours(target, size_min, size_max)
cv2.drawContours(cnt_full, cnts, -1, (0, 0, 255), 2)
res = imutils.resize(thresh_one, height=700)
cv2.imshow('Original image', target)
cv2.imshow('Preprocessed', thresh_one)
cv2.imshow('All contours', cnt_full)
cv2.imshow('Filtered contours', cnt_target)
cv2.waitKey(0)
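For reference, here is a minimal sketch of the vertex-count idea the question describes. The mask file name and the count-to-shape mapping are hypothetical, and the hardcoded mapping has exactly the fragility the question worries about:

import cv2

# assumes a binarized mask containing a single object (hypothetical file)
mask = cv2.imread('object_mask.png', cv2.IMREAD_GRAYSCALE)
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
c = max(contours, key=cv2.contourArea)

epsilon = 0.05 * cv2.arcLength(c, True)  # tolerance controls the vertex count
approx = cv2.approxPolyDP(c, epsilon, True)

# hardcoded mapping from vertex count to shape - illustrative only
n = len(approx)
if n <= 4:
    print('cylinder-ish (%d vertices)' % n)
elif n <= 6:
    print('cube-ish (%d vertices)' % n)
else:
    print('sphere-ish (%d vertices)' % n)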
