Impulse, Gaussian and salt-and-pepper noise with OpenCV - image-processing

I'm studying image processing from the famous Gonzalez "Digital Image Processing", and in the chapters on image restoration a lot of examples use computer-generated noise (Gaussian, salt and pepper, etc.). MATLAB has some built-in functions for this. What about OpenCV?

As far as I know there are no convenient built-in functions like in MATLAB, but with only a few lines of code you can create those images yourself.
For example, additive Gaussian noise:
Mat gaussian_noise = img.clone();
randn(gaussian_noise, 128, 30); // fills the matrix with Gaussian noise (mean 128, stddev 30)
Salt-and-pepper noise:
Mat saltpepper_noise = Mat::zeros(img.rows, img.cols, CV_8U);
randu(saltpepper_noise, 0, 255);
Mat black = saltpepper_noise < 30;
Mat white = saltpepper_noise > 225;
Mat saltpepper_img = img.clone();
saltpepper_img.setTo(255, white);
saltpepper_img.setTo(0, black);
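For reference, a rough Python/NumPy sketch of the same idea (my translation, not part of the original answer; the thresholds and sigma are the same illustrative values):
import cv2
import numpy as np

img = cv2.imread("test.jpg", cv2.IMREAD_GRAYSCALE)  # assumed input file

# additive Gaussian noise: draw noise, add, and clip back to [0, 255]
noise = np.random.normal(0, 30, img.shape)
gaussian_img = np.clip(img.astype(np.int16) + noise, 0, 255).astype(np.uint8)

# salt-and-pepper: uniform random field, threshold the tails
rnd = np.random.randint(0, 256, img.shape, dtype=np.uint8)
saltpepper_img = img.copy()
saltpepper_img[rnd < 30] = 0      # pepper
saltpepper_img[rnd > 225] = 255   # salt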

There is a function random_noise() in the scikit-image package. It has several built-in noise patterns, such as gaussian, s&p (salt-and-pepper), poisson and speckle.
Below is an example of how to use this method:
from PIL import Image
import numpy as np
from skimage.util import random_noise
im = Image.open("test.jpg")
# convert PIL Image to ndarray
im_arr = np.asarray(im)
# random_noise() converts the image from [0, 255] to [0, 1.0];
# internally it uses np.random.normal() to draw Gaussian noise
# and adds the generated noise back to the image
noise_img = random_noise(im_arr, mode='gaussian', var=0.05**2)
noise_img = (255*noise_img).astype(np.uint8)
img = Image.fromarray(noise_img)
img.show()
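The same function handles the other modes as well; for example, salt-and-pepper noise (the amount value here is just an illustration):
# replace about 5% of the pixels with salt or pepper
noise_img = random_noise(im_arr, mode='s&p', amount=0.05)
noise_img = (255 * noise_img).astype(np.uint8)
Image.fromarray(noise_img).show()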
There is also a package called imgaug, which is dedicated to augmenting images in various ways. It provides Gaussian, Poisson and salt-and-pepper noise augmenters. Here is how you can use it to add noise to an image:
from PIL import Image
import numpy as np
from imgaug import augmenters as iaa
def main():
    im = Image.open("bg_img.jpg")
    im_arr = np.asarray(im)
    # gaussian noise
    # aug = iaa.AdditiveGaussianNoise(loc=0, scale=0.1*255)
    # poisson noise
    # aug = iaa.AdditivePoissonNoise(lam=10.0, per_channel=True)
    # salt and pepper noise
    aug = iaa.SaltAndPepper(p=0.05)
    im_arr = aug.augment_image(im_arr)
    im = Image.fromarray(im_arr).convert('RGB')
    im.show()

if __name__ == "__main__":
    main()

A simple function to add Gaussian, salt-and-pepper, speckle and Poisson noise to an image:
Parameters
----------
image : ndarray
    Input image data. Will be converted to float.
mode : str
    One of the following strings, selecting the type of noise to add:
    'gauss'    Gaussian-distributed additive noise.
    'poisson'  Poisson-distributed noise generated from the data.
    's&p'      Replaces random pixels with 0 or 1.
    'speckle'  Multiplicative noise using out = image + n*image, where
               n is Gaussian noise with specified mean & variance.
import numpy as np
import os
import cv2

def noisy(noise_typ, image):
    if noise_typ == "gauss":
        row, col, ch = image.shape
        mean = 0
        var = 0.1
        sigma = var**0.5
        gauss = np.random.normal(mean, sigma, (row, col, ch))
        noisy = image + gauss
        return noisy
    elif noise_typ == "s&p":
        row, col, ch = image.shape
        s_vs_p = 0.5
        amount = 0.004
        out = np.copy(image)  # work on a copy so the input is not modified
        # Salt mode
        num_salt = np.ceil(amount * image.size * s_vs_p)
        coords = tuple(np.random.randint(0, i - 1, int(num_salt))
                       for i in image.shape)
        out[coords] = 1
        # Pepper mode
        num_pepper = np.ceil(amount * image.size * (1. - s_vs_p))
        coords = tuple(np.random.randint(0, i - 1, int(num_pepper))
                       for i in image.shape)
        out[coords] = 0
        return out
    elif noise_typ == "poisson":
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        noisy = np.random.poisson(image * vals) / float(vals)
        return noisy
    elif noise_typ == "speckle":
        row, col, ch = image.shape
        gauss = np.random.randn(row, col, ch)
        noisy = image + image * gauss
        return noisy
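A minimal usage sketch (the file name is an assumption; the s&p and speckle modes expect a float image in [0, 1]):
img = cv2.imread("test.jpg").astype(np.float64) / 255.0
out = noisy("s&p", img)
cv2.imshow("noisy", out)
cv2.waitKey(0)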

"Salt & Pepper" noise can be added in a quite simple fashion using NumPy matrix operations.
def add_salt_and_pepper(gb, prob):
'''Adds "Salt & Pepper" noise to an image.
gb: should be one-channel image with pixels in [0, 1] range
prob: probability (threshold) that controls level of noise'''
rnd = np.random.rand(gb.shape[0], gb.shape[1])
noisy = gb.copy()
noisy[rnd < prob] = 0
noisy[rnd > 1 - prob] = 1
return noisy
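A usage sketch, assuming a uint8 grayscale image that first has to be scaled into [0, 1] (the file name and probability are illustrative):
import cv2
import numpy as np

gray = cv2.imread("test.jpg", cv2.IMREAD_GRAYSCALE) / 255.0
out = add_salt_and_pepper(gray, 0.05)
cv2.imwrite("noisy.jpg", (out * 255).astype(np.uint8))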

# Adding noise to the image
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
img = cv2.imread('./fruit.png', 0)
im = np.zeros(img.shape, np.uint8)  # use a separate matrix; randn would otherwise overwrite the original image
mean = 0
sigma = 10
cv2.randn(im, mean, sigma)          # fill it with normally distributed noise
Fruit_Noise = cv2.add(img, im)      # add the noise to the original image (saturating add)
plt.imshow(Fruit_Noise, cmap='gray')
The values of mean and sigma can be altered to control the strength and character of the noise.
You can use either randn or randu according to the need. Have a look at the documentation: https://docs.opencv.org/2.4/modules/core/doc/operations_on_arrays.html#cv2.randu
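For uniform rather than Gaussian noise, a minimal sketch continuing the snippet above (the bounds are illustrative):
uni = np.zeros(img.shape, np.uint8)
cv2.randu(uni, 0, 50)              # uniform noise in [0, 50)
Fruit_Uniform = cv2.add(img, uni)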

I made some changes to @Shubham Pachori's code. When reading an image into a numpy array, the default dtype is uint8, which can cause wrapping when adding noise to the image.
import numpy as np
from PIL import Image
"""
image: read through PIL.Image.open('path')
sigma: standard deviation of the gaussian noise
factor: the bigger this value is, the noisier the poisson_noised image is

##IMPORTANT: when reading an image into a numpy array, the default dtype is uint8,
which can cause wrapping when adding noise to the image.
E.g., example = np.array([128,240,255], dtype='uint8')
      example + 50 = np.array([178,34,49], dtype='uint8')
Converting the array to dtype='int16' solves this problem.
"""
def gaussian_noise(image, sigma):
    img = np.array(image)
    noise = np.random.randn(img.shape[0], img.shape[1], img.shape[2])
    img = img.astype('int16')
    img_noise = img + noise * sigma
    img_noise = np.clip(img_noise, 0, 255)
    img_noise = img_noise.astype('uint8')
    return Image.fromarray(img_noise)

def poisson_noise(image, factor):
    factor = 1 / factor
    img = np.array(image)
    img = img.astype('int16')
    img_noise = np.random.poisson(img * factor) / float(factor)
    np.clip(img_noise, 0, 255, img_noise)
    img_noise = img_noise.astype('uint8')
    return Image.fromarray(img_noise)
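A quick usage sketch (the path and parameter values are assumptions):
im = Image.open("test.jpg")
gaussian_noise(im, sigma=25).show()
poisson_noise(im, factor=2).show()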

http://scikit-image.org/docs/dev/api/skimage.util.html#skimage.util.random_noise
skimage.util.random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs)

# Adding noise
import numpy as np

m, n = img.shape  # img: an existing grayscale image
saltpepper_noise = np.random.rand(m, n)  # uniform random values in [0, 1)
for i in range(m):
    for j in range(n):
        if saltpepper_noise[i, j] <= 0.5:
            saltpepper_noise[i, j] = 0
        else:
            saltpepper_noise[i, j] = 255
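The double loop can be replaced with a single vectorized NumPy expression that does the same thing:
saltpepper_noise = np.where(np.random.rand(m, n) <= 0.5, 0, 255).astype(np.uint8)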

import random

def add_salt_noise(src, ratio: float = 0.05, noise: list = [0, 0, 0]):
    dst = src.copy()
    shuffle_dict = {}
    i = 0
    while i < int(dst.shape[0] * dst.shape[1] * ratio):
        x = random.randint(0, dst.shape[0] - 1)
        y = random.randint(0, dst.shape[1] - 1)
        if (x, y) in shuffle_dict:
            continue
        dst[x, y] = noise
        shuffle_dict[(x, y)] = 0  # remember visited pixels so each is altered only once
        i += 1
    return dst
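Usage sketch on a BGR image (the file name is an assumption); noise=[0, 0, 0] inserts pepper, noise=[255, 255, 255] inserts salt:
import cv2

img = cv2.imread("test.jpg")
peppered = add_salt_noise(img, ratio=0.05, noise=[0, 0, 0])
salted = add_salt_noise(img, ratio=0.05, noise=[255, 255, 255])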

Although there is no built-in function like MATLAB's imnoise(image,noiseType,NoiseLevel), we can easily add the required amount of random-valued impulse noise or salt-and-pepper noise to an image manually.
To add random-valued impulse noise:
import random as r

def addRvinGray(image, n):  # add random-valued impulse noise in grayscale
    '''parameters:
    image: type=numpy array. input image in which you want to add noise.
    n: noise level (in percentage)'''
    k = 0  # counter variable
    ih = image.shape[0]
    iw = image.shape[1]
    noisypixels = (ih * iw * n) / 100  # number of pixels to be altered
    for i in range(ih * iw):
        if k < noisypixels:
            # access a random pixel and give it a random intensity (0-255)
            image[r.randrange(0, ih)][r.randrange(0, iw)] = r.randrange(0, 256)
            k += 1
        else:
            break
    return image
To add salt-and-pepper noise:
def addSaltGray(image, n):  # add salt-&-pepper noise to a grayscale image
    k = 0
    salt = True
    ih = image.shape[0]
    iw = image.shape[1]
    noisypixels = (ih * iw * n) / 100
    for i in range(ih * iw):
        if k < noisypixels:  # keep track of noise level
            if salt == True:
                image[r.randrange(0, ih)][r.randrange(0, iw)] = 255
                salt = False
            else:
                image[r.randrange(0, ih)][r.randrange(0, iw)] = 0
                salt = True
            k += 1
        else:
            break
    return image
Note: for color images, first split the image into three or four channels, depending on the input image, using the OpenCV function:
(B, G, R) = cv2.split(image)
(B, G, R, A) = cv2.split(image)
After splitting, perform the same operations on all channels.
At the end, merge all the channels:
merged = cv2.merge([B, G, R])
return merged
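Putting it together for a color image, a minimal sketch (the file name and noise level are assumptions; addSaltGray modifies the channel arrays in place):
import cv2

img = cv2.imread("test.jpg")
(B, G, R) = cv2.split(img)
for ch in (B, G, R):
    addSaltGray(ch, 5)  # 5% noise per channel
merged = cv2.merge([B, G, R])
cv2.imwrite("noisy.jpg", merged)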

Related

How can I compute percentage similarity between two images?

I have written a code snippet to compute the structural similarity index (SSIM), pixel similarity, SIFT-feature similarity, and earth mover's distance between two images. I would like to find a percentage similarity using these four metrics combined.
I tried averaging all the values from the metrics and converting them into percentages, but I am extremely doubtful of the approach.
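For reference, a sketch of the naive combination described above (the normalizations are assumptions; each metric is first mapped so that 1 means identical, then the four values are averaged):
def combined_similarity(structural, pixel, sift, emd, emd_scale=0.001):
    # map each metric onto [0, 1] where 1 = identical (scales are assumptions)
    s = (structural + 1) / 2        # SSIM lies in [-1, 1]
    p = 1 - pixel                   # pixel_sim: 0 means identical
    e = 1 / (1 + emd / emd_scale)   # squash the unbounded distance
    return 100 * (s + p + sift + e) / 4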
import warnings
from skimage.measure import compare_ssim
from skimage.transform import resize
from scipy.stats import wasserstein_distance
from scipy.misc import imsave
from scipy.ndimage import imread
import numpy as np
import cv2

##
# Globals
##

warnings.filterwarnings('ignore')

# specify resized image sizes
height = 2**10
width = 2**10

##
# Functions
##

def get_img(path, norm_size=True, norm_exposure=False):
    '''
    Prepare an image for image processing tasks
    '''
    # flatten returns a 2d grayscale array
    img = imread(path, flatten=True).astype(int)
    # resizing returns float vals 0:255; convert to ints for downstream tasks
    if norm_size:
        img = resize(img, (height, width), anti_aliasing=True, preserve_range=True)
    if norm_exposure:
        img = normalize_exposure(img)
    return img

def get_histogram(img):
    '''
    Get the histogram of an image. For an 8-bit, grayscale image, the
    histogram will be a 256-unit vector in which the nth value indicates
    the percent of the pixels in the image with the given darkness level.
    The histogram's values sum to 1.
    '''
    h, w = img.shape
    hist = [0.0] * 256
    for i in range(h):
        for j in range(w):
            hist[img[i, j]] += 1
    return np.array(hist) / (h * w)

def normalize_exposure(img):
    '''
    Normalize the exposure of an image.
    '''
    img = img.astype(int)
    hist = get_histogram(img)
    # get the sum of vals accumulated by each position in hist
    cdf = np.array([sum(hist[:i+1]) for i in range(len(hist))])
    # determine the normalization values for each unit of the cdf
    sk = np.uint8(255 * cdf)
    # normalize each position in the output image
    height, width = img.shape
    normalized = np.zeros_like(img)
    for i in range(0, height):
        for j in range(0, width):
            normalized[i, j] = sk[img[i, j]]
    return normalized.astype(int)

def earth_movers_distance(path_a, path_b):
    '''
    Measure the Earth Mover's distance between two images
    #args:
        {str} path_a: the path to an image file
        {str} path_b: the path to an image file
    #returns:
        TODO
    '''
    img_a = get_img(path_a, norm_exposure=True)
    img_b = get_img(path_b, norm_exposure=True)
    hist_a = get_histogram(img_a)
    hist_b = get_histogram(img_b)
    return wasserstein_distance(hist_a, hist_b)

def structural_sim(path_a, path_b):
    '''
    Measure the structural similarity between two images
    #args:
        {str} path_a: the path to an image file
        {str} path_b: the path to an image file
    #returns:
        {float} a float {-1:1} that measures structural similarity
            between the input images
    '''
    img_a = get_img(path_a)
    img_b = get_img(path_b)
    sim, diff = compare_ssim(img_a, img_b, full=True)  # 1 means the images are similar
    return sim

def pixel_sim(path_a, path_b):
    '''
    Measure the pixel-level similarity between two images
    #args:
        {str} path_a: the path to an image file
        {str} path_b: the path to an image file
    #returns:
        {float} a float {0:1} that measures the pixel-level difference
            between the input images (0 means identical)
    '''
    img_a = get_img(path_a, norm_exposure=True)
    img_b = get_img(path_b, norm_exposure=True)
    return np.sum(np.absolute(img_a - img_b)) / float(height * width) / 255

def sift_sim(path_a, path_b):
    '''
    Use ORB features to measure image similarity
    #args:
        {str} path_a: the path to an image file
        {str} path_b: the path to an image file
    #returns:
        TODO
    '''
    # initialize the feature detector (ORB, despite the function name)
    orb = cv2.ORB()
    # get the images
    img_a = cv2.imread(path_a, 0)
    img_b = cv2.imread(path_b, 0)
    # find the keypoints and descriptors
    kp_a, desc_a = orb.detectAndCompute(img_a, None)
    kp_b, desc_b = orb.detectAndCompute(img_b, None)
    # initialize the bruteforce matcher
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    # match.distance is a float between {0:100} - lower means more similar
    matches = bf.match(desc_a, desc_b)
    similar_regions = [i for i in matches if i.distance < 70]
    if len(matches) == 0:
        return 0
    return len(similar_regions) / float(len(matches))

if __name__ == '__main__':
    img_a = 'sample/image1.png'
    img_b = 'sample/image2.png'  # sample/087_GR_ALL_RS_036.png
    # get the similarity values
    structural_sim = structural_sim(img_a, img_b)  # 1 is perfect structural similarity; -1 indicates none
    pixel_sim = pixel_sim(img_a, img_b)            # 0 is similar
    sift_sim = sift_sim(img_a, img_b)              # 1 is similar
    emd = earth_movers_distance(img_a, img_b)      # 0.0 is similar
    print(structural_sim, pixel_sim, sift_sim, emd)
I am expecting the results in percentages such as 75%, 88%, 90%.
[EDIT] Consider image1 from https://imgur.com/Ys2xXJu
and consider image2 from https://imgur.com/881sz0V

How to remove the noise in the given image so that the ocr output be perfect?

I have done Otsu thresholding on this Bengali text image and used Tesseract to OCR it, but the output is very bad. What preprocessing should I apply to remove the noise? I want to deskew the image as well, as it is slightly skewed.
My code is given below
import tesserocr
from PIL import Image
import cv2
import codecs
image = cv2.imread("crop2.bmp", 0)
(thresh, bw_img) = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
img = Image.fromarray(bw_img)
text = tesserocr.image_to_text(img, lang='ben')
file = codecs.open("output_text", "w", "utf-8")
file.write(text)
file.close()
You can remove the noise by removing small connected components, which might improve the accuracy. You will also need to find an optimum threshold value for what counts as a noisy component.
import cv2
import numpy as np

img = cv2.imread(r'D:\Image\st5.png', 0)
ret, bw = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)
connectivity = 4
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(bw, connectivity, cv2.CV_32S)
sizes = stats[1:, -1]
nb_components = nb_components - 1
min_size = 50  # threshold value for small noisy components
img2 = np.zeros(output.shape, np.uint8)
for i in range(0, nb_components):
    if sizes[i] >= min_size:
        img2[output == i + 1] = 255
res = cv2.bitwise_not(img2)
Denoised image:
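The question also asks about deskewing, which the answer above does not cover; a common minAreaRect-based sketch follows (the angle handling is an assumption, and the angle convention differs across OpenCV versions):
import cv2
import numpy as np

def deskew(bw):
    # bw: binary image with the text in white
    coords = np.column_stack(np.where(bw > 0))
    angle = cv2.minAreaRect(coords)[-1]  # angle of the tightest rotated box
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    h, w = bw.shape
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    return cv2.warpAffine(bw, M, (w, h), flags=cv2.INTER_CUBIC,
                          borderMode=cv2.BORDER_REPLICATE)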

Difficult time trying to do shape recognition for 3D objects

I am trying to make a shape recognition classifier in which if you give an individual picture of an object (from a scene), it would be able to classify (after machine learning) the shape of an object (cylinder, cube, sphere, etc).
Original scene:
Individual objects it will classify:
I attempted to do this using cv2.approxPolyDP in an attempt to classify a cylinder. However, either my implementation isn't good or this wasn't a good choice of algorithm in the first place: the cylinder-shaped objects were assigned an approxPolyDP vertex count of 3 or 4.
Perhaps I could threshold and, in general, assume the object is a cylinder when given a value of 3 or 4, but I feel that is not a reliable method for 3D shape classification; it could easily confuse a cylinder with a cube. There must be a better approach than hardcoding values.
Is there any way I can improve my 3D shape recognition program?
Code:
import cv2
import numpy as np
from pyimagesearch import imutils
from PIL import Image
from time import time

def invert_img(img):
    img = (255 - img)
    return img

def threshold(im):
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    imgray = cv2.medianBlur(imgray, 9)
    imgray = cv2.Canny(imgray, 75, 200)
    return imgray

def view_all_contours(im, size_min, size_max):
    main = np.array([[]])
    cnt_target = im.copy()
    for c in cnts:
        epsilon = 0.1 * cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, epsilon, True)
        area = cv2.contourArea(c)
        print('area: ', area)
        test = im.copy()
        # To weed out contours that are too small or big
        if area > size_min and area < size_max:
            print(c[0, 0])
            print('approx: ', len(approx))
            max_pos = c.max(axis=0)
            max_x = max_pos[0, 0]
            max_y = max_pos[0, 1]
            min_pos = c.min(axis=0)
            min_x = min_pos[0, 0]
            min_y = min_pos[0, 1]
            # Load each contour onto image
            cv2.drawContours(cnt_target, c, -1, (0, 0, 255), 2)
            print('Found object')
            frame_f = test[min_y:max_y, min_x:max_x]
            main = np.append(main, approx[None, :][None, :])
            thresh = frame_f.copy()
            thresh = threshold(thresh)
            contours_small, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            cnts_small = sorted(contours_small, key=cv2.contourArea, reverse=True)
            cv2.drawContours(frame_f, cnts_small, -1, (0, 0, 255), 2)
            cv2.imshow('Thresh', thresh)
            cv2.imshow('Show Ya', frame_f)
            cv2.waitKey(0)
    # Uncomment in order to show all rectangles in image
    print('---------------------------------------------')
    #cv2.drawContours(cnt_target, cnts, -1, (0, 255, 0), 2)
    print(main.shape)
    print(main)
    return cnt_target

time_1 = time()
roi = cv2.imread('images/beach_trash_3.jpg')
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
target = cv2.imread('images/beach_trash_3.jpg')
target = imutils.resize(target, height=400)
hsvt = cv2.cvtColor(target, cv2.COLOR_BGR2HSV)
img_height = target.shape[0]
img_width = target.shape[1]
# calculating object histogram
roihist = cv2.calcHist([hsv], [0, 1], None, [180, 256], [0, 180, 0, 256])
# normalize histogram and apply backprojection
cv2.normalize(roihist, roihist, 0, 255, cv2.NORM_MINMAX)
dst = cv2.calcBackProject([hsvt], [0, 1], roihist, [0, 180, 0, 256], 1)
# Now convolve with a circular disc
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
cv2.filter2D(dst, -1, disc, dst)
# threshold and binary AND
ret, thresh = cv2.threshold(dst, 50, 255, 0)
thresh_one = thresh.copy()
thresh = cv2.merge((thresh, thresh, thresh))
res = cv2.bitwise_and(target, thresh)
# Implementing morphological erosion & dilation
kernel = np.ones((9, 9), np.uint8)  # (6,6) to get more contours, (9,9) to reduce noise
thresh_one = cv2.erode(thresh_one, kernel, iterations=3)
thresh_one = cv2.dilate(thresh_one, kernel, iterations=2)
# Invert the image
thresh_one = invert_img(thresh_one)
# To show prev img
#res = np.vstack((target, thresh, res))
#cv2.imwrite('res.jpg', res)
#cv2.waitKey(0)
#cv2.imshow('Before contours', thresh_one)
cnt_target = target.copy()
cnt_full = target.copy()
# Code to draw the contours
contours, hierarchy = cv2.findContours(thresh_one.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key=cv2.contourArea, reverse=True)
print(time() - time_1)
size_min = 200
size_max = 5000
cnt_target = view_all_contours(target, size_min, size_max)
cv2.drawContours(cnt_full, cnts, -1, (0, 0, 255), 2)
res = imutils.resize(thresh_one, height=700)
cv2.imshow('Original image', target)
cv2.imshow('Preprocessed', thresh_one)
cv2.imshow('All contours', cnt_full)
cv2.imshow('Filtered contours', cnt_target)
cv2.waitKey(0)

OpenCV: Lucas Kanade applied within certain area (to detect facial features)

I am trying to perform face tracking with the Lucas-Kanade algorithm combined with Haar cascade classification. The Lucas-Kanade tracking is successful and can track the user, but unfortunately some of the good-features-to-track points are wasted on corners in the background. I wish to use the Haar cascade's ability to detect a face to get the coordinates of the detected face and apply Lucas-Kanade only within that restricted area.
Basically, I want to use the Haar cascade to detect the face, get the x, y, w, and h values, and use those coordinates to apply Lucas-Kanade within that restricted area (so that no features are wasted on the background and only facial features are detected).
The line of code that picks the points for the Lucas-Kanade algorithm is this one:
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
How do I do that?
Code:
from matplotlib import pyplot as plt
import numpy as np
import cv2

rectangle_x = 0
face_classifier = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
cap = cv2.VideoCapture(0)

# params for ShiTomasi corner detection
feature_params = dict(maxCorners=200,
                      qualityLevel=0.01,
                      minDistance=10,
                      blockSize=7)

# Parameters for lucas kanade optical flow
lk_params = dict(winSize=(15, 15),
                 maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

# Create some random colors
color = np.random.randint(0, 255, (100, 3))

# Take first frame and find corners in it
ret, old_frame = cap.read()
cv2.imshow('Old_Frame', old_frame)
cv2.waitKey(0)
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
restart = True

face = face_classifier.detectMultiScale(old_gray, 1.2, 4)
if len(face) == 0:
    print("This is empty")

for (x, y, w, h) in face:
    focused_face = old_frame[y: y+h, x: x+w]
    cv2.imshow('Old_Frame', old_frame)
    face_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.cvtColor(focused_face, cv2.COLOR_BGR2GRAY)
    corners_t = cv2.goodFeaturesToTrack(gray, mask=None, **feature_params)
    corners = np.int0(corners_t)
    for i in corners:
        ix, iy = i.ravel()
        cv2.circle(focused_face, (ix, iy), 3, 255, -1)
        cv2.circle(old_frame, (x + ix, y + iy), 3, 255, -1)
        print(ix, " ", iy)
    plt.imshow(old_frame), plt.show()
    ##########
    #############################
    p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)
    #############################
    # Create a mask image for drawing purposes
    mask = np.zeros_like(old_frame)
    print("X: ", x)
    print("Y: ", y)

while(1):
    ret, frame = cap.read()
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # calculate optical flow
    p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
    # Select good points
    good_new = p1[st == 1]
    good_old = p0[st == 1]
    # draw the circles
    for i, (new, old) in enumerate(zip(good_new, good_old)):
        a, b = new.ravel()
        c, d = old.ravel()
        cv2.circle(frame, (int(a), int(b)), 5, color[i].tolist(), -1)
        if i == 99:
            break
    cv2.imshow('frame', frame)
    k = cv2.waitKey(30) & 0xff
    if k == 27:
        break
    # Now update the previous frame and previous points
    old_gray = frame_gray.copy()
    p0 = good_new.reshape(-1, 1, 2)

cv2.destroyAllWindows()
cap.release()
Here is the code snippet:
p0 = np.array([[[x, y]], [[x0, y0]]], np.float32)
Just replace p0 in the original code and assign x, x0, ... with your desired points:
- make sure it is a 2D array
- and the type is float32 for single precision
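Alternatively (not part of the answer above, just a sketch of another standard approach), cv2.goodFeaturesToTrack accepts a mask argument, so feature detection can be restricted to the detected face rectangle directly:
# build a mask that is non-zero only inside the detected face rectangle
face_mask = np.zeros_like(old_gray)
face_mask[y:y+h, x:x+w] = 255
p0 = cv2.goodFeaturesToTrack(old_gray, mask=face_mask, **feature_params)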

Background subtraction in opencv2

I am trying to detect foreground motion using opencv2 by removing static (mostly) BG elements. The method I am using is based on taking the mean of a series of images representing the background, then calculating one standard deviation above and below that mean, and using that as a window to detect foreground motion.
This mechanism reportedly works well for moderately noisy environments like waving trees in the BG.
The desired output is a mask that can be used in a subsequent operation so as to minimise further processing. Specifically I am going to use optical flow detection within that region.
cv2 has made this much easier and the code is much simpler to read and understand. Thanks cv2 and numpy.
But I am having difficulty doing the correct FG detection.
Ideally I also want to erode/dilate the BG mean so as to eliminate 1-pixel noise.
The code is all together, so you have a number of frames at the start (BGsample) to gather the BG data before FG detection starts. The only dependencies are opencv2 (> 2.3.1) and numpy (which should be included in opencv > 2.3.1).
import cv2
import numpy as np

if __name__ == '__main__':
    cap = cv2.VideoCapture(0)  # webcam
    cv2.namedWindow("input")
    cv2.namedWindow("sig2")
    cv2.namedWindow("detect")
    BGsample = 20  # number of frames to gather BG samples from at start of capture
    success, img = cap.read()
    width = int(cap.get(3))   # cast to int so the accumulators get integer shapes
    height = int(cap.get(4))
    # can use img.shape[:-1] # cut off extra channels
    if success:
        acc = np.zeros((height, width), np.float32)    # 32 bit accumulator
        sqacc = np.zeros((height, width), np.float32)  # 32 bit accumulator
        for i in range(20): a = cap.read()  # dummy to warm up sensor
        # gather BG samples
        for i in range(BGsample):
            success, img = cap.read()
            frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            cv2.accumulate(frame, acc)
            cv2.accumulateSquare(frame, sqacc)
        M = acc / float(BGsample)
        sqaccM = sqacc / float(BGsample)
        M2 = M * M
        sig2 = sqaccM - M2
        # have BG samples now
        # start FG detection
        key = -1
        while key < 0:
            success, img = cap.read()
            frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Ideally we create a mask for future use that is B/W for FG objects
            # (using erode or dilate to remove noise)
            # this isn't quite right
            level = M + sig2 - frame
            grey = cv2.morphologyEx(level, cv2.MORPH_DILATE,
                                    cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)), iterations=2)
            cv2.imshow("input", frame)
            cv2.imshow("sig2", sig2 / 60)
            cv2.imshow("detect", grey / 20)
            key = cv2.waitKey(1)
    cv2.destroyAllWindows()
I don't think you need to manually compute the mean and standard deviation; use cv2.meanStdDev instead. In the code below, I'm using your average background matrix, computed from
M = acc/float(BGsample)
So, now we can compute the mean and standard deviation of the average background image, and finally inRange is used to pull out the range that you wanted (i.e., the mean +/- 1 standard deviation).
(mu, sigma) = cv2.meanStdDev(M)
fg = cv2.inRange(M, (mu[0] - sigma[0]), (mu[0] + sigma[0]))
# proceed with morphological clean-up here...
Hope that helps!
My best guess so far: using detectmin and detectmax to coerce the floating-point sigma into grayscale for cv2.inRange to use.
It seems to work OK, but I was hoping for better... there are plenty of holes in valid FG data.
I suppose it would work better in RGB instead of grayscale.
I can't get noise reduction using dilate or erode to work (see the sketch after the code).
Any improvements?
import cv2
import numpy as np

if __name__ == '__main__':
    cap = cv2.VideoCapture(1)
    cv2.namedWindow("input")
    #cv2.namedWindow("sig2")
    cv2.namedWindow("detect")
    BGsample = 20  # number of frames to gather BG samples from at start of capture
    success, img = cap.read()
    width = int(cap.get(3))
    height = int(cap.get(4))
    if success:
        acc = np.zeros((height, width), np.float32)    # 32 bit accumulator
        sqacc = np.zeros((height, width), np.float32)  # 32 bit accumulator
        for i in range(20): a = cap.read()  # dummy to warm up sensor
        # gather BG samples
        for i in range(BGsample):
            success, img = cap.read()
            frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            cv2.accumulate(frame, acc)
            cv2.accumulateSquare(frame, sqacc)
        M = acc / float(BGsample)
        sqaccM = sqacc / float(BGsample)
        M2 = M * M
        sig2 = sqaccM - M2
        # have BG samples now
        # calculate upper and lower bounds of detection window around mean.
        # coerce into 8bit image space for cv2.inRange compare
        detectmin = cv2.convertScaleAbs(M - sig2)
        detectmax = cv2.convertScaleAbs(M + sig2)
        # start FG detection
        key = -1
        while key < 0:
            success, img = cap.read()
            frame = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            level = cv2.inRange(frame, detectmin, detectmax)
            cv2.imshow("input", frame)
            #cv2.imshow("sig2", M/200)
            cv2.imshow("detect", level)
            key = cv2.waitKey(1)
    cv2.destroyAllWindows()
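For the erode/dilate noise reduction mentioned above, a minimal morphological sketch (the kernel size is an assumption): opening removes isolated speckles, closing fills small holes in valid FG regions.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
cleaned = cv2.morphologyEx(level, cv2.MORPH_OPEN, kernel)     # remove 1-pixel noise
cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel)  # fill small holes in FG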
