I am trying to design an app similar to camscanner. For that, I have to take an image and then find the document in that. I started off with the code described here - http://opencvpython.blogspot.in/2012/06/sudoku-solver-part-2.html
I found the contours and the rectangular contour with max area should be the required document. For every contour, I am finding an approximate closed PolyDP. Of all the polyDP of size 4, the one with max area should be the required document. However, this method is not working.
The input image for the process is this
I tried to print the contour with max area and this resulted in this (Contour inside letter 'C')
# Load the photo and build a binary image in which contours can be traced.
img = cv2.imread('bounce.jpeg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Blur first so the adaptive threshold reacts less to pixel-level noise.
gray = cv2.GaussianBlur(gray, (5, 5), 0)
# Named constants replace the literal 1, 1 (same numeric values): Gaussian
# neighbourhood weighting and an inverted binary output.
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 11, 2)
# findContours returns 3 values on OpenCV 3.x but 2 on 2.x/4.x; the last two
# are always (contours, hierarchy), so slice instead of unpacking three.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
def biggestRectangle(contours, require_quad=False):
    """Return the index of the largest contour, or -1 if none qualifies.

    Contours whose area is 100 or less are ignored.

    contours: sequence of OpenCV contours (e.g. from cv2.findContours).
    require_quad: when True, a contour is only considered if its polygonal
        approximation (cv2.approxPolyDP, epsilon = 10% of the perimeter) has
        exactly four vertices. The default (False) keeps the existing
        behaviour, in which that check was commented out.
    """
    max_area = 0
    indexReturn = -1
    for index, contour in enumerate(contours):
        area = cv2.contourArea(contour)
        # Skip small contours and anything not larger than the best so far.
        if area <= 100 or area <= max_area:
            continue
        if require_quad:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.1 * peri, True)
            if len(approx) != 4:
                continue
        max_area = area
        indexReturn = index
    return indexReturn
# Select a contour index and save the input image with that contour drawn in green.
# NOTE(review): biggestRectangle returns -1 when nothing qualifies; per the OpenCV
# docs a negative contour index makes drawContours draw every contour — confirm
# that this fallback is intended.
indexReturn = biggestRectangle(contours)
cv2.imwrite('hola.png',cv2.drawContours(img, contours, indexReturn, (0,255,0)))
What is going wrong in this? Is there any other method by which I can capture the document in this picture?
Try this :
output image
import cv2
import numpy as np
img = cv2.imread('bounce.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Build a 256-entry lookup table for gamma correction. The exponent 1/0.3 is
# greater than 1, so every value below 255 is mapped to a darker value.
invGamma = 1.0 / 0.3
table = np.array([((i / 255.0) ** invGamma) * 255
                  for i in np.arange(0, 256)]).astype("uint8")
# apply gamma correction using the lookup table
gray = cv2.LUT(gray, table)
# A single global threshold is used here in place of the question's adaptive
# threshold.
ret, thresh1 = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY)
# findContours returns 3 values on OpenCV 3.x but 2 on 2.x/4.x; the last two
# are always (contours, hierarchy), so slice instead of unpacking three.
contours, hierarchy = cv2.findContours(thresh1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
def biggestRectangle(contours, require_quad=False):
    """Return the index of the largest contour, or -1 if none qualifies.

    Contours whose area is 100 or less are ignored.

    contours: sequence of OpenCV contours (e.g. from cv2.findContours).
    require_quad: when True, a contour is only considered if its polygonal
        approximation (cv2.approxPolyDP, epsilon = 10% of the perimeter) has
        exactly four vertices. The default (False) keeps the existing
        behaviour, in which that check was commented out.
    """
    max_area = 0
    indexReturn = -1
    for index, contour in enumerate(contours):
        area = cv2.contourArea(contour)
        # Skip small contours and anything not larger than the best so far.
        if area <= 100 or area <= max_area:
            continue
        if require_quad:
            peri = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.1 * peri, True)
            if len(approx) != 4:
                continue
        max_area = area
        indexReturn = index
    return indexReturn
# Select the largest contour, take its convex hull and save the image with the
# hull outlined in green (thickness 3).
# NOTE(review): biggestRectangle returns -1 when nothing qualifies, in which case
# contours[-1] silently selects the last contour — confirm or guard against it.
indexReturn = biggestRectangle(contours)
hull = cv2.convexHull(contours[indexReturn])
cv2.imwrite('hola.png',cv2.drawContours(img, [hull], 0, (0,255,0),3))
I would do it like this:
Do preprocessing like blur / canny
Extract all lines from the image using the hough line transform (open cv doc).
Use the 4 strongest lines
Try to construct the contour of the document using the four lines
Right now I do not have OpenCV installed, so I cannot try this approach, but maybe it will lead you in the right direction.
I have the following problem: when detecting a white line under various lighting conditions, a mask (based on HSV) performs well in only one scenario at a time (either very bright or very shaded areas), as seen below.
My code is as follows, I am using HSV. The threshold for upper and lower is a constant value (+x/-x)
## SHADE
# Lower/upper HSV bounds used for the white line in shaded areas.
shadeLower1 = np.array([127,30,117] , dtype=np.uint8)
shadeUpper1 = np.array([147,51,138], dtype=np.uint8)
## SUN
# Lower/upper HSV bounds used for the white line in sunny areas.
sunLower2 = np.array([4,0,184], dtype=np.uint8)
sunUpper2 = np.array([104,57,255], dtype=np.uint8)
# One binary mask per lighting condition.
# NOTE(review): `hsv` is not defined in this snippet — presumably the frame
# converted with cv2.COLOR_BGR2HSV; confirm.
mask1 = cv2.inRange(hsv, shadeLower1, shadeUpper1)
mask2 = cv2.inRange(hsv, sunLower2, sunUpper2)
# Element-wise maximum of the two masks: a pixel is set if either range matched.
mask = cv2.max(mask1, mask2)
For instance, it works fine in the shaded region (the white tape is detected perfectly), but once it reaches the sunny area, the mask window is saturated with white and I lose my white object.
Any help would be appreciated in what to do!
Shaded Area
Sunny Area
I mostly did the same thing you did for thresholding, but I used bitwise_and instead of bitwise_or (bitwise_or is the same as cv2.max). The lines are a little messy, but hopefully good enough for you to use. You might be able to clean them up more if you take the hue channel into account to exclude the red (I avoided it since white is technically all hues).
It might even be worth it to try and filter across multiple color spaces and combine the masks.
import cv2
import numpy as np
# find path and return its contour
def findPath(hsv):
    """Return the largest low-saturation, high-value region of `hsv` as a contour.

    hsv: 3-channel image, split here into h, s and v planes (the caller
        converts with cv2.COLOR_BGR2HSV).
    Returns the contour with the largest area, or None when the cleaned-up
    mask contains no contours.
    """
    # threshold on s and v channel (hue is ignored)
    _, s, v = cv2.split(hsv)
    low_saturation = cv2.inRange(s, 0, 45)
    high_value = cv2.inRange(v, 115, 255)
    mask = cv2.bitwise_and(low_saturation, high_value)

    kernel = np.ones((5, 5), np.uint8)
    # close: dilate then erode
    mask = cv2.dilate(mask, kernel, iterations=1)
    mask = cv2.erode(mask, kernel, iterations=1)
    # open: erode then dilate
    mask = cv2.erode(mask, kernel, iterations=1)
    mask = cv2.dilate(mask, kernel, iterations=1)

    # find contours; [-2] selects the contour list whether findContours
    # returns 2 values (OpenCV 2.x/4.x) or 3 values (OpenCV 3.x)
    contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # find biggest contour (first one wins on ties, as before)
    if len(contours) == 0:
        return None
    return max(contours, key=cv2.contourArea)
# skeletonize the mask
def skeleton(mask):
    """Return a cleaned-up morphological skeleton of `mask`.

    mask: single-channel binary image (the caller passes a 0/255 uint8 mask).
    The input is not modified; a new image of the same shape is returned.

    Steps: iterative erosion-based skeletonisation, a small dilation to bridge
    gaps, removal of fragments whose closed perimeter is 50 or less, then a
    dilate/erode pass to join the remaining lines.
    """
    img = mask.copy()
    skel = np.zeros_like(mask)
    elem = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    while True:
        # skeleton iteration: keep the pixels an opening would remove
        eroded = cv2.erode(img, elem)
        temp = cv2.dilate(eroded, elem)
        temp = cv2.subtract(img, temp)
        skel = cv2.bitwise_or(skel, temp)
        img = eroded
        # end condition: the image has been eroded away completely. Without
        # this exit the loop would never terminate.
        if cv2.countNonZero(img) == 0:
            break

    # connect small gaps
    kernel = np.ones((2, 2), np.uint8)
    skel = cv2.dilate(skel, kernel, iterations=1)

    # filter out little lines; [-2] selects the contour list on every
    # OpenCV version (findContours returns 2 or 3 values)
    contours = cv2.findContours(skel, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # filter contours by size: only fragments with perimeter > 50 are redrawn
    big_cntrs = [contour for contour in contours
                 if cv2.arcLength(contour, True) > 50]
    thin_lines = np.zeros_like(skel)
    if big_cntrs:
        thin_lines = cv2.drawContours(thin_lines, big_cntrs, -1, 255, -1)
    skel = thin_lines

    # dilate and close to connect lines
    kernel = np.ones((3, 3), np.uint8)
    skel = cv2.dilate(skel, kernel, iterations=5)
    skel = cv2.erode(skel, kernel, iterations=4)

    return skel
# load image
l1 = cv2.imread("line1.png")
l2 = cv2.imread("line2.png")
# The loaded images must be put in the list; otherwise every loop below
# iterates over nothing.
imgs = [l1, l2]

# convert
hsvs = [cv2.cvtColor(img, cv2.COLOR_BGR2HSV) for img in imgs]

# draw contours
masks = []
for hsv in hsvs:
    # get contour
    contour = findPath(hsv)

    # create mask (left blank when no path contour was found)
    mask = np.zeros_like(hsv[:, :, 0])
    if contour is not None:
        cv2.drawContours(mask, [contour], -1, (255), -1)
    mask = cv2.medianBlur(mask, 5)
    masks.append(mask)

# skeleton
skelly_masks = [skeleton(mask.copy()) for mask in masks]

# draw on original
for a, (img, mask, skelly) in enumerate(zip(imgs, masks, skelly_masks)):
    img[np.where(mask == 255)] = (155, 0, 0)  # 155 to avoid blinding people
    img[np.where(skelly == 255)] = (0, 0, 155)
    cv2.imshow(str(a), img)
    cv2.imwrite("img" + str(a) + ".png", img)
# imshow windows are only rendered while the GUI event loop runs in waitKey.
cv2.waitKey(0)
I am currently working on a project where the problem statement is to detect handwritten text in an image of a particular form. As a pre-processing step I have extracted the text regions as bounding boxes, and I have around 1500 images of text extracted from the form image, of which 50 are handwritten.
The problem is how do I now use these extracted images to train a classifier model which will classify the images as printed or handwritten text. I have no prior knowledge of Deep learning. Any help will be appreciated. I am uploading the image and the extracted images, as well as the code to extract the texts from the images.
# Load the form image and derive three binarised variants of the blurred
# grayscale image (global, adaptive-mean and adaptive-Gaussian thresholds).
# NOTE(review): the path starts with '~'; presumably it needs expanding (e.g.
# os.path.expanduser) before cv.imread can open it — confirm.
im_ns = cv.imread('~/Image processing/IMG_20180921_111952.png')
gray = cv.cvtColor(im_ns,cv.COLOR_BGR2GRAY)
blurred_g = cv.GaussianBlur(gray,(11,11),0)
ret, th1 = cv.threshold(blurred_g,127,255,cv.THRESH_BINARY)
th2 = cv.adaptiveThreshold(blurred_g,255,cv.ADAPTIVE_THRESH_MEAN_C,cv.THRESH_BINARY,11,2)
th3 = cv.adaptiveThreshold(blurred_g,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,cv.THRESH_BINARY,11,2)
##Detecting horizontal Lines and removing them
# NOTE(review): th3_di is not defined anywhere in the visible code — presumably
# a dilated version of th3; confirm.
th3_di1 = th3_di.copy()
# Horizontal structuring element 1/30 of the image width wide and 1 pixel tall.
hor = int(round(th3_di1.shape[1]/30,0))
hor_struc = cv.getStructuringElement(cv.MORPH_RECT,(hor,1))
# NOTE(review): bw_hor_er is not used in the visible code.
bw_hor_er = cv.erode(th3_di1,hor_struc,iterations=1)
bw_hor_di = cv.dilate(th3_di1,hor_struc,iterations=1)
# Per-pixel pass: wherever the dilated image is 0, set the working image to 255.
# NOTE(review): the last assignment is a self-assignment as listed; presumably it
# belonged to an `else:` branch that was lost — confirm.
for i in range(0,bw_hor_di.shape[0]):
for j in range(0,bw_hor_di.shape[1]):
if bw_hor_di[i,j] == 0:
th3_di1[i,j] = 255
th3_di1[i,j] = th3_di1[i,j]
# perform a connected component analysis on the thresholded
# image, then initialize a mask to store only the "large"
# components
# NOTE(review): `measure` is presumably skimage.measure (its import is not
# visible here) — confirm.
labels = measure.label(th3_di1, neighbors=4, background=255)
mask = np.zeros(th3_di1.shape, dtype="uint8")
# loop over the unique components
for lab in np.unique(labels):
# if this is the background label, ignore it
# NOTE(review): this `if` has no body in the listing; presumably `continue`.
if lab == 0:
# otherwise, construct the label mask and count the
# number of pixels
labelMask = np.zeros(th3_di.shape, dtype="uint8")
labelMask[labels == lab] = 255
numPixels = cv.countNonZero(labelMask)
# if the number of pixels in the component is sufficiently
# large, then add it to our mask of "large blobs"
if numPixels > 8:
mask = cv.add(mask, labelMask)
# find the contours in the mask, then sort them from left to
# right
# NOTE(review): the findContours call below is truncated (its continuation line
# is missing); `imutils` and `contours` are presumably imutils and
# imutils.contours — confirm the imports.
cnts = cv.findContours(mask.copy(), cv.RETR_EXTERNAL,
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
cnts = contours.sort_contours(cnts)[0]
# loop over the contours to make rectangles for the th3 image with gassian thresholding
for (i, c) in enumerate(cnts):
# compute the bounding box of the component
(x,y,w,h) = cv.boundingRect(c)
#((cX, cY), radius) = cv.minEnclosingCircle(c)
# NOTE(review): putText is called with an empty string, so nothing visible is
# drawn; presumably a rectangle-drawing call was lost — confirm.
cv.putText(th3, "",(x+w+10,y+h),0,0.3,(0,255,0))
# show the output image
cv.imshow("Image", th3)
##Extracting the bounding boxes
for (i, c) in enumerate(cnts):
# crop the bounding box of each component out of the original image
# NOTE(review): idx is not initialised in the visible code, and roi is not used
# after it is cropped (presumably it was written to disk) — confirm.
idx += 1
x,y,w,h = cv.boundingRect(c)
roi = im_ns[y:y+h,x:x+w]
#((cX, cY), radius) = cv.minEnclosingCircle(c)
I would like to create a program that is able to extract lines from a graph.
For example, if a graph like this is inputted, I would just want the red line to be outputted.
Below I have tried to do this using a hough line transformation, however, I do not get very promising results.
import cv2
import numpy as np
# Load the graph image and convert it to grayscale.
graph_img = cv2.imread("/Users/2020shatgiskessell/Desktop/Graph1.png")
gray = cv2.cvtColor(graph_img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
#Gaussian blur of the grayscale image
blur_gray = cv2.GaussianBlur(gray,(kernel_size, kernel_size),0)
#Canny edge detection
edges = cv2.Canny(blur_gray, 50, 150)
#Hough Lines Transformation
#distance resolution of hough grid (pixels)
rho = 1
#angular resolution of hough grid (radians)
theta = np.pi/180
#minimum number of votes
threshold = 15
#play around with these
min_line_length = 25
max_line_gap = 20
#make new image (a copy of the input to draw on)
line_image = np.copy(graph_img)
#returns array of lines
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
min_line_length, max_line_gap)
# NOTE(review): the inner loop has no body in this listing — presumably a
# cv2.line(...) call drawing each segment onto line_image was lost; confirm.
for line in lines:
for x1,y1,x2,y2 in line:
# Blend the original image (weight 0.8) with line_image (weight 1).
lines_edges = cv2.addWeighted(graph_img, 0.8, line_image, 1, 0)
# Displays the Canny edge map (not lines_edges) in a window titled "denoised image".
cv2.imshow("denoised image",edges)
# Wait for a key press and test for Esc (key code 27).
# NOTE(review): the body of this `if` is missing — presumably
# cv2.destroyAllWindows(); confirm.
if cv2.waitKey(0) & 0xff == 27:
This produces the output image below, which does not accurately recognize the graph line. How might I go about doing this?
Note: For now, I am not concerned about the graph titles or any other text.
I would also like the code to work for other graph images as well, such as:
If the graph does not have many noises around it (like your example) I would suggest to threshold your image with Otsu threshold instead of looking for edges . Then you simply search the contours, select the biggest one (graph) and draw it on a blank mask. After that you can perform a bitwise operation on image with the mask and you will get a black image with the graph. If you like the white background better, then simply change all black pixels to white. Steps are written in the example. Hope it helps a bit. Cheers!
import numpy as np
import cv2
# Read the image and create a blank mask
img = cv2.imread('graph.png')
h, w = img.shape[:2]
mask = np.zeros((h, w), np.uint8)

# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Search for contours and select the biggest one and draw it on mask.
# [-2:] keeps this working whether findContours returns 2 values
# (OpenCV 2.x/4.x) or 3 values (OpenCV 3.x).
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
cnt = max(contours, key=cv2.contourArea)
cv2.drawContours(mask, [cnt], 0, 255, -1)

# Perform a bitwise operation
res = cv2.bitwise_and(img, img, mask=mask)

# Convert black pixels back to white. A pixel counts as black only when all
# three channels are 0; testing `res == 0` per channel would also whiten
# saturated colours such as pure red (0, 0, 255).
res[np.all(res == 0, axis=2)] = 255

# Display the image (the window is only rendered while waitKey runs)
cv2.imshow('img', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
For noisier pictures you could try this code. Note that different graphs have different kinds of noise, so it may not work on every graph image, since the denoising process is specific to each case. For different kinds of noise you can use different denoising techniques, for example histogram equalization, eroding, blurring, etc. This code works well for all 3 graphs. Steps are written in comments. Hope it helps. Cheers!
import numpy as np
import cv2
# Read the image and create a blank mask
img = cv2.imread('graph.png')
h, w = img.shape[:2]
mask = np.zeros((h, w), np.uint8)

# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Perform opening on the thresholded image (erosion followed by dilation)
kernel = np.ones((2, 2), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Search for contours and select the biggest one and draw it on mask.
# [-2:] keeps this working whether findContours returns 2 values
# (OpenCV 2.x/4.x) or 3 values (OpenCV 3.x).
contours, hierarchy = cv2.findContours(opening, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2:]
cnt = max(contours, key=cv2.contourArea)
cv2.drawContours(mask, [cnt], 0, 255, -1)

# Perform a bitwise operation
res = cv2.bitwise_and(img, img, mask=mask)

# Threshold the image again
gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Turn every pixel selected by the second (inverted) threshold white. Boolean
# indexing replaces the per-pixel Python loop over cv2.findNonZero, which is
# slow and breaks on len() if findNonZero yields None (no pixel selected).
res[thresh != 0] = 255

# Display the image (the window is only rendered while waitKey runs)
cv2.imshow('img', res)
cv2.waitKey(0)
cv2.destroyAllWindows()
I'm facing some problems to get smoother contours on curves.
After an image processing I have this image.
I am trying to get smoother curves with this code:
# Binarise the bridged image.
# NOTE(review): convert_to_bw, imgWithBridges and InterholesBw are not defined
# in the visible code.
imgWithBridgesBw = convert_to_bw(imgWithBridges)
# this mask is to be added on top later
# NOTE(review): despite the "20" in its name, the kernel is a 3x3 ellipse.
ellipsekernel20 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
# All-255 single-channel and 3-channel images with InterholesBw's height and width.
enhanceMask = np.ones((InterholesBw.shape[0],InterholesBw.shape[1]) ,dtype="uint8") * 255
enhanceMaskColor = np.ones((InterholesBw.shape[0],InterholesBw.shape[1],3) ,dtype="uint8") * 255
# invert the Layer to get less blank pixels
imgWithBridgesBwInv = 255 - imgWithBridgesBw
# Dilate, then erode the dilated result with the same kernel.
imgWithBridgesBwInv_dilate = cv2.dilate(imgWithBridgesBwInv,ellipsekernel20,iterations =1)
imgWithBridgesBwInv_erode = cv2.erode(imgWithBridgesBwInv_dilate, ellipsekernel20, iterations = 1)
# NOTE(review): contours are taken from the dilated image; the eroded image
# computed above is not used in the visible code — confirm which is intended.
# NOTE(review): the three-value unpacking matches the OpenCV 3.x return shape.
_ ,allCnts, hier = cv2.findContours(imgWithBridgesBwInv_dilate,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
The result is this:
As you can see at this image, which is a "zoom", I'm not reaching any success.
I am trying to make a shape recognition classifier in which if you give an individual picture of an object (from a scene), it would be able to classify (after machine learning) the shape of an object (cylinder, cube, sphere, etc).
Original scene:
Individual objects it will classify:
I attempted to do this using cv2.approxPolyDP in order to classify a cylinder. However, either my implementation isn't good or this wasn't a good choice of algorithm in the first place: the objects in the shape of cylinders were assigned an approxPolyDP vertex count of 3 or 4.
Perhaps I can threshold and, in general, if given a value of 3 or 4, assume the object is a cylinder, but I feel like it's not the most reliable method for 3D shape classification. I feel like there is a better way to implement this and a better method as opposed to just hardcoding values. I feel like that with this method, it can easily confuse a cylinder with a cube.
Is there any way I can improve my 3D shape recognition program?
import cv2
import numpy as np
from pyimagesearch import imutils
from PIL import Image
from time import time
def invert_img(img):
    """Return the negative of an image: every value v becomes 255 - v.

    img: array of pixel values (the caller passes an 8-bit mask).
    The input array is left untouched; a new array is returned.
    """
    return 255 - img
def threshold(im):
    """Return a Canny edge map of a BGR image.

    im: colour image in BGR channel order (it is converted with
        cv2.COLOR_BGR2GRAY).
    The grayscale image is median-blurred (aperture 9) before Canny is run
    with thresholds 75 and 200.
    """
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    imgray = cv2.medianBlur(imgray, 9)
    return cv2.Canny(imgray, 75, 200)
# Draw every contour whose area lies strictly between size_min and size_max
# onto a copy of `im`, show a crop of each such contour, and return the
# annotated copy.
# NOTE(review): iterates the module-level `cnts` rather than taking the contours
# as a parameter; uses Python 2 print statements. The original indentation is
# lost in this listing, so the exact nesting below should be confirmed.
def view_all_contours(im, size_min, size_max):
# accumulator for the polygon approximations of accepted contours
main = np.array([[]])
cnt_target = im.copy()
for c in cnts:
# polygon approximation with epsilon = 10% of the closed perimeter
epsilon = 0.1*cv2.arcLength(c,True)
approx = cv2.approxPolyDP(c,epsilon,True)
area = cv2.contourArea(c)
print 'area: ', area
test = im.copy()
# To weed out contours that are too small or big
if area > size_min and area < size_max:
print c[0,0]
print 'approx: ', len(approx)
# axis-aligned extent of the contour points
max_pos = c.max(axis=0)
max_x = max_pos[0,0]
max_y = max_pos[0,1]
min_pos = c.min(axis=0)
min_x = min_pos[0,0]
min_y = min_pos[0,1]
# Load each contour onto image
cv2.drawContours(cnt_target, c, -1,(0,0,255),2)
print 'Found object'
# crop the contour's extent from an unannotated copy of the image
frame_f = test[min_y:max_y , min_x:max_x]
# NOTE(review): np.append without an axis flattens both arrays, so `main`
# ends up one-dimensional — confirm that is intended.
main = np.append(main, approx[None,:][None,:])
# edge map of the crop, then contours inside the crop
thresh = frame_f.copy()
thresh = threshold(thresh)
# NOTE(review): two-value unpacking matches the OpenCV 2.x/4.x return shape.
contours_small, hierarchy = cv2.findContours(thresh.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
cnts_small = sorted(contours_small, key = cv2.contourArea, reverse = True)
cv2.drawContours(frame_f, cnts_small, -1,(0,0,255),2)
cv2.imshow('Thresh', thresh)
cv2.imshow('Show Ya', frame_f)
# Uncomment in order to show all rectangles in image
print '---------------------------------------------'
#cv2.drawContours(cnt_target, cnts, -1,(0,255,0),2)
print main.shape
print main
return cnt_target
# Segment objects by histogram back-projection, clean the mask up with
# morphology, then find and display contours.
time_1 = time()
# The same file is loaded twice: once as the histogram source (roi) and once as
# the target, which is resized to a height of 400.
roi = cv2.imread('images/beach_trash_3.jpg')
hsv = cv2.cvtColor(roi,cv2.COLOR_BGR2HSV)
target = cv2.imread('images/beach_trash_3.jpg')
target = imutils.resize(target, height = 400)
hsvt = cv2.cvtColor(target,cv2.COLOR_BGR2HSV)
img_height = target.shape[0]
img_width = target.shape[1]
# calculating object histogram
roihist = cv2.calcHist([hsv],[0, 1], None, [180, 256], [0, 180, 0, 256] )
# normalize histogram and apply backprojection
# NOTE(review): no normalisation call is visible here — presumably a
# cv2.normalize(roihist, ...) line was lost; confirm.
dst = cv2.calcBackProject([hsvt],[0,1],roihist,[0,180,0,256],1)
# Now convolute with circular disc
# NOTE(review): `disc` is created but not applied in the visible code —
# presumably a cv2.filter2D call was lost; confirm.
disc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
# threshold and binary AND
ret,thresh = cv2.threshold(dst,50,255,0)
thresh_one = thresh.copy()
thresh = cv2.merge((thresh,thresh,thresh))
res = cv2.bitwise_and(target,thresh)
# Implementing morphological erosion & dilation
kernel = np.ones((9,9),np.uint8) # (6,6) to get more contours (9,9) to reduce noise
thresh_one = cv2.erode(thresh_one, kernel, iterations = 3)
thresh_one = cv2.dilate(thresh_one, kernel, iterations=2)
# Invert the image
thresh_one = invert_img(thresh_one)
# To show prev img
#res = np.vstack((target,thresh,res))
#cv2.imshow('Before contours', thresh_one)
cnt_target = target.copy()
cnt_full = target.copy()
# Code to draw the contours
# NOTE(review): two-value unpacking matches the OpenCV 2.x/4.x return shape.
contours, hierarchy = cv2.findContours(thresh_one.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# contours sorted by area, largest first; view_all_contours reads this global
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
# elapsed time since time_1 (Python 2 print statement)
print time() - time_1
# area bounds passed to view_all_contours
size_min = 200
size_max = 5000
cnt_target = view_all_contours(target, size_min, size_max)
cv2.drawContours(cnt_full, cnts, -1,(0,0,255),2)
# NOTE(review): this resized image is assigned to `res` but the windows below
# show thresh_one, not res; no cv2.waitKey call is visible after the imshow
# calls — confirm.
res = imutils.resize(thresh_one, height = 700)
cv2.imshow('Original image', target)
cv2.imshow('Preprocessed', thresh_one)
cv2.imshow('All contours', cnt_full)
cv2.imshow('Filtered contours', cnt_target)