Why cv.matchShape is NOT invariant to translation as it claims? - opencv

I have two contours to match (think of them as any arbitrary 2D closed curves). opencv claims to have matchShapes function that is invariant under translation, rotation and scale. But it seems to me that this is not the case, when I add shift (10, 5) to one of the curves, the function returns a different result, let alone if I did something whackier. Why is that?
matchShape
Reproducible example:
# Sample a unit half-circle and stack it into an (N, 2) array of (x, y) points.
t = np.arange(0, np.pi, 0.001)
x, y = np.cos(t), np.sin(t)
xy = np.stack([x, y], -1)
# Compare the curve with itself, then with a copy shifted by (2, 10).
# NOTE(review): xy is float64 here; OpenCV presumably only treats int32/float32
# point arrays as contours (other dtypes may be read as a raster image) --
# confirm, and try xy.astype(np.float32) if so.
print(cv.matchShapes(xy, xy, 1, 0))
print(cv.matchShapes(xy, xy + (2, 10), 1, 0))

The objects you send to cv.matchShapes() need to be contour objects which are different to a straight up 2D numpy array. The following code converts your curves to a plot,
then to an image & the contours of the 2 curves are found.
Finally cv.matchShapes() is run.
The output: 0 for the self match & 6.637412841570267e-12 for the match with the translated curve, a pretty accurate match under translation.
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
# Draw the half-circle and its translated copy on a single figure.
fig = plt.figure()
ax = fig.add_subplot(111)
t = np.arange(0, np.pi, 0.001)
x, y = np.cos(t), np.sin(t)
ax.plot(x, y)
x_new = x + 2
y_new = y + 10
ax.plot(x_new, y_new, 'b')

# Hide spines and tick marks so that only the two curves end up in the image.
# Plain loops are used because these calls are made purely for side effects.
for spine in ax.spines.values():
    spine.set_visible(False)
for tick_line in ax.get_xticklines():
    tick_line.set_visible(False)
for tick_line in ax.get_yticklines():
    tick_line.set_visible(False)
ax.axis('off')
plt.savefig('xy.jpg')

# Load the rendered plot back and binarise it for contour extraction.
xy_img = cv.imread('xy.jpg', cv.IMREAD_COLOR)
xy_cpy = cv.cvtColor(xy_img, cv.COLOR_BGR2GRAY)
(threshold, bw) = cv.threshold(xy_cpy, 127, 255, cv.THRESH_BINARY)
contours, hier = cv.findContours(bw, cv.RETR_LIST, cv.CHAIN_APPROX_NONE)
contours = contours[0:-1]  # remove box surrounding whole image

# Match the first contour against itself, then against the second contour.
print(cv.matchShapes(contours[0], contours[0], method=cv.CONTOURS_MATCH_I1, parameter=0))
print(cv.matchShapes(contours[0], contours[1], method=cv.CONTOURS_MATCH_I1, parameter=0))

# Show the detected contours on top of the rendered plot.
cv.namedWindow("xy")
cv.drawContours(xy_img, contours, -1, (0, 255, 0), 3)
cv.imshow("xy", xy_img)
cv.waitKey()

Related

Camera calibration with OpenCV-python for autonomous car doesn't work well

Problems
doesn't work well
When I use my code with my Image, it doesn't work well.
I only edited 'wc' and 'hc' from OpenCV DOC
import glob
import cv2 as cv
import numpy as np
# Chessboard inner-corner counts passed to the detector as (hc, wc).
wc = 7
hc = 4
# termination criteria
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((wc * hc, 3), np.float32)
objp[:, :2] = np.mgrid[0:hc, 0:wc].T.reshape(-1, 2)
# Arrays to store object points and image points from all the images.
objpoints = []  # 3d point in real world space
imgpoints = []  # 2d points in image plane.
images = glob.glob('1.jpg')
for fname in images:
    img = cv.imread(fname)
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # Find the chess board corners
    ret, corners = cv.findChessboardCorners(gray, (hc, wc), None)
    print(ret, wc, hc)
    # Only refine and record corners when detection succeeded; refining the
    # output of a failed detection would raise inside cornerSubPix.
    if ret:
        objpoints.append(objp)
        # image, corners, winSize, zeroZone, criteria
        corners2 = cv.cornerSubPix(gray, corners, (20, 20), (-1, -1), criteria)
        imgpoints.append(corners2)
        # Draw and save the corners
        cv.drawChessboardCorners(img, (hc, wc), corners2, ret)
        cv.imwrite('ChessboardCorners.png', img)
        cv.waitKey(0)

# calibrateCamera needs at least one successful detection to work with.
if not objpoints:
    raise SystemExit("No chessboard corners were detected; cannot calibrate.")

ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
img = cv.imread('1.jpg')
print(img.shape[:2])
h, w = img.shape[:2]
newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
# undistort
dst = cv.undistort(img, mtx, dist, None, newcameramtx)
# crop the image
x, y, w, h = roi
dst = dst[y:y + h, x:x + w]
cv.imwrite('calibresult.png', dst)
cv.waitKey(0)

# Mean re-projection error over all views that contributed corners.
mean_error = 0
for obj_pts, img_pts, rvec, tvec in zip(objpoints, imgpoints, rvecs, tvecs):
    imgpoints2, _ = cv.projectPoints(obj_pts, rvec, tvec, mtx, dist)
    error = cv.norm(img_pts, imgpoints2, cv.NORM_L2) / len(imgpoints2)
    mean_error += error
print("total error: {}".format(mean_error / len(objpoints)))
print("\n\n", fname, "claer")
cv.destroyAllWindows()
exit(0)
original image - not well
ChessboardCorners - (I'm not sure that this is not well)
calibresult image - not well
works well with other images
But, when I use my code with the Image which was in the example in OpenCV DOC, it works well.
import glob
import cv2 as cv
import numpy as np
# Chessboard inner-corner counts passed to the detector as (hc, wc).
wc = 6
hc = 7
# termination criteria
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((wc * hc, 3), np.float32)
objp[:, :2] = np.mgrid[0:hc, 0:wc].T.reshape(-1, 2)
# Arrays to store object points and image points from all the images.
objpoints = []  # 3d point in real world space
imgpoints = []  # 2d points in image plane.
images = glob.glob('img.png')
for fname in images:
    img = cv.imread(fname)
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    # Find the chess board corners
    ret, corners = cv.findChessboardCorners(gray, (hc, wc), None)
    print(ret, wc, hc)
    # Only refine and record corners when detection succeeded; refining the
    # output of a failed detection would raise inside cornerSubPix.
    if ret:
        objpoints.append(objp)
        # image, corners, winSize, zeroZone, criteria
        corners2 = cv.cornerSubPix(gray, corners, (20, 20), (-1, -1), criteria)
        imgpoints.append(corners2)
        # Draw and save the corners
        cv.drawChessboardCorners(img, (hc, wc), corners2, ret)
        cv.imwrite('ChessboardCorners.png', img)
        cv.waitKey(0)

# calibrateCamera needs at least one successful detection to work with.
if not objpoints:
    raise SystemExit("No chessboard corners were detected; cannot calibrate.")

ret, mtx, dist, rvecs, tvecs = cv.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
img = cv.imread('img.png')
print(img.shape[:2])
h, w = img.shape[:2]
newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))
# undistort
dst = cv.undistort(img, mtx, dist, None, newcameramtx)
# crop the image
x, y, w, h = roi
dst = dst[y:y + h, x:x + w]
cv.imwrite('calibresult.png', dst)
cv.waitKey(0)

# Mean re-projection error over all views that contributed corners.
mean_error = 0
for obj_pts, img_pts, rvec, tvec in zip(objpoints, imgpoints, rvecs, tvecs):
    imgpoints2, _ = cv.projectPoints(obj_pts, rvec, tvec, mtx, dist)
    error = cv.norm(img_pts, imgpoints2, cv.NORM_L2) / len(imgpoints2)
    mean_error += error
print("total error: {}".format(mean_error / len(objpoints)))
print("\n\n", fname, "claer")
cv.destroyAllWindows()
exit(0)
I removed the images because "Your question appears to be spam."
Please see the images on OpenCV DOC
Please, give me the solution to this problem.
Do I need to modify the parameters, or what should I do?
Is my chessboard wrong?
Below is what I have tried.
First, I tried to find correct numbers of 'wc' and 'hc'
I used this code to find.
import glob
import cv2 as cv
import numpy as np
# termination criteria (identical for every candidate size, so built once)
criteria = (cv.TERM_CRITERIA_EPS + cv.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# Decode and grayscale each input image once instead of once per candidate
# (wc, hc) pair.
images = glob.glob('1.jpg')
gray_images = []
for fname in images:
    img = cv.imread(fname)
    gray_images.append((fname, cv.cvtColor(img, cv.COLOR_BGR2GRAY)))

for i in range(3, 50):
    # Only j > i is tried because the order of the two counts was found not
    # to matter in an earlier run.
    for j in range(i + 1, 50):
        wc = i
        hc = j
        # prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
        objp = np.zeros((wc * hc, 3), np.float32)
        objp[:, :2] = np.mgrid[0:hc, 0:wc].T.reshape(-1, 2)
        # Arrays to store object points and image points from all the images.
        objpoints = []  # 3d point in real world space
        imgpoints = []  # 2d points in image plane.
        for fname, gray in gray_images:
            # Find the chess board corners
            ret, corners = cv.findChessboardCorners(gray, (hc, wc), None)
            print(ret, wc, hc)
And the result here:
False 3 4
False 3 5
False 3 6
···
False 4 5
False 4 6
**True 4 7**
False 4 8
False 4 9
···
False 47 48
False 47 49
False 48 49
Process finished with exit code 0
I also found that the Image which was in the example in OpenCV DOC has another 'wc' and 'hc', (4, 4).
And result here:
ChessboardCorners by (4, 4)
calibresult by (4, 4)
So I suspect that the 'wc' and 'hc' of my image, (4, 7), might be too small.
Should I increase the max and do a brute-force search again?
I can't tell for sure, but it looks like you are only using a single input image for calibration. If that's true, try increasing to at least 5 images (more would be better) and see if that helps. The images should be at different angles and distances.
I notice that your lens has significant distortion. A few suggestions for getting that to work well:
Use the rational model for distortion - the basic kappa 1, kappa 2 model won't do well.
Your data set will need to include image points from all parts of the image, including near the edges and corners of the image. This can be difficult/impossible to achieve using the normal chessboard pattern (because the entire pattern must be visible in the image) - I suggest using the ChAruco calibration pattern/functions. This uses a modified chessboard pattern that includes Aruco markers embedded in the white squares, which allows for partial patterns to be used.
Note that the wc and hc parameters you are searching for are used to describe the chessboard pattern width and height. This should be known to you ahead of time and you shouldn't need to search for it.

Adjusting pytesseract parameters

Note: I am migrating this question from Data Science Stack Exchange, where it received little exposure.
I am trying to implement an OCR solution to identify the numbers read from the picture of a screen.
I am adapting this pyimagesearch tutorial to my problem.
Because I am dealing with a dark background, I first invert the image, before converting it to grayscale and thresholding it:
inverted_cropped_image = cv2.bitwise_not(cropped_image)
gray = get_grayscale(inverted_cropped_image)
thresholded_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)[1]
Then I call pytesseract's image_to_data function to output a dictionary containing the different text regions and their confidence intervals:
from pytesseract import Output
results = pytesseract.image_to_data(thresholded_image, output_type=Output.DICT)
Finally I iterate over results and plot them when their confidence exceeds a user defined threshold (70%). What bothers me, is that my script identifies everything in the image except the number that I would like to recognize (1227.938).
My first guess is that the image_to_data parameters are not set properly.
Checking this website, I selected a page segmentation mode (psm) of 11 (sparse text) and tried whitelisting numbers only (tessedit_char_whitelist=0123456789m.'):
results = pytesseract.image_to_data(thresholded_image, config='--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789m.', output_type=Output.DICT)
Alas, this is even worse, and the script now identifies nothing at all!
Do you have any suggestion? Am I missing something obvious here?
EDIT #1:
At Ann Zen's request, here's the code used to obtain the first image:
import imutils
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from pytesseract import Output
def get_grayscale(image):
    """Return *image* converted from BGR to single-channel grayscale."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


# Invert the image, then grayscale and threshold it before running OCR.
filename = "IMAGE.JPG"
cropped_image = cv2.imread(filename)
inverted_cropped_image = cv2.bitwise_not(cropped_image)
gray = get_grayscale(inverted_cropped_image)
thresholded_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)[1]
results = pytesseract.image_to_data(thresholded_image, config='--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789m.', output_type=Output.DICT)
color = (255, 255, 255)
for i, text in enumerate(results["text"]):
    x = results["left"][i]
    y = results["top"][i]
    w = results["width"][i]
    h = results["height"][i]
    # The confidence may arrive as an int, a float or a string such as "-1"
    # or "96.4" depending on the Tesseract/pytesseract versions; going through
    # float() accepts all of them, whereas int("96.4") would raise.
    conf = int(float(results["conf"][i]))
    print("Confidence: {}".format(conf))
    if conf > 70:
        print("Confidence: {}".format(conf))
        print("Text: {}".format(text))
        print("")
        # Drop non-ASCII characters before drawing the label.
        text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
        cv2.rectangle(cropped_image, (x, y), (x + w, y + h), color, 2)
        cv2.putText(cropped_image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,1.2, color, 3)
cv2.imshow('Image', cropped_image)
cv2.waitKey(0)
EDIT #2:
Rarely have I spent reputation points so well! All three replies posted so far helped me refine my algorithm.
First, I wrote a Tkinter program allowing me to manually crop the image around the number of interest (modifying the one found in this SO post)
Then I used Ann Zen's idea of narrowing down the search area around the fractional part. I am using her nifty process function to prepare my grayscale image for contour extraction: contours, _ = cv2.findContours(process(img_gray), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE). I am using RETR_EXTERNAL to avoid dealing with overlapping bounding rectangles.
I then sorted my contours from left to right. Bounding rectangles exceeding a user-defined threshold are associated with the integral part (white rectangles); otherwise they are associated with the fractional part (black rectangles).
I then extracted the characters using Esraa's approach i.e. applying a Gaussian blur prior to calling Tesseract. I used a much larger kernel (15x15 vs 3x3) to achieve this.
I am not out of the woods yet, but hopefully I will get better results by using Ahx's adaptive thresholding.
The Concept
As you have probably heard, pytesseract is not good at detecting text of different sizes on the same line as one piece of text. In your case, you want to detect the 1227.938, where the 1227 is much larger than the .938.
One way to go about solving this is to have the program estimate where the .938 is, and enlarge that part of the image. After that, pytesseract will have no problem in returning the text.
The Code
import cv2
import numpy as np
import pytesseract
def process(img):
    """Return a binary edge mask of *img* prepared for contour detection.

    The BGR input is converted to grayscale, thresholded at 200, passed
    through Canny, and then dilated and eroded twice each with a 3x3 kernel.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(grayscale, 200, 255, cv2.THRESH_BINARY)[1]
    edges = cv2.Canny(binary, 100, 100)
    kernel = np.ones((3, 3))
    # Dilating and then eroding by the same amount is a morphological closing.
    closed = cv2.dilate(edges, kernel, iterations=2)
    return cv2.erode(closed, kernel, iterations=2)
img = cv2.imread("image.png")
img_copy = img.copy()
hh = 50  # rough pixel height of the large part of the number

# Enlarge every contour whose area falls in the band expected for small text.
contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
    if not 20 * hh < cv2.contourArea(cnt) < 30 * hh:
        continue
    x, y, w, h = cv2.boundingRect(cnt)
    ww = int(hh / h * w)  # width that keeps the aspect ratio at height hh
    src_seg = img[y: y + h, x: x + w]
    dst_seg = img_copy[y: y + hh, x: x + ww]
    # The destination slice may be clipped by the image border, so the
    # resized patch is cropped to whatever actually fits.
    h_seg, w_seg = dst_seg.shape[:2]
    enlarged = cv2.resize(src_seg, (ww, hh))
    dst_seg[:] = enlarged[:h_seg, :w_seg]

# OCR the modified copy and write each recognised word onto the original.
gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
results = pytesseract.image_to_data(thresh)
data_rows = results.splitlines()[1:]  # skip the header row
for row in data_rows:
    b = row.split()
    # Only rows that carry a text field split into exactly 12 columns.
    if len(b) != 12:
        continue
    x, y, w, h = map(int, b[6: 10])
    cv2.putText(img, b[11], (x, y + h + 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, 0)
cv2.imshow("Result", img)
cv2.waitKey(0)
The Output
Here is the input image:
And here is the output image:
As you have said in your post, the only part you need is the decimal 1227.938. If you want to filter out the rest of the detected text, you can try tweaking some parameters. For example, replacing the 180 in _, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY) with 230 will result in the output image:
The Explanation
Import the necessary libraries:
import cv2
import numpy as np
import pytesseract
Define a function, process(), that will take in an image array, and return a binary image array that is the processed version of the image that will allow proper contour detection:
def process(img):
    """Return a binary edge mask of *img* prepared for contour detection.

    The BGR input is converted to grayscale, thresholded at 200, passed
    through Canny, and then dilated and eroded twice each with a 3x3 kernel.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(grayscale, 200, 255, cv2.THRESH_BINARY)[1]
    edges = cv2.Canny(binary, 100, 100)
    kernel = np.ones((3, 3))
    # Dilating and then eroding by the same amount is a morphological closing.
    closed = cv2.dilate(edges, kernel, iterations=2)
    return cv2.erode(closed, kernel, iterations=2)
I'm sure that you don't have to do this, but due to a problem in my environment, I have to add pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' before I can call the pytesseract.image_to_data() method, or it throws an error:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
Read in the original image, make a copy of it, and define the rough height of the large part of the decimal:
img = cv2.imread("image.png")
img_copy = img.copy()
hh = 50
Detect the contours of the processed version of the image, and add a filter that roughly filters out the contours so that the small text remains:
contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
if 20 * hh < cv2.contourArea(cnt) < 30 * hh:
Define the bounding box of each contour that didn't get filtered out, and use the properties to enlarge those parts of the image to the height defined for the large text (making sure to also scale the width accordingly):
x, y, w, h = cv2.boundingRect(cnt)
ww = int(hh / h * w)
src_seg = img[y: y + h, x: x + w]
dst_seg = img_copy[y: y + hh, x: x + ww]
h_seg, w_seg = dst_seg.shape[:2]
dst_seg[:] = cv2.resize(src_seg, (ww, hh))[:h_seg, :w_seg]
Finally, we can use the pytesseract.image_to_data() method to detect the text. Of course, we'll need to threshold the image again:
gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
results = pytesseract.image_to_data(thresh)
for b in map(str.split, results.splitlines()[1:]):
if len(b) == 12:
x, y, w, h = map(int, b[6: 10])
cv2.putText(img, b[11], (x, y + h + 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, 0)
cv2.imshow("Result", img)
cv2.waitKey(0)
I have been working with Tesseract for quite some time, so let me clarify something for you. Tesseract is extremely helpful if you're trying to recognize text in documents more than any other computer vision projects. It usually needs a binarized image to get a good output. Therefore, you will always need some image pre-processing.
However, after several trials in the past with all page segmentation modes, I realized that it fails when font size differs on the same line without having a space. Sometimes PSM 6 is helpful if the difference is low, but in your condition, you may try an alternative. If you don't care about the decimals, you may try the following solution:
img = cv2.imread(r'E:\Downloads\Iwzrg.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blur = cv2.GaussianBlur(gray, (3,3),0)
_,thresh = cv2.threshold(img_blur,200,255,cv2.THRESH_BINARY_INV)
# If using a fixed camera
new_img = thresh[0:100, 80:320]
text = pytesseract.image_to_string(new_img, lang='eng', config='--psm 6 --oem 3 -c tessedit_char_whitelist=0123456789')
OUTPUT: 1227
I would like to recommend applying another image processing method.
Because I am dealing with a dark background, I first invert the image, before converting it to grayscale and thresholding it:
You applied global thresholding and couldn't achieve the desired result.
Then you can apply either adaptive-thresholding or inRange
For the given image, if we apply the inRange threshold:
To be able to recognize the image as accurately as possible we can add a border to the top of the image and resize the image (Optional)
In the OCR section, check if the detected region contains a digit
if text.isdigit():
Then display on the image:
The result is nearly the desired value. Now you can try with the other suggested methods to find the exact value.
The problem is that .938 is recognized as 235; resizing with different values might improve the result.
Code:
from cv2 import imread, cvtColor, COLOR_BGR2HSV as HSV, inRange, getStructuringElement, resize
from cv2 import imshow, waitKey, MORPH_RECT, dilate, bitwise_and, rectangle, putText
from cv2 import copyMakeBorder as addBorder, BORDER_CONSTANT as CONSTANT, FONT_HERSHEY_SIMPLEX
from numpy import array
from pytesseract import image_to_data, Output
bgr = imread("Iwzrg.png")
# An explicit output size is given, so the fx/fy scale factors would be
# ignored; they are left out to avoid suggesting otherwise.
resized = resize(bgr, (800, 600))
# Add a 200-pixel black border at the top of the image.
bordered = addBorder(resized, 200, 0, 0, 0, CONSTANT, value=0)
hsv = cvtColor(bordered, HSV)
# Keep only pixels whose V channel is at least 250.
mask = inRange(hsv, array([0, 0, 250]), array([179, 255, 255]))
kernel = getStructuringElement(MORPH_RECT, (50, 30))
dilated = dilate(mask, kernel, iterations=1)
thresh = 255 - bitwise_and(dilated, mask)
data = image_to_data(thresh, output_type=Output.DICT)
for i, text in enumerate(data["text"]):
    # Only regions recognised as pure digits are reported and drawn.
    if not text.isdigit():
        continue
    x = data["left"][i]
    y = data["top"][i]
    w = data["width"][i]
    h = data["height"][i]
    print("Text: {}".format(text))
    print("")
    # Drop non-ASCII characters before drawing the label.
    text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
    rectangle(thresh, (x, y), (x + w, y + h), (0, 255, 0), 2)
    putText(thresh, text, (x, y - 10), FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
imshow("", thresh)
waitKey(0)

OpenCV Pupil reflection/glare removal

I am trying to track the pupil using OpenCV, assuming that the pupil is always black. However, the biggest challenge I am facing is the reflection on the pupil. Is there a way to replace the color of the glare with the color of the pupil that is visible around it?
Please find the code block below
from imutils import face_utils
import numpy as np
import argparse
import imutils
import dlib
import cv2
from matplotlib import pyplot as plt
import sys
import os
import time
import math
# Command-line arguments: landmark predictor model and input image.
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--shape-predictor", required=True,
                help="path to facial landmark predictor")
ap.add_argument("-i", "--image", required=True,
                help="path to input image")
args = vars(ap.parse_args())

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])
image = cv2.imread(args["image"])
image = imutils.resize(image, width=1080)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
rects = detector(gray, 1)

for rect in rects:
    shape = predictor(gray, rect)
    shape = face_utils.shape_to_np(shape)
    # Distinct names for the landmark index range avoid shadowing a face index.
    for (name, (start, end)) in face_utils.FACIAL_LANDMARKS_IDXS.items():
        if name != "left_eye":
            continue

        # Crop the eye region from the landmarks (two of them are dropped
        # before the bounding rectangle is computed) and enlarge it.
        listval = shape[start:end].tolist()
        del listval[3]
        del listval[0]
        (x, y, w, h) = cv2.boundingRect(np.array([listval]))
        roi = image[y:y + h, x:x + w]
        roi = imutils.resize(roi, width=250, inter=cv2.INTER_CUBIC)
        grey = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
        imgHSV = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        _, img1 = cv2.threshold(grey, 26, 255, cv2.THRESH_BINARY)
        cv2.imshow("ROI_1", roi)
        img1 = cv2.erode(img1, None, iterations=2)  # 1
        img1 = cv2.dilate(img1, None, iterations=4)  # 2
        img1 = cv2.medianBlur(img1, 5)  # 3
        contours, hierarchy = cv2.findContours(img1, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
        drawing = np.copy(roi)
        cv2.drawContours(drawing, contours, -1, (255, 0, 0), 2)

        for contour in contours:
            contour = cv2.convexHull(contour)
            area = cv2.contourArea(contour)
            # A degenerate hull has zero area; skip it instead of dividing
            # by zero in the circularity formula below.
            if area == 0:
                continue
            circumference = cv2.arcLength(contour, True)
            circularity = circumference ** 2 / (4 * math.pi * area)
            print(circularity)
            print(area)
            if area > 200 or circularity > 1.5:
                continue
            bounding_box = cv2.boundingRect(contour)
            extend = area / (bounding_box[2] * bounding_box[3])
            if extend > 0.8:
                continue

            # Mark the centroid of the remaining candidate.
            m = cv2.moments(contour)
            if m['m00'] != 0:
                center = (int(m['m10'] / m['m00']), int(m['m01'] / m['m00']))
                cv2.circle(drawing, center, 3, (0, 255, 0), -1)
            try:
                ellipse = cv2.fitEllipse(contour)
                cv2.ellipse(drawing, box=ellipse, color=(0, 255, 0))
            except cv2.error:
                # Best effort: the ellipse is skipped when OpenCV cannot fit
                # one (presumably too few hull points); other errors surface.
                pass
            cv2.putText(drawing, str(area), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (155, 255, 0))
        cv2.imshow("Drawing", drawing)
cv2.waitKey(0)
Glares usually have pixel values around 180. If you check the pixel values, there will be a cluster of pixels with values ranging from 180 to 185 or 190. Change the pixel values of that cluster to the values of the nearby clusters.

detecting rectangle contours not accurate in UML diagrams, openCV

I want to extract some rectangles at the top from a UML sequence diagram in jpg format by using OpenCV.
The algorithm I use finds way too many rectangles that are super small and not needed.
I think the mess up is somewhere in the beginning of the code where I apply canny edge detection but I am not sure.
I want to capture only the big rectangles from the top and center.
Thanks for any help.
import cv2
import numpy as np
import imutils
image = cv2.imread("./diagrams/sd2.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 90, 150, 3)
cnts = cv2.findContours(edges, cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
# findContours returns either (contours, hierarchy) or
# (image, contours, hierarchy) depending on the OpenCV version; choosing by
# tuple length picks the contour list in both cases.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cv2.drawContours(image, cnts, -1, (0, 255, 0), 1)


def detect(c):
    """Classify contour *c* as "square", "rectangle" or "unidentified".

    The contour is approximated by a polygon (tolerance: 3% of its
    perimeter). Only four-vertex approximations are classified, using the
    aspect ratio of their bounding rectangle.
    """
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.03 * peri, True)
    if len(approx) != 4:
        return "unidentified"
    (x, y, w, h) = cv2.boundingRect(approx)
    ar = w / float(h)
    return "square" if 0.95 <= ar <= 1.05 else "rectangle"


# loop over the contours
for c in cnts:
    M = cv2.moments(c)
    # A contour with zero area has no centroid; skip it rather than reuse
    # stale (or undefined) label coordinates.
    if M["m00"] == 0:
        continue
    cX = int((M["m10"] / M["m00"]))
    cY = int((M["m01"] / M["m00"]))
    shape = detect(c)
    c = c.astype("float")
    c = c.astype("int")
    if shape == "rectangle":
        cv2.drawContours(image, [c], -1, (0, 255, 0), 2)
        cv2.putText(image, shape, (cX, cY), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (0, 0, 0), 2)
# show the output image
cv2.imshow("Image", image)
cv2.waitKey(0)

Calculate new coordinates of keypoints after transformation

How can I get the new coordinates of the points a and b in this example after a transformation M (a 40-degree counter-clockwise rotation)?
import cv2
cap = cv2.VideoCapture("http://i.imgur.com/7G91d2im.jpg")
a, b = (100, 100), (200, 200)
if cap.isOpened():
ret, im = cap.read()
rows, cols = im.shape[:2]
im_keypoints = im.copy()
for point in [a, b]:
cv2.circle(im_keypoints, point, 6, (0, 0, 255), -1)
cv2.imwrite("im_keypoints.jpg", im_keypoints)
M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 40, 1)
im_rotated = cv2.warpAffine(im, M, (cols, rows))
cv2.imwrite("im_rotated.jpg", im_rotated)
M is a 2 by 3 rotation matrix, so all you need to do is apply M to your points.
import numpy as np  # needed for the matrix product below

# Draw the transformed keypoints on a copy of the rotated image.
im_rotated_keypoints = im_rotated.copy()
for point in [a, b]:
    # Convert to homogeneous coordinates (x, y, 1) in np array format first so
    # that the point can be pre-multiplied by the 2x3 matrix M.
    rotated_point = M.dot(np.array(point + (1,)))
    # The module is imported as cv2 in the snippet this continues, not cv.
    cv2.circle(im_rotated_keypoints, (int(rotated_point[0]), int(rotated_point[1])), 6, (0, 0, 255), -1)
And you should be able to see

Resources