Binarization for large directional gradient grayscale image - opencv

I am trying to binarize grayscale images with large gradients in one direction.
Apparently, normal methods including Otsu's are not good enough to do this job.
I tried to use Sobel gradients and the Bradley adaptive thresholding, by which I get OK results, but there are some issues as indicated in the attached picture.
From the section gradient curve, we could see the gradient difference is very big at the beginning, so I split the Sobel result and do the adaptive thresholding separately, and then fuse the two results together.
My question is:
Are there any other better methods to do this job? To better understand what I do, I post my sample python code here. Thanks for your attention.
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
def bradley_threshold(inputMat,ddepth):
nRows = inputMat.shape[0]
nCols = inputMat.shape[1]
sumMat = cv.integral(inputMat, ddepth)
S = max(nRows, nCols) / 8
T = 0.15
s2 = int(S / 2)
outputMat = np.zeros(inputMat.shape, np.uint8)
for i in range(nRows):
y1 = i - s2
y2 = i + s2
if y1 < 0:
y1 = 0
if y2 >= nRows:
y2 = nRows - 1
y1+=1
y2+=1
for j in range(nCols):
# set the SxS region
x1 = j - s2
x2 = j + s2
if x1 < 0:
x1 = 0
if x2 >= nCols:
x2 = nCols - 1;
x1 += 1
x2 += 1
count = (x2 - x1) * (y2 - y1)
sum = sumMat[y2, x2]-sumMat[y1, x2]-sumMat[y2, x1]+sumMat[y1, x1]
if inputMat[i, j] * count <= sum * (1.0 - T):
outputMat[i, j] = 0
else:
outputMat[i, j] = 255
return outputMat
if __name__ == '__main__':
gray = cv.imread('sample.png', cv.IMREAD_UNCHANGED)
image = cv.cvtColor(gray,cv.COLOR_GRAY2BGR)
blur = cv.medianBlur(gray, 3)
sobel = cv.Sobel(gray, cv.CV_32F, 1, 0, 1);
inverse = -sobel;
inverse[inverse < 0] = 0
thresh = bradley_threshold(inverse, cv.CV_32F)
splitter = 31
part1 = inverse[:,:31]
part2 = inverse[:,31:]
thresh1 = bradley_threshold(part1, cv.CV_32F)
thresh2 = bradley_threshold(part2, cv.CV_32F)
thresh_part = np.concatenate((thresh1, thresh2), axis=1)
cv.imwrite('bad_binary.png',thresh)
cv.imwrite('good_binary.png',thresh_part)
plt.imshow(thresh, cmap='gray')
plt.show()
plt.imshow(thresh_part, cmap='gray')
plt.show()
plt.plot(inverse[inverse.shape[0]-1, :])
plt.show()

Related

How to extract area of interest in the image while the boundary is not obvious

Are there ways to just extract the area of interest (the square light part in the red circle in the original image)? That means I need to get the coordinates of the edge and then masking the image outside the boundaries. I don't know how to do that. Could anyone help? Thanks!
#define horizontal and Vertical sobel kernels
Gx = np.array([[-1, 0, 1],[-2, 0, 2],[-1, 0, 1]])
Gy = np.array([[-1, -2, -1],[0, 0, 0],[1, 2, 1]])
#define kernal convolution function
# with image X and filter F
def convolve(X, F):
# height and width of the image
X_height = X.shape[0]
X_width = X.shape[3]
# height and width of the filter
F_height = F.shape[0]
F_width = F.shape[1]
H = (F_height - 1) // 2
W = (F_width - 1) // 2
#output numpy matrix with height and width
out = np.zeros((X_height, X_width))
#iterate over all the pixel of image X
for i in np.arange(H, X_height-H):
for j in np.arange(W, X_width-W):
sum = 0
#iterate over the filter
for k in np.arange(-H, H+1):
for l in np.arange(-W, W+1):
#get the corresponding value from image and filter
a = X[i+k, j+l]
w = F[H+k, W+l]
sum += (w * a)
out[i,j] = sum
#return convolution
return out
#normalizing the vectors
sob_x = convolve(image, Gx) / 8.0
sob_y = convolve(image, Gy) / 8.0
#calculate the gradient magnitude of vectors
sob_out = np.sqrt(np.power(sob_x, 2) + np.power(sob_y, 2))
# mapping values from 0 to 255
sob_out = (sob_out / np.max(sob_out)) * 255
plt.imshow(sob_out, cmap = 'gray', interpolation = 'bicubic')
plt.show()

How to do stretching Image distortion transformation

I have an assignment to transform an image
to one that has distortion effect like a dent, squeeze, stretch like this:
I have done with twirling, fisheye, bulge, but I'm having a hard time finding the right formulas for those effects.
here is my code for twirling:
import numpy as np
import cv2
import math
from google.colab.patches import cv2_imshow
img = cv2.imread("./orig_img.png")
h,w,_ = img.shape
flex_x = np.zeros((h,w),np.float32)
flex_y = np.zeros((h,w),np.float32)
scale_y= 1
scale_x = 1
alpha = -1.8
center_x, center_y = (w // 2, h // 2)
radius = h/5
for y in range(h):
delta_y = scale_y * (y - center_y)
for x in range(w):
delta_x = scale_x * (x - center_x)
distance = delta_x * delta_x + delta_y * delta_y
if distance >= (radius * radius):
flex_x[y, x] = x
flex_y[y, x] = y
else:
theta = np.arctan2(delta_x,delta_y) + alpha*(radius-math.sqrt(distance))/radius
r_sin = math.sqrt(distance)*np.cos(theta)
r_cos = math.sqrt(distance)*np.sin(theta)
flex_x[y, x] = r_cos + center_x
flex_y[y, x] = r_sin + center_y
dst = cv2.remap(img, flex_x, flex_y, cv2.INTER_LINEAR)
cv2_imshow(dst)
Anyone who has experience with this kind of transformation, please help me! I'm really thankful.

How to convert bounding box (x1, y1, x2, y2) to YOLO Style (X, Y, W, H)

I'm training a YOLO model, I have the bounding boxes in this format:-
x1, y1, x2, y2 => ex (100, 100, 200, 200)
I need to convert it to YOLO format to be something like:-
X, Y, W, H => 0.436262 0.474010 0.383663 0.178218
I already calculated the center point X, Y, the height H, and the weight W.
But still need a away to convert them to floating numbers as mentioned.
for those looking for the reverse of the question (yolo format to normal bbox format)
def yolobbox2bbox(x,y,w,h):
x1, y1 = x-w/2, y-h/2
x2, y2 = x+w/2, y+h/2
return x1, y1, x2, y2
Here's code snipet in python to convert x,y coordinates to yolo format
def convert(size, box):
dw = 1./size[0]
dh = 1./size[1]
x = (box[0] + box[1])/2.0
y = (box[2] + box[3])/2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x*dw
w = w*dw
y = y*dh
h = h*dh
return (x,y,w,h)
im=Image.open(img_path)
w= int(im.size[0])
h= int(im.size[1])
print(xmin, xmax, ymin, ymax) #define your x,y coordinates
b = (xmin, xmax, ymin, ymax)
bb = convert((w,h), b)
Check my sample program to convert from LabelMe annotation tool format to Yolo format https://github.com/ivder/LabelMeYoloConverter
There is a more straight-forward way to do those stuff with pybboxes. Install with,
pip install pybboxes
use it as below,
import pybboxes as pbx
voc_bbox = (100, 100, 200, 200)
W, H = 1000, 1000 # WxH of the image
pbx.convert_bbox(voc_bbox, from_type="voc", to_type="yolo", image_size=(W,H))
>>> (0.15, 0.15, 0.1, 0.1)
Note that, converting to YOLO format requires the image width and height for scaling.
YOLO normalises the image space to run from 0 to 1 in both x and y directions. To convert between your (x, y) coordinates and yolo (u, v) coordinates you need to transform your data as u = x / XMAX and y = y / YMAX where XMAX, YMAX are the maximum coordinates for the image array you are using.
This all depends on the image arrays being oriented the same way.
Here is a C function to perform the conversion
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <math.h>
struct yolo {
float u;
float v;
};
struct yolo
convert (unsigned int x, unsigned int y, unsigned int XMAX, unsigned int YMAX)
{
struct yolo point;
if (XMAX && YMAX && (x <= XMAX) && (y <= YMAX))
{
point.u = (float)x / (float)XMAX;
point.v = (float)y / (float)YMAX;
}
else
{
point.u = INFINITY;
point.v = INFINITY;
errno = ERANGE;
}
return point;
}/* convert */
int main()
{
struct yolo P;
P = convert (99, 201, 255, 324);
printf ("Yolo coordinate = <%f, %f>\n", P.u, P.v);
exit (EXIT_SUCCESS);
}/* main */
There are two potential solutions. First of all you have to understand if your first bounding box is in the format of Coco or Pascal_VOC. Otherwise you can't do the right math.
Here is the formatting;
Coco Format: [x_min, y_min, width, height]
Pascal_VOC Format: [x_min, y_min, x_max, y_max]
Here are some Python Code how you can do the conversion:
Converting Coco to Yolo
# Convert Coco bb to Yolo
def coco_to_yolo(x1, y1, w, h, image_w, image_h):
return [((2*x1 + w)/(2*image_w)) , ((2*y1 + h)/(2*image_h)), w/image_w, h/image_h]
Converting Pascal_voc to Yolo
# Convert Pascal_Voc bb to Yolo
def pascal_voc_to_yolo(x1, y1, x2, y2, image_w, image_h):
return [((x2 + x1)/(2*image_w)), ((y2 + y1)/(2*image_h)), (x2 - x1)/image_w, (y2 - y1)/image_h]
If need additional conversions you can check my article at Medium: https://christianbernecker.medium.com/convert-bounding-boxes-from-coco-to-pascal-voc-to-yolo-and-back-660dc6178742
For yolo format to x1,y1, x2,y2 format
def yolobbox2bbox(x,y,w,h):
x1 = int((x - w / 2) * dw)
x2 = int((x + w / 2) * dw)
y1 = int((y - h / 2) * dh)
y2 = int((y + h / 2) * dh)
if x1 < 0:
x1 = 0
if x2 > dw - 1:
x2 = dw - 1
if y1 < 0:
y1 = 0
if y2 > dh - 1:
y2 = dh - 1
return x1, y1, x2, y2
There are two things you need to do:
Divide the coordinates by the image size to normalize them to [0..1] range.
Convert (x1, y1, x2, y2) coordinates to (center_x, center_y, width, height).
If you're using PyTorch, Torchvision provides a function that you can use for the conversion:
from torch import tensor
from torchvision.ops import box_convert
image_size = tensor([608, 608])
boxes = tensor([[100, 100, 200, 200], [300, 300, 400, 400]], dtype=float)
boxes[:, :2] /= image_size
boxes[:, 2:] /= image_size
boxes = box_convert(boxes, "xyxy", "cxcywh")
Just reading the answers I am also looking for this but find this more informative to know what happening at the backend.
Form Here: Source
Assuming x/ymin and x/ymax are your bounding corners, top left and bottom right respectively. Then:
x = xmin
y = ymin
w = xmax - xmin
h = ymax - ymin
You then need to normalize these, which means give them as a proportion of the whole image, so simple divide each value by its respective size from the values above:
x = xmin / width
y = ymin / height
w = (xmax - xmin) / width
h = (ymax - ymin) / height
This assumes a top-left origin, you will have to apply a shift factor if this is not the case.
So the answer

How to read an analogue gauge using Open CV

I'm interested in trying to read an analog gauge using a Raspberry PI and Open CV. I've only really messed with face detection in opencv, so I don't even know where to begin. Any ideas, starting points?
You can detect circles with HoughCircles method and detect lines with HoughLinesP method of with opencv lib in Python. After detecting these, you can find out the value of the gauge from the line's position via trigonometry.
You can see the sample code in python. It basically does these:
Read image with imread method.
turn it in to gray with cvtColor.
Find out the circles' center x,y coordinates and radius with HoughCircles, these method has some parameter that can be tweaked.
Detect the lines with HoughLinesP method again parameters should be tweaked.
Calculate the value, considering max value, min value on the gauge and angle interval of the gauge.
Reference: https://github.com/intel-iot-devkit/python-cv-samples/tree/master/examples/analog-gauge-reader
Hope this helps.
CODE:
import os
import cv2
import numpy
def getScriptDir():
currentFile = __file__ # May be 'my_script', or './my_script' or
realPath = os.path.realpath(currentFile) # /home/user/test/my_script.py
dirPath = os.path.dirname(realPath)
return dirPath
def getUserRealGaugeDetails():
min_angle = input('Min derece: ') #the lowest possible angle
max_angle = input('Max derece ') #highest possible angle
min_value = input('Min deger: ') #usually zero
max_value = input('Max deger: ') #maximum reading of the gauge
units = input('Birim girin: ')
return min_angle,max_angle,min_value,max_value,units
def setStaticUserRealGaugeDetails():
min_angle = 5 # input('Min angle (lowest possible angle of dial) - in degrees: ') #the lowest possible angle
max_angle = 355 # input('Max angle (highest possible angle) - in degrees: ') #highest possible angle
min_value = -20 #input('Min value: ') #usually zero
max_value = 120 #input('Max value: ') #maximum reading of the gauge
units = 'b' #input('Enter units: ')
return min_angle,max_angle,min_value,max_value,units
def getImage():
dirPath = getScriptDir()
dirPath += "/images/1.jpg"
return cv2.imread(dirPath)
def distance2Points(x1, y1, x2, y2):
#print np.sqrt((x2-x1)^2+(y2-y1)^2)
return numpy.sqrt((x2 - x1)**2 + (y2 - y1)**2)
def averageCircle(circles, b):
avg_x=0
avg_y=0
avg_r=0
for i in range(b):
#optional - average for multiple circles (can happen when a gauge is at a slight angle)
avg_x = avg_x + circles[0][i][0]
avg_y = avg_y + circles[0][i][1]
avg_r = avg_r + circles[0][i][2]
avg_x = int(avg_x/(b))
avg_y = int(avg_y/(b))
avg_r = int(avg_r/(b))
return avg_x, avg_y, avg_r
#return the center and radius of the circle
def getCircleAndCustomize(image):
height, width = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #convert to gray
# gray = cv2.GaussianBlur(gray, (5, 5), 0)
# gray = cv2.medianBlur(gray, 5)
# cv2.imwrite('C:/Users/okarademirci/Desktop/analog-gauge-reader/images/gauge-gray-2.jpg', gray)
#detect circles
#restricting the search from 35-48% of the possible radii gives fairly good results across different samples. Remember that
#these are pixel values which correspond to the possible radii search range.
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20, numpy.array([]), 100, 50, int(height*0.35), int(height*0.48))
#coordinates and radius
a, b, c = circles.shape
x,y,r = averageCircle(circles, b)
return x ,y ,r
def get_current_value(img, min_angle, max_angle, min_value, max_value, x, y, r):
gray2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Set threshold and maxValue
thresh = 175
maxValue = 255
# for testing purposes, found cv2.THRESH_BINARY_INV to perform the best
# th, dst1 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_BINARY);
# th, dst2 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_BINARY_INV);
# th, dst3 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_TRUNC);
# th, dst4 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_TOZERO);
# th, dst5 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_TOZERO_INV);
# cv2.imwrite('gauge-%s-dst1.%s' % (gauge_number, file_type), dst1)
# cv2.imwrite('gauge-%s-dst2.%s' % (gauge_number, file_type), dst2)
# cv2.imwrite('gauge-%s-dst3.%s' % (gauge_number, file_type), dst3)
# cv2.imwrite('gauge-%s-dst4.%s' % (gauge_number, file_type), dst4)
# cv2.imwrite('gauge-%s-dst5.%s' % (gauge_number, file_type), dst5)
# apply thresholding which helps for finding lines
th, dst2 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_BINARY_INV)
# found Hough Lines generally performs better without Canny / blurring, though there were a couple exceptions where it would only work with Canny / blurring
#dst2 = cv2.medianBlur(dst2, 5)
#dst2 = cv2.Canny(dst2, 50, 150)
#dst2 = cv2.GaussianBlur(dst2, (5, 5), 0)
# for testing, show image after thresholding
dirPath = getScriptDir() + '/images/afterTreshold.jpg'
cv2.imwrite(dirPath, dst2)
# find lines
minLineLength = 10
maxLineGap = 0
lines = cv2.HoughLinesP(image=dst2, rho=3, theta=numpy.pi / 180, threshold=100,minLineLength=minLineLength, maxLineGap=0) # rho is set to 3 to detect more lines, easier to get more then filter them out later
#for testing purposes, show all found lines
# for i in range(0, len(lines)):
# for x1, y1, x2, y2 in lines[i]:
# cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
# cv2.imwrite('gauge-%s-lines-test.%s' %(gauge_number, file_type), img)
# remove all lines outside a given radius
final_line_list = []
#print "radius: %s" %r
diff1LowerBound = 0.15 #diff1LowerBound and diff1UpperBound determine how close the line should be from the center
diff1UpperBound = 0.25
diff2LowerBound = 0.5 #diff2LowerBound and diff2UpperBound determine how close the other point of the line should be to the outside of the gauge
diff2UpperBound = 1.0
for i in range(0, len(lines)):
for x1, y1, x2, y2 in lines[i]:
diff1 = distance2Points(x, y, x1, y1) # x, y is center of circle
diff2 = distance2Points(x, y, x2, y2) # x, y is center of circle
#set diff1 to be the smaller (closest to the center) of the two), makes the math easier
if (diff1 > diff2):
temp = diff1
diff1 = diff2
diff2 = temp
# check if line is within an acceptable range
if (((diff1<diff1UpperBound*r) and (diff1>diff1LowerBound*r) and (diff2<diff2UpperBound*r)) and (diff2>diff2LowerBound*r)):
line_length = distance2Points(x1, y1, x2, y2)
# add to final list
final_line_list.append([x1, y1, x2, y2])
#testing only, show all lines after filtering
# for i in range(0,len(final_line_list)):
# x1 = final_line_list[i][0]
# y1 = final_line_list[i][1]
# x2 = final_line_list[i][2]
# y2 = final_line_list[i][3]
# cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
# assumes the first line is the best one
x1 = final_line_list[0][0]
y1 = final_line_list[0][1]
x2 = final_line_list[0][2]
y2 = final_line_list[0][3]
cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
#for testing purposes, show the line overlayed on the original image
#cv2.imwrite('gauge-1-test.jpg', img)
#cv2.imwrite('C:/Users/okarademirci/Desktop/analog-gauge-reader/images/gauge-%s-lines-2.%s' % (gauge_number, file_type), img)
#find the farthest point from the center to be what is used to determine the angle
dist_pt_0 = distance2Points(x, y, x1, y1)
dist_pt_1 = distance2Points(x, y, x2, y2)
if (dist_pt_0 > dist_pt_1):
x_angle = x1 - x
y_angle = y - y1
else:
x_angle = x2 - x
y_angle = y - y2
# take the arc tan of y/x to find the angle
res = numpy.arctan(numpy.divide(float(y_angle), float(x_angle)))
#np.rad2deg(res) #coverts to degrees
# print x_angle
# print y_angle
# print res
# print np.rad2deg(res)
#these were determined by trial and error
res = numpy.rad2deg(res)
if x_angle > 0 and y_angle > 0: #in quadrant I
final_angle = 270 - res
if x_angle < 0 and y_angle > 0: #in quadrant II
final_angle = 90 - res
if x_angle < 0 and y_angle < 0: #in quadrant III
final_angle = 90 - res
if x_angle > 0 and y_angle < 0: #in quadrant IV
final_angle = 270 - res
#print final_angle
old_min = float(min_angle)
old_max = float(max_angle)
new_min = float(min_value)
new_max = float(max_value)
old_value = final_angle
old_range = (old_max - old_min)
new_range = (new_max - new_min)
new_value = (((old_value - old_min) * new_range) / old_range) + new_min
return new_value
def main():
# 1) get the image from directory.
image = getImage()
min_angle,max_angle,min_value,max_value,units = setStaticUserRealGaugeDetails()
# 2) covnert the image to gray .
# 3) find the circle in the image with customization
x,y,r = getCircleAndCustomize(image)
# 4) find the line in the circle.
# 5) find the value in the range of guage
newValue = get_current_value(image,min_angle,max_angle,min_value,max_value,x,y,r)
print(newValue)
if __name__=='__main__':
main()

Hough space for cv2 houghlines

I am having some trouble with cv2.Houghlines() showing vertical lines when I believe that the real fit should provide horizontal lines.
Here is a clip of the code I am using:
rho_resoultion = 1
theta_resolution = np.pi/180
threshold = 200
lines = cv2.HoughLines(image, rho_resoultion, theta_resolution, threshold)
# print(lines)
for line in lines:
rho, theta = line[0]
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 1000*(-b))
y1 = int(y0 + 1000*(a))
x2 = int(x0 - 1000*(-b))
y2 = int(y0 - 1000*(a))
cv2.line(image,(x1,y1),(x2,y2),(255,255,255),1)
cv2.namedWindow('thing', cv2.WINDOW_NORMAL)
cv2.imshow("thing", image)
cv2.waitKey(0)
This is the input and output:
I think it would be easier to extract out what is occurring if the Hough space image could be viewed.
However, the documentation does not provide information for how to show the full hough space.
How would one show the whole Hough transform space?
I attempted reducing the threshold to 1 but it did not provide an image.
Maybe you got something wrong when calculationg the angles. Feel free to show some code.
Here is an example of how to show all Hough lines in an image:
import cv2
import numpy as np
img = cv2.imread('sudoku.jpg')
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray,50,150,apertureSize = 3)
lines = cv2.HoughLines(edges,1,np.pi/180,200)
for line in lines:
for rho,theta in line:
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 1000*(-b))
y1 = int(y0 + 1000*(a))
x2 = int(x0 - 1000*(-b))
y2 = int(y0 - 1000*(a))
cv2.line(img,(x1,y1),(x2,y2),(0,0,255),2)
cv2.imshow('Houghlines',img)
if cv2.waitKey(0) & 0xff == 27:
cv2.destroyAllWindows()
Original image:
Result:

Resources