How to do stretching Image distortion transformation - opencv

I have an assignment to transform an image
to one that has distortion effect like a dent, squeeze, stretch like this:
I have already implemented the twirl, fisheye and bulge effects, but I'm having a hard time finding the right formulas for the dent, squeeze and stretch effects.
here is my code for twirling:
import numpy as np
import cv2
import math
from google.colab.patches import cv2_imshow
# Twirl distortion: every pixel inside a circle around the image centre is
# sampled from a position rotated about that centre.  The extra rotation is
# `alpha` at the centre and falls off linearly to zero at the circle's rim;
# pixels outside the circle map to themselves.
img = cv2.imread("./orig_img.png")
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("could not read ./orig_img.png")
h, w, _ = img.shape
scale_y = 1
scale_x = 1
alpha = -1.8
center_x, center_y = (w // 2, h // 2)
radius = h / 5

# Coordinate grids: ys[y, x] == y and xs[y, x] == x.  Working on whole arrays
# replaces the per-pixel Python loop.
ys, xs = np.indices((h, w), dtype=np.float64)
delta_x = scale_x * (xs - center_x)
delta_y = scale_y * (ys - center_y)
distance_sq = delta_x * delta_x + delta_y * delta_y
distance = np.sqrt(distance_sq)
inside = distance_sq < radius * radius

# The angle is measured from the y axis (arctan2(dx, dy)), so the x offset is
# r*sin(theta) and the y offset is r*cos(theta).
theta = np.arctan2(delta_x, delta_y) + alpha * (radius - distance) / radius
flex_x = np.where(inside, distance * np.sin(theta) + center_x, xs).astype(np.float32)
flex_y = np.where(inside, distance * np.cos(theta) + center_y, ys).astype(np.float32)

dst = cv2.remap(img, flex_x, flex_y, cv2.INTER_LINEAR)
cv2_imshow(dst)
Anyone who has experience with this kind of transformation, please help me! I'm really thankful.

Related

Binarization for large directional gradient grayscale image

I am trying to binarize grayscale images with large gradients in one direction.
Apparently, normal methods including Otsu's are not good enough to do this job.
I tried to use Sobel gradients and the Bradley adaptive thresholding, by which I get OK results, but there are some issues as indicated in the attached picture.
From the section gradient curve, we could see the gradient difference is very big at the beginning, so I split the Sobel result and do the adaptive thresholding separately, and then fuse the two results together.
My question is:
Are there any other better methods to do this job? To better understand what I do, I post my sample python code here. Thanks for your attention.
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
def bradley_threshold(inputMat, ddepth):
    """Binarize a 2-D image with an integral-image based adaptive threshold.

    For every pixel the sum of a square neighbourhood (clamped to the image)
    is read from the integral image.  The pixel becomes 0 when
    ``pixel * window_area <= window_sum * (1 - T)`` with ``T = 0.15``, and
    255 otherwise.  The window half-size is ``int(max(rows, cols) / 8 / 2)``.

    Args:
        inputMat: 2-D image array (indexed as ``inputMat[row, col]``).
        ddepth: OpenCV depth constant for the integral image
            (for example ``cv.CV_32F`` or ``cv.CV_64F``).

    Returns:
        ``np.uint8`` array with the same shape as ``inputMat`` holding only
        the values 0 and 255.  When the window area is zero the comparison
        reduces to ``0 <= 0`` and the pixel is set to 0.
    """
    nRows = inputMat.shape[0]
    nCols = inputMat.shape[1]

    # The second positional parameter of cv.integral is the *output array*,
    # not the depth, so the depth has to be given by keyword to take effect.
    sumMat = cv.integral(inputMat, sdepth=ddepth)

    S = max(nRows, nCols) / 8
    T = 0.15
    s2 = int(S / 2)

    # Window bounds for every row / column, clamped to the image and then
    # shifted by one to address the integral image (which has one extra
    # leading row and column).
    rows = np.arange(nRows)
    cols = np.arange(nCols)
    y1 = np.maximum(rows - s2, 0) + 1
    y2 = np.minimum(rows + s2, nRows - 1) + 1
    x1 = np.maximum(cols - s2, 0) + 1
    x2 = np.minimum(cols + s2, nCols - 1) + 1

    count = (y2 - y1)[:, np.newaxis] * (x2 - x1)[np.newaxis, :]

    # Four-corner lookup of the window sum for all pixels at once.
    window_sum = (
        sumMat[np.ix_(y2, x2)]
        - sumMat[np.ix_(y1, x2)]
        - sumMat[np.ix_(y2, x1)]
        + sumMat[np.ix_(y1, x1)]
    )

    is_dark = inputMat * count <= window_sum * (1.0 - T)
    return np.where(is_dark, 0, 255).astype(np.uint8)
if __name__ == '__main__':
    # Compare thresholding the whole gradient image in one pass against
    # thresholding two vertical strips separately and stitching the results.
    gray = cv.imread('sample.png', cv.IMREAD_UNCHANGED)
    if gray is None:
        # cv.imread signals failure by returning None rather than raising.
        raise FileNotFoundError("could not read sample.png")
    image = cv.cvtColor(gray, cv.COLOR_GRAY2BGR)
    blur = cv.medianBlur(gray, 3)
    # NOTE(review): Sobel is applied to `gray`; `blur` and `image` are never
    # used afterwards -- confirm whether the blurred image was meant here.
    # NOTE(review): in the Python binding the fifth positional argument of
    # cv.Sobel is `dst`, not `ksize` -- confirm whether ksize=1 was intended.
    sobel = cv.Sobel(gray, cv.CV_32F, 1, 0, 1)

    # Keep only the negative horizontal gradients, as positive magnitudes.
    inverse = -sobel
    inverse[inverse < 0] = 0

    # One-pass result over the full image.
    thresh = bradley_threshold(inverse, cv.CV_32F)

    # Two-pass result: split at column `splitter`, threshold each strip on
    # its own, then join the strips side by side again.
    splitter = 31
    part1 = inverse[:, :splitter]
    part2 = inverse[:, splitter:]
    thresh1 = bradley_threshold(part1, cv.CV_32F)
    thresh2 = bradley_threshold(part2, cv.CV_32F)
    thresh_part = np.concatenate((thresh1, thresh2), axis=1)

    cv.imwrite('bad_binary.png', thresh)
    cv.imwrite('good_binary.png', thresh_part)

    plt.imshow(thresh, cmap='gray')
    plt.show()
    plt.imshow(thresh_part, cmap='gray')
    plt.show()
    # Gradient profile along the last image row.
    plt.plot(inverse[inverse.shape[0] - 1, :])
    plt.show()

Real-time OCR video streaming lags when running text recognition in real time

I have finished installing the packages and debugging. This code is from https://github.com/LaggyHammer/real-time-OCR. When I run it, the camera feed (or video stream) lags and is noticeably delayed on my end.
Here's the code of what I executed through CLI.
# coding: utf-8
# =====================================================================
# Filename: video_text_detection.py
#
# py Ver: python 3.6 or later
#
# Description: Recognizes regions of text in a given video or through the webcam feed
#
# Usage: python real_time_ocr.py --east frozen_east_text_detection.pb
# or
# python real_time_ocr.py --east frozen_east_text_detection.pb --video test.avi
#
# Note: Requires opencv 3.4.2 or later
# For more in-script documentation, look at video_text_detection_modular.py
#
# Author: Ankit Saxena (ankch24#gmail.com)
# =====================================================================
from imutils.video import VideoStream
from imutils.video import FPS
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse
import imutils
import time
import cv2
import pytesseract
# setting up tesseract path
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
def box_extractor(scores, geometry, min_confidence):
    """Turn score/geometry maps into candidate rectangles and confidences.

    Every cell of ``scores[0, 0]`` whose value is not below
    ``min_confidence`` yields one rectangle.  The cell position is scaled by
    4 to get the offset, and the five ``geometry`` channels at that cell
    (four distances and an angle) give the rectangle's extent.

    Args:
        scores: 4-D array; ``scores[0, 0, y, x]`` is the confidence at a cell.
        geometry: 4-D array; ``geometry[0, 0..3, y, x]`` are the distances
            and ``geometry[0, 4, y, x]`` is the angle for that cell.
        min_confidence: cells scoring below this value are skipped.

    Returns:
        ``(rectangles, confidences)``: a list of
        ``(start_x, start_y, end_x, end_y)`` integer tuples and the list of
        matching scores, in row-major cell order.
    """
    num_rows, num_cols = scores.shape[2:4]
    rectangles, confidences = [], []

    for y in range(num_rows):
        row_scores = scores[0, 0, y]
        row_d0 = geometry[0, 0, y]
        row_d1 = geometry[0, 1, y]
        row_d2 = geometry[0, 2, y]
        row_d3 = geometry[0, 3, y]
        row_angles = geometry[0, 4, y]

        for x in range(num_cols):
            score = row_scores[x]
            if score < min_confidence:
                continue

            # Each map cell corresponds to a 4-unit step in offset space.
            offset_x, offset_y = x * 4.0, y * 4.0

            theta = row_angles[x]
            cos_t = np.cos(theta)
            sin_t = np.sin(theta)

            d0, d1, d2, d3 = row_d0[x], row_d1[x], row_d2[x], row_d3[x]
            box_h = d0 + d2
            box_w = d1 + d3

            # The end corner is derived first; the start corner is then the
            # end corner shifted back by the box size.
            end_x = int(offset_x + (cos_t * d1) + (sin_t * d2))
            end_y = int(offset_y + (cos_t * d2) - (sin_t * d1))
            start_x = int(end_x - box_w)
            start_y = int(end_y - box_h)

            rectangles.append((start_x, start_y, end_x, end_y))
            confidences.append(score)

    return rectangles, confidences
def get_arguments():
    """Parse the command-line options and return them as a dictionary.

    Returns:
        dict with the keys ``video``, ``east``, ``min_confidence``,
        ``width``, ``height`` and ``padding``.  ``east`` is required; the
        others fall back to their defaults (``video`` to ``None``).
    """
    # (flags, keyword settings) for each option, in registration order.
    option_specs = (
        (('-v', '--video'),
         dict(type=str, help='path to optional video file')),
        (('-east', '--east'),
         dict(type=str, required=True,
              help='path to EAST text detection model')),
        (('-c', '--min_confidence'),
         dict(type=float, default=0.5,
              help='minimum confidence to process a region')),
        (('-w', '--width'),
         dict(type=int, default=320,
              help='resized image width (multiple of 32)')),
        (('-e', '--height'),
         dict(type=int, default=320,
              help='resized image height (multiple of 32)')),
        (('-p', '--padding'),
         dict(type=float, default=0.0,
              help='padding on each ROI border')),
    )

    parser = argparse.ArgumentParser()
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    return vars(parser.parse_args())
if __name__ == '__main__':
    # Read frames from the webcam (default) or a video file, detect text
    # regions with the EAST network, run Tesseract on every region and show
    # the annotated frame until the stream ends or 'q' is pressed.
    args = get_arguments()
    use_video_file = bool(args.get('video', False))

    w, h = None, None
    new_w, new_h = args['width'], args['height']
    ratio_w, ratio_h = None, None

    # Output layers requested from the network: scores, then geometry.
    layer_names = ['feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3']

    # Tesseract options are the same for every region, so build them once.
    config = '-l eng --oem 1 --psm 7'

    print("[INFO] loading EAST text detector...")
    net = cv2.dnn.readNet(args["east"])

    if not use_video_file:
        print("[INFO] starting video stream...")
        vs = VideoStream(src=0).start()
        # NOTE(review): a zero-second sleep gives the camera no warm-up
        # time -- confirm whether a real delay was intended here.
        time.sleep(0)
    else:
        vs = cv2.VideoCapture(args['video'])

    fps = FPS().start()

    try:
        while True:
            frame = vs.read()
            # cv2.VideoCapture.read() returns (ok, frame); VideoStream.read()
            # returns the frame itself.
            frame = frame[1] if use_video_file else frame
            if frame is None:
                break

            frame = imutils.resize(frame, width=500)
            orig = frame.copy()
            orig_h, orig_w = orig.shape[:2]

            # The scale from network input size back to the displayed frame
            # is computed once, from the first frame.
            if w is None or h is None:
                h, w = frame.shape[:2]
                ratio_w = w / float(new_w)
                ratio_h = h / float(new_h)

            frame = cv2.resize(frame, (new_w, new_h))
            blob = cv2.dnn.blobFromImage(frame, 1.0, (new_w, new_h),
                                         (123.68, 116.78, 103.94),
                                         swapRB=True, crop=False)
            net.setInput(blob)
            scores, geometry = net.forward(layer_names)

            rectangles, confidences = box_extractor(
                scores, geometry, min_confidence=args['min_confidence'])
            boxes = non_max_suppression(np.array(rectangles), probs=confidences)

            for (start_x, start_y, end_x, end_y) in boxes:
                # Map the box from network-input coordinates to the frame.
                start_x = int(start_x * ratio_w)
                start_y = int(start_y * ratio_h)
                end_x = int(end_x * ratio_w)
                end_y = int(end_y * ratio_h)

                # Grow the box by the requested padding, clamped to the frame.
                dx = int((end_x - start_x) * args['padding'])
                dy = int((end_y - start_y) * args['padding'])
                start_x = max(0, start_x - dx)
                start_y = max(0, start_y - dy)
                end_x = min(orig_w, end_x + (dx * 2))
                end_y = min(orig_h, end_y + (dy * 2))

                # ROI to be recognized
                roi = orig[start_y:end_y, start_x:end_x]
                if roi.size == 0:
                    # Clamping or integer truncation can leave an empty
                    # crop; there is nothing for Tesseract to read in it.
                    continue

                # recognizing text
                text = pytesseract.image_to_string(roi, config=config)

                cv2.rectangle(orig, (start_x, start_y), (end_x, end_y),
                              (0, 255, 0), 2)
                cv2.putText(orig, text, (start_x, start_y - 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)

            fps.update()

            cv2.imshow("Detection", orig)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break

        fps.stop()
        print(f"[INFO] elapsed time {round(fps.elapsed(), 2)}")
        print(f"[INFO] approx. FPS : {round(fps.fps(), 2)}")
    finally:
        # Release the capture source and close the window even when the
        # loop above raised.
        if not use_video_file:
            vs.stop()
        else:
            vs.release()
        cv2.destroyAllWindows()
Is there an easy way to make the video streaming smoother with this code?

Drawing Circle/SemiCircle in Metal

I am new to Metal. I want to draw a semicircle. Any ideas or suggestions for drawing a semicircle? I tried using tessellation:
// The three components of the patch coordinate, used below as the weights
// of the three control points.
float u = patch_coord.x;
float v = patch_coord.y;
float w = patch_coord.z;
// Weighted sum of the three control points' x (then y) positions.
float x = u * patchIn.control_points[0].position.x + v * patchIn.control_points[1].position.x + w * patchIn.control_points[2].position.x;
float y = u * patchIn.control_points[0].position.y + v * patchIn.control_points[1].position.y + w * patchIn.control_points[2].position.y;
// normalize() scales (x, y) to unit length measured from the origin (0, 0),
// so the result always lies on a circle of radius 1/25 around the origin.
float2 tangent = normalize(float2(x, y)) / 25;
When I normalize, the result is always in the middle of the screen. I want to draw the circle where my finger touches the screen.

Draw a line using an angle and a point in OpenCV

I have a point and an angle in OpenCV, how can I draw that using those parameters and not using 2 points?
Thanks so much!
Just use the equation
x2 = x1 + length * cos(θ)
y2 = y1 + length * sin(θ)
and θ should be in radians
θ = angle * π / 180.0
In OpenCV you can rewrite the above equation like
// Direction of the line in degrees, and its length.
int angle = 45;
int length = 150;
// Known start point of the line.
Point P1(50,50);
// End point, computed below from P1, the angle and the length.
Point P2;
// Convert the angle from degrees to radians (angle * CV_PI / 180), offset P1
// by length * cos / length * sin, and round to the nearest integer coordinate.
P2.x = (int)round(P1.x + length * cos(angle * CV_PI / 180.0));
P2.y = (int)round(P1.y + length * sin(angle * CV_PI / 180.0));
Done!

Draw Perpendicular line to a line in opencv

I better explain my problem with an Image
I have a contour and a line which is passing through that contour.
At the point where the line intersects the contour, I want to draw a perpendicular line extending up to a particular distance.
I know the intersection point as well as slope of the line.
For reference I am attaching this Image.
If the blue line in your picture goes from point A to point B, and you want to draw the red line at point B, you can do the following:
Get the direction vector going from A to B. This would be:
v.x = B.x - A.x; v.y = B.y - A.y;
Normalize the vector:
mag = sqrt (v.x*v.x + v.y*v.y); v.x = v.x / mag; v.y = v.y / mag;
Rotate the vector 90 degrees by swapping x and y, and inverting one of them. Note about the rotation direction: In OpenCV and image processing in general x and y axis on the image are not oriented in the Euclidian way, in particular the y axis points down and not up. In Euclidian, inverting the final x (initial y) would rotate counterclockwise (standard for euclidean), and inverting y would rotate clockwise. In OpenCV it's the opposite. So, for example to get clockwise rotation in OpenCV: temp = v.x; v.x = -v.y; v.y = temp;
Create a new line at B pointing in the direction of v:
C.x = B.x + v.x * length; C.y = B.y + v.y * length;
(Note that you can make it extend in both directions by creating a point D in the opposite direction by simply negating length.)
This is my version of the function :
def getPerpCoord(aX, aY, bX, bY, length):
    """Return the endpoints of a segment through B perpendicular to line A->B.

    The direction from A to B is normalised, rotated by 90 degrees
    ((x, y) -> (-y, x)) and scaled by ``length`` to obtain point C; point D
    lies the same distance from B in the opposite direction.

    Args:
        aX, aY: coordinates of point A.
        bX, bY: coordinates of point B, where the perpendicular is anchored.
        length: distance from B to each returned endpoint.

    Returns:
        ``(cX, cY, dX, dY)`` with every coordinate truncated to ``int``.
        ``(0, 0, 0, 0)`` is returned when A and B coincide, because no
        direction can be derived from a zero-length vector.
    """
    vX = bX - aX
    vY = bY - aY

    # Only a zero-length vector is degenerate.  Horizontal (vY == 0) and
    # vertical (vX == 0) lines have a perfectly well-defined perpendicular.
    if vX == 0 and vY == 0:
        return 0, 0, 0, 0

    mag = math.hypot(vX, vY)
    unitX = vX / mag
    unitY = vY / mag

    # Rotate the unit vector by 90 degrees.
    perpX, perpY = -unitY, unitX

    cX = bX + perpX * length
    cY = bY + perpY * length
    dX = bX - perpX * length
    dY = bY - perpY * length
    return int(cX), int(cY), int(dX), int(dY)

Resources