I came across a paper which uses synthetic handwriting data generated with the ImageMagick convert command, using a lot of different handwriting fonts (Example images from paper).
They have annotated these images with their transcriptions, but I would like to annotate them with a bounding box for each individual character. I was wondering if this is possible with ImageMagick or any other available tool/script/code.
I have solved this problem by using ImageMagick to iteratively generate new characters one at a time and by masking out the previous characters with OpenCV to get the bounding box for the new character (Example result).
Example code:
import os
import subprocess

import cv2
import numpy as np

# Render the text one character longer on every pass with ImageMagick and
# diff each rendering against the previous one: whatever ink is new belongs
# to the character that was just added.
full_text = 'OpenCV'
out_dir = 'test_out'
im_size = 'x75'
font = 'ambarella/Ambarella.ttf'
other_options = ['-gravity', 'West', '-stroke', 'black']

# ImageMagick does not create missing output directories.
os.makedirs(out_dir, exist_ok=True)

# One entry per character: (x1, y1, x2, y2), or None when the character
# added no visible ink (e.g. a space).
bboxes = []
prev_img = None
fname = None

# For each letter
for i in range(len(full_text)):
    text = full_text[:i + 1]
    fname = os.path.join(out_dir, str(i) + '.jpg')
    # An argument list (no shell) avoids quoting problems in the text/font.
    command = ['convert', '-size', im_size, '-font', font,
               *other_options, 'label:' + text, fname]
    subprocess.run(command, check=True)

    img = cv2.imread(fname, 0)
    if img is None:
        raise RuntimeError('could not read rendered image ' + fname)
    # Threshold the image (also removes JPEG compression noise)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

    if prev_img is None:
        # First character: all ink in the image belongs to it.
        masked_out = img
    else:
        h, w = img.shape
        d_h, d_w = h - prev_img.shape[0], w - prev_img.shape[1]
        # Pad the older image with white so both images have the same size
        if d_h > 0 or d_w > 0:
            prev_img = cv2.copyMakeBorder(prev_img, max(d_h, 0), 0, 0, max(d_w, 0),
                                          cv2.BORDER_CONSTANT, value=255)
        # Mask the previous letters
        masked_out = img.copy()
        masked_out[prev_img == 0] = 255

    # Get bounding box of the new letter
    nonzero = cv2.findNonZero(255 - masked_out)
    if nonzero is None:
        bboxes.append(None)
    else:
        x1, y1, w, h = cv2.boundingRect(nonzero)
        bboxes.append((x1, y1, x1 + w, y1 + h))

    # Set prev image to current image
    prev_img = img.copy()

# Visualize results on the last (complete) rendering
colors = ((255, 0, 0), (0, 255, 0), (0, 0, 255))
img = cv2.imread(fname)
for i, b in enumerate(bboxes):
    if b is None:
        continue
    x1, y1, x2, y2 = b
    cv2.rectangle(img, (x1, y1), (x2, y2), colors[i % len(colors)], 1)
cv2.imwrite('boxes.png', img)
I'm using OpenCV 4.4 and running the following code to detect the lines of a grid. When I display the image, it always detects only one line, as shown in the screenshot. How can I detect all the vertical lines in the grid?
# Question snippet: run Canny + the standard Hough transform on the grid image
# and draw the detected line(s) in blue.
grid = cv2.imread('images/grid.jpeg')
grayscale = cv2.cvtColor(grid, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(grayscale, 50, 150, apertureSize=3)
lines = cv2.HoughLines(edges, 1, np.pi/180, 100)
# NOTE(review): only `lines[0]` (the first entry of the result) is iterated
# here, which is consistent with the reported symptom that a single line is
# drawn. Presumably every entry of `lines` should be visited - confirm against
# the HoughLines return layout of the installed OpenCV version.
for rho, theta in lines[0]:
# (rho, theta) is the line in polar form; (x0, y0) is the point on the line
# closest to the origin.
a = np.cos(theta)
b = np.sin(theta)
x0 = a * rho
y0 = b * rho
# Step 1000 px in both directions along the line direction (-b, a) to get two
# endpoints for drawing.
x1 = int(x0 + 1000 * (-b))
y1 = int(y0 + 1000 * (a))
x2 = int(x0 - 1000 * (-b))
y2 = int(y0 - 1000 * (a))
cv2.line(grid, (x1, y1), (x2, y2), (255, 0, 0), 2)
cv2.imshow("Lines", grid)
Original Image:
You can use the FastLineDetector algorithm (`cv2.ximgproc.createFastLineDetector`).
First, find the edges of your image, as @Micka suggested:
# Load the image, convert it to grayscale and compute a Canny edge map
# (hysteresis thresholds 50 and 200).
img = cv2.imread("lines.png")
img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_cny = cv2.Canny(img_gry, 50, 200)
To detect the vertical lines, the difference between the x-coordinates of a segment's two endpoints should be close to 0, since only the y-coordinate changes along a vertical line:
# Draw the segment in red only when its endpoints differ by less than 3 px in x,
# i.e. when it is (almost) vertical.
if abs(x1 - x2) < 3:
    cv2.line(img, pt1=(x1, y1), pt2=(x2, y2), color=(0, 0, 255), thickness=3)
import cv2

# Detect line segments on the Canny edge map and highlight the vertical ones.
img = cv2.imread("lines.png")
if img is None:
    raise SystemExit("could not read lines.png")
img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_cny = cv2.Canny(img_gry, 50, 200)

# Requires the opencv-contrib build (cv2.ximgproc).
lns = cv2.ximgproc.createFastLineDetector().detect(img_cny)

# The detector may return None when no segment is found.
for ln in (lns if lns is not None else []):
    # Each entry wraps one segment as [x1, y1, x2, y2] (float coordinates).
    x1, y1, x2, y2 = (int(v) for v in ln[0][:4])
    # Nearly equal x-coordinates mean the segment is vertical.
    if abs(x1 - x2) < 3:
        cv2.line(img, pt1=(x1, y1), pt2=(x2, y2), color=(0, 0, 255), thickness=3)

cv2.imshow("lns", img)
# imshow only renders once the GUI event loop runs; wait for a key press.
cv2.waitKey(0)
cv2.destroyAllWindows()
I want to detect playing cards and found a .cfg and a .weights file for this. The classes file has the names of the 52 cards. The following code gives an "index out of range" error. I couldn't understand the outputs of YOLO or how to get the detected labels. I am new to this and have been trying to understand it. Can someone please help?
# Question snippet: run a YOLO card detector through cv2.dnn and draw the
# detections that survive non-maximum suppression.
import cv2
import numpy as np
# Load Yolo
net = cv2.dnn.readNet("yolocards_608.weights", "yolocards.cfg")
classes = []
with open("cards.names", "r") as f:
classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
# NOTE(review): `i[0]` assumes getUnconnectedOutLayers() yields nested entries;
# TODO confirm for the installed OpenCV version.
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
# One random colour per class: `colors` has len(classes) rows.
colors = np.random.uniform(0, 255, size=(len(classes), 3))
# Loading image
img = cv2.imread("playing_cards_image.jpg")
img = cv2.resize(img, None, fx=0.4, fy=0.4)
height, width, channels = img.shape
# Detecting objects
# NOTE(review): `blob` is built here but is not handed to `net` anywhere in
# this snippet before forward() is called.
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
outs = net.forward(output_layers)
# Showing informations on the screen
class_ids = []
confidences = []
boxes = []
for out in outs:
for detection in out:
# NOTE(review): `detection[:]` is the whole row, so the argmax below also
# ranges over the box fields detection[0..3] that are read further down.
scores = detection[:]
class_id = np.argmax(scores)
confidence = scores[class_id]
if confidence > 0.5:
# Object detected
# detection[0..3] are scaled by the image width/height to get pixel values.
center_x = int(detection[0] * width)
center_y = int(detection[1] * height)
w = int(detection[2] * width)
h = int(detection[3] * height)
# Rectangle coordinates
x = int(center_x - w / 2)
y = int(center_y - h / 2)
# NOTE(review): only `boxes` is appended to; `class_ids` and `confidences`
# are never filled in this snippet although both are used below.
boxes.append([x, y, w, h])
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
# NOTE(review): the loop variable is `j`, but the body uses `i`.
for j in range(len(boxes)):
if i in indexes:
x, y, w, h = boxes[i]
label = str(classes[class_ids[i]])
# NOTE(review): `i` is used as a box index here, while `colors` has one row
# per class; this is consistent with the reported
# "index 52 is out of bounds for axis 0 with size 52".
color = colors[i]
cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
# NOTE(review): `font` is not defined anywhere in this snippet.
cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
IndexError Traceback (most recent call last)
<ipython-input-46-adaf82305ab8> in <module>
6 label = str(classes[class_ids[i]])
7 print(label)
----> 8 color = colors[i]
9 cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
10 cv2.putText(img, label, (x, y + 30), font, 3, color, 3)
IndexError: index 52 is out of bounds for axis 0 with size 52
I'm working on a project about recognizing moroccan license plates which look like this image :
Moroccan License Plate
Please how can I use OpenCV to cut the license plate out and Tesseract to read the numbers and arabic letter in the middle.
I have looked into this research paper : https://www.researchgate.net/publication/323808469_Moroccan_License_Plate_recognition_using_a_hybrid_method_and_license_plate_features
I have installed OpenCV and Tesseract for python in Windows 10. When I run the tesseract on the text only part of the license plate using "fra" language I get 7714315l Bv. How can I separate the data?
The arabic letters we use in Morocco are :
أ ب ت ج ح د هـ
The expected result is : 77143 د 6
The vertical lines are irrelevant, I have to use them to separate the image and read data separately.
Thanks in advance!
You can use HoughTransform since the two vertical lines are irrelevant, to crop the image:
import numpy as np
import cv2

# Find the near-vertical separator lines of the plate with the probabilistic
# Hough transform and crop the image at their x positions.
image = cv2.imread("lines.jpg")
grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
dst = cv2.Canny(grayImage, 0, 150)
cv2.imwrite("canny.jpg", dst)

lines = cv2.HoughLinesP(dst, 1, np.pi / 180, 50, None, 60, 20)

# Get height and width to constrain detected lines
height, width, channels = image.shape

# x-coordinates of the accepted vertical lines
lines_x = []
# HoughLinesP returns None when no segment is found.
for entry in (lines if lines is not None else []):
    x1, y1, x2, y2 = (int(v) for v in entry[0])
    # Check if the line is vertical or not. The endpoint order of a segment
    # is arbitrary, so compare the absolute angle: a vertical segment can be
    # reported as +90 or -90 degrees.
    angle = np.arctan2(y2 - y1, x2 - x1) * 180.0 / np.pi
    # Ignore anything in the left quarter of the image (plate border).
    if x2 > width / 4 and x1 > width / 4 and 70 < abs(angle) < 100:
        lines_x.append(x2)
        # To draw the detected lines
        # cv2.line(image, (x1, y1), (x2, y2), (0, 0, 255), 3, cv2.LINE_AA)

# Sorting to get the line with the maximum x-coordinate first for proper cropping
lines_x.sort(reverse=True)

crop_image = "cropped_lines"
for i, line_x in enumerate(lines_x):
    if i == 0:
        # Cropping from the right-most line to the end
        img = image[0:height, line_x:width]
    else:
        # Cropping from the start up to this line
        img = image[0:height, 0:line_x]
    cv2.imwrite(crop_image + str(i) + ".jpg", img)
I am sure you know now how to get the middle part ;)
Hope it helps!
Using some morphological operations, you can also extract the characters individually:
import numpy as np
import cv2

# Box the individual characters of the plate using edges + external contours.
image = cv2.imread("lines.jpg")
grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
dst = cv2.Canny(grayImage, 50, 100)

# NOTE(review): cv2.MORPH_RECT is a structuring-element *shape* constant that
# is passed in the operation slot here, and the kernel is all zeros.
# Presumably a real operation (e.g. cv2.MORPH_CLOSE) with an np.ones kernel
# was intended - confirm before relying on this step. The arguments are kept
# exactly as given; only the truncated call was closed.
dst = cv2.morphologyEx(dst, cv2.MORPH_RECT, np.zeros((5, 5), np.uint8))
cv2.imwrite("canny.jpg", dst)

# findContours returns (image, contours, hierarchy) on OpenCV 3 and
# (contours, hierarchy) on OpenCV 4; index -2 is the contour list on both.
contours = cv2.findContours(dst, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

for contour in contours:
    # Skip small specks.
    if cv2.contourArea(contour) <= 200:
        continue
    x, y, w, h = cv2.boundingRect(contour)
    # The w constraint removes the thin vertical separator lines
    if w > 10:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 1)

cv2.imwrite("contour.jpg", image)
This is what I have achieved so far...
The detection on second image was made by using the code found here: License plate detection with OpenCV and Python
The full code (which works from the third image onwards) is this:
import cv2
import numpy as np
import tesserocr as tr
from PIL import Image

# Isolate the text area of an already-cropped plate image and OCR it.
image = cv2.imread("cropped.png")
if image is None:
    raise SystemExit("could not read cropped.png")

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)

thresh = cv2.adaptiveThreshold(gray, 250, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 255, 1)
cv2.imshow('thresh', thresh)

kernel = np.ones((1, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)

# findContours returns 3 values on OpenCV 3 and 2 on OpenCV 4; index -2 is the
# contour list on both.
ctrs = cv2.findContours(img_dilation.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Process contours left to right.
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

clean_plate = 255 * np.ones_like(img_dilation)
found_roi = False

for ctr in sorted_ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    roi = img_dilation[y:y + h, x:x + w]
    # these are very specific values made for this image only - it's not a factotum code
    if h > 70 and w > 100:
        rect = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        clean_plate[y:y + h, x:x + w] = roi
        cv2.imshow('ROI', rect)
        cv2.imwrite('roi.png', roi)
        found_roi = True

if not found_roi:
    raise SystemExit("no contour matched the size filter; nothing to OCR")

img = cv2.imread("roi.png")
blur = cv2.medianBlur(img, 1)
cv2.imshow('4 - blur', blur)

pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

# The context manager releases the Tesseract instance even if OCR fails.
with tr.PyTessBaseAPI() as api:
    api.SetImage(pil_img)
    text = api.GetUTF8Text()

# clean the string a bit
text = str(text).strip()

plate = ""
# 77143-1916 ---> NNNNN|symbol|N
if len(text) >= 3:
    firstSection = text[:5]
    # the arabic symbol is easy because it's nearly impossible for the OCR to
    # misread the last 2 digits, so the symbol is always the third char from
    # the end (right to left)
    symbol = text[-3]
    lastChar = text[-1]
    plate = firstSection + "[" + symbol + "]" + lastChar
print(plate)

# Keep the preview windows open until a key is pressed.
cv2.waitKey(0)
cv2.destroyAllWindows()
For arabic symbols you should install additional languages from TesseractOCR (and possibly use the version 4 of it).
Output: 77143[9]6
The number between brackets is the arabic symbol (undetected).
Hope I helped you.
Is it possible to do the following ImageMagick perspective distort command using VIPS? If so, what would the command be (using ruby-vips)?
$ convert my_file.png -matte -virtual-pixel transparent +distort Perspective '0,0,0,60 1500,0,300,0 0,2100,0,2310 1500,2100,300,2100' -crop 300x2310+0+0
There isn't a built-in thing for perspective distort, but you can make one using mapim:
require 'vips'

source = Vips::Image.new_from_file ARGV[0]

# Perspective distortion. For every pixel of the result, the coordinates to
# sample from are computed from that pixel's own (x, y) as
#   x' = (A x + B y + C) / (G x + H y + 1)
#   y' = (D x + E y + F) / (G x + H y + 1)
# with the eight constants A .. H taken from the transform matrix
#   T = [A, B, .. H]
T = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0003, 0.0001]

# Index image: band 0 holds each pixel's x coordinate, band 1 its y coordinate.
coords = Vips::Image.xyz source.width, source.height
col = coords[0]
row = coords[1]

# The projective divisor is shared by both output coordinates.
divisor = col * T[6] + row * T[7] + 1
map_x = (col * T[0] + row * T[1] + T[2]) / divisor
map_y = (col * T[3] + row * T[4] + T[5]) / divisor

# Join x and y into a two-band map image and resample the source through it.
warped = source.mapim(map_x.bandjoin(map_y))

warped.write_to_file ARGV[1]
You'd also need something to calculate the transform from a set of tie-points, of course.
edit: fixed typo
I'm interested in trying to read an analog gauge using a Raspberry PI and Open CV. I've only really messed with face detection in opencv, so I don't even know where to begin. Any ideas, starting points?
You can detect circles with the HoughCircles method and lines with the HoughLinesP method of the OpenCV library in Python. After detecting these, you can work out the value shown on the gauge from the line's position using trigonometry.
You can see the sample code in Python below. It basically does the following:
Read the image with the imread method.
Convert it to grayscale with cvtColor.
Find the circle's center x, y coordinates and radius with HoughCircles; this method has some parameters that can be tweaked.
Detect the lines with the HoughLinesP method; again, its parameters should be tweaked.
Calculate the value, taking into account the max value and min value on the gauge and the angle interval of the gauge.
Reference: https://github.com/intel-iot-devkit/python-cv-samples/tree/master/examples/analog-gauge-reader
Hope this helps.
import os
import cv2
import numpy
def getScriptDir():
    """Return the directory containing this script, with symlinks resolved."""
    # __file__ may be relative ('my_script', './my_script'); realpath makes it
    # absolute before the file name is stripped.
    return os.path.dirname(os.path.realpath(__file__))
def getUserRealGaugeDetails():
    """Prompt the user for the gauge calibration.

    Returns:
        A tuple ``(min_angle, max_angle, min_value, max_value, units)``. The
        four numeric entries are converted to ``float`` so the result has the
        same numeric form as ``setStaticUserRealGaugeDetails``; ``units`` is
        returned as typed.

    Raises:
        ValueError: If one of the numeric answers is not a valid number.
    """
    min_angle = float(input('Min derece: '))  # the lowest possible angle
    max_angle = float(input('Max derece '))  # highest possible angle
    min_value = float(input('Min deger: '))  # usually zero
    max_value = float(input('Max deger: '))  # maximum reading of the gauge
    units = input('Birim girin: ')
    return min_angle, max_angle, min_value, max_value, units
def setStaticUserRealGaugeDetails():
    """Return a fixed gauge calibration instead of prompting the user.

    Returns:
        A tuple ``(min_angle, max_angle, min_value, max_value, units)``.
    """
    min_angle = 5  # lowest possible angle of the dial, in degrees
    max_angle = 355  # highest possible angle of the dial, in degrees
    min_value = -20  # reading at min_angle
    max_value = 120  # maximum reading of the gauge
    units = 'b'
    return min_angle, max_angle, min_value, max_value, units
def getImage():
    """Load ``images/1.jpg`` from the script directory.

    Returns:
        The image as returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If the image cannot be read (``cv2.imread`` signals
            this by returning ``None`` rather than raising).
    """
    imagePath = getScriptDir() + "/images/1.jpg"
    image = cv2.imread(imagePath)
    if image is None:
        raise FileNotFoundError("could not read gauge image: " + imagePath)
    return image
def distance2Points(x1, y1, x2, y2):
    """Return the Euclidean distance between ``(x1, y1)`` and ``(x2, y2)``."""
    return numpy.hypot(x2 - x1, y2 - y1)
def averageCircle(circles, b):
    """Average the first ``b`` circles in ``circles[0]``.

    Several circles can be detected for one gauge (e.g. when it is seen at a
    slight angle); averaging them gives a single centre and radius.

    Args:
        circles: Indexable as ``circles[0][i] -> (x, y, r)``.
        b: Number of circles to average.

    Returns:
        ``(avg_x, avg_y, avg_r)`` truncated to ``int``.

    Raises:
        ValueError: If ``b`` is not positive.
    """
    if b <= 0:
        raise ValueError("averageCircle needs at least one circle")
    # The accumulators must start at zero before being added to.
    avg_x = 0
    avg_y = 0
    avg_r = 0
    for i in range(b):
        avg_x = avg_x + circles[0][i][0]
        avg_y = avg_y + circles[0][i][1]
        avg_r = avg_r + circles[0][i][2]
    avg_x = int(avg_x / b)
    avg_y = int(avg_y / b)
    avg_r = int(avg_r / b)
    return avg_x, avg_y, avg_r
def getCircleAndCustomize(image):
    """Detect the gauge face and return its centre and radius.

    Args:
        image: BGR image of the gauge.

    Returns:
        ``(x, y, r)``: the averaged centre coordinates and radius of the
        detected circle(s), in pixels.

    Raises:
        ValueError: If no circle is detected.
    """
    height = image.shape[0]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # convert to gray

    # Restricting the search to 35-48% of the image height as radius gives
    # fairly good results across different samples. These are pixel values.
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 20, numpy.array([]), 100, 50,
                               int(height * 0.35), int(height * 0.48))
    # HoughCircles returns None instead of an empty array when nothing is found.
    if circles is None:
        raise ValueError("no circle detected in the image")

    # Second dimension of the result is the number of detected circles.
    _, b, _ = circles.shape
    x, y, r = averageCircle(circles, b)
    return x, y, r
def get_current_value(img, min_angle, max_angle, min_value, max_value, x, y, r):
    """Read the gauge value from the needle position.

    The image is thresholded, line segments are detected with HoughLinesP,
    and the first segment that starts near the centre and ends near the rim
    is taken as the needle. Its angle is mapped linearly from
    ``[min_angle, max_angle]`` to ``[min_value, max_value]``.

    Side effects: writes the thresholded image to
    ``<script dir>/images/afterTreshold.jpg`` and draws the chosen needle
    line onto ``img`` in green.

    Args:
        img: BGR image of the gauge (modified in place).
        min_angle: Dial angle, in degrees, that corresponds to ``min_value``.
        max_angle: Dial angle, in degrees, that corresponds to ``max_value``.
        min_value: Lowest reading of the gauge.
        max_value: Highest reading of the gauge.
        x: x-coordinate of the gauge centre, in pixels.
        y: y-coordinate of the gauge centre, in pixels.
        r: Radius of the gauge, in pixels.

    Returns:
        The interpolated gauge reading.

    Raises:
        ValueError: If no line segment qualifies as the needle.
    """
    gray2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Set threshold and maxValue
    thresh = 175
    maxValue = 255
    # THRESH_BINARY_INV was found to perform best for finding lines.
    _, dst2 = cv2.threshold(gray2, thresh, maxValue, cv2.THRESH_BINARY_INV)

    # for testing, save the image after thresholding
    dirPath = getScriptDir() + '/images/afterTreshold.jpg'
    cv2.imwrite(dirPath, dst2)

    # find lines
    # rho is set to 3 to detect more lines; they are filtered below.
    minLineLength = 10
    maxLineGap = 0
    lines = cv2.HoughLinesP(image=dst2, rho=3, theta=numpy.pi / 180, threshold=100,
                            minLineLength=minLineLength, maxLineGap=maxLineGap)
    # HoughLinesP returns None when nothing is found.
    if lines is None:
        raise ValueError("no line segments detected in the gauge image")

    # diff1 bounds: how close the near end must be to the centre (fraction of r)
    diff1LowerBound = 0.15
    diff1UpperBound = 0.25
    # diff2 bounds: how close the far end must be to the rim (fraction of r)
    diff2LowerBound = 0.5
    diff2UpperBound = 1.0

    # remove all lines outside the given radius bands
    final_line_list = []
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        # diff1 is the distance of the end closer to the centre, diff2 the other
        diff1, diff2 = sorted((distance2Points(x, y, x1, y1),
                               distance2Points(x, y, x2, y2)))
        if (diff1LowerBound * r < diff1 < diff1UpperBound * r
                and diff2LowerBound * r < diff2 < diff2UpperBound * r):
            final_line_list.append([int(x1), int(y1), int(x2), int(y2)])

    if not final_line_list:
        raise ValueError("no line segment matches the expected needle position")

    # assumes the first line is the best one
    x1, y1, x2, y2 = final_line_list[0]
    cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # use the end farthest from the centre to determine the angle;
    # y is flipped so that "up" in the image is positive
    dist_pt_0 = distance2Points(x, y, x1, y1)
    dist_pt_1 = distance2Points(x, y, x2, y2)
    if dist_pt_0 > dist_pt_1:
        x_angle = x1 - x
        y_angle = y - y1
    else:
        x_angle = x2 - x
        y_angle = y - y2

    # Dial angle measured clockwise from straight down. arctan2 reproduces the
    # per-quadrant 270 - res / 90 - res rules and also covers needles lying
    # exactly on an axis, where x_angle or y_angle is 0.
    res = numpy.rad2deg(numpy.arctan2(float(y_angle), float(x_angle)))
    final_angle = (270 - res) % 360

    # linear map from the angle range to the value range
    old_min = float(min_angle)
    old_max = float(max_angle)
    new_min = float(min_value)
    new_max = float(max_value)
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((final_angle - old_min) * new_range) / old_range) + new_min
    return new_value
def main():
    """Read the gauge image, locate the dial and report the current reading.

    Returns:
        The gauge reading computed by ``get_current_value``.
    """
    # 1) get the image from directory.
    image = getImage()
    min_angle, max_angle, min_value, max_value, units = setStaticUserRealGaugeDetails()
    # 2) + 3) convert the image to gray and find the circle in it
    x, y, r = getCircleAndCustomize(image)
    # 4) + 5) find the needle line and map its angle to the gauge range
    newValue = get_current_value(image, min_angle, max_angle, min_value, max_value, x, y, r)
    # Report the result instead of silently discarding it.
    print("Current reading: %s %s" % (newValue, units))
    return newValue
if __name__=='__main__':