pytesseract can't detect digit and operators - opencv

I want to extract two numbers and an operator (e.g. 14 + 23) from an image, but the script I wrote doesn't work:
import cv2 as cv
import pytesseract
# OpenCV loads images as BGR; reorder the channels to RGB before OCR.
img = cv.imread('VerificationImage.jpg')
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
print(pytesseract.image_to_string(img))

# NOTE(review): "outputbase digits" loads Tesseract's `digits` config, which
# presumably whitelists digit characters only -- operators such as "+" could
# then never be reported. Confirm against the installed tessdata/configs.
conf = r'--oem 3 --psm 11 outputbase digits'
boxes = pytesseract.image_to_data(img, config=conf)

# image_to_data returns tab-separated text; row 0 is the column header.
for x, b in enumerate(boxes.splitlines()):
    if x == 0:
        continue
    b = b.split()
    print(b)
    # Only rows with all 12 fields are boxed; fields 6..9 are used as
    # x, y, width and height of the box.
    if len(b) != 12:
        continue
    x, y, w, h = (int(field) for field in b[6:10])
    cv.rectangle(img, (x, y), (x + w, y + h), (0, 120, 0), 2)

# NOTE(review): img is RGB at this point while cv.imshow interprets data as
# BGR, so the displayed colours are likely channel-swapped -- confirm.
cv.imshow("display", img)
cv.waitKey()
The sample image is like this:

Related

ROS melodic with python3 got openCV error

As mentioned in the title, I'm trying to use ROS Melodic with Python 3. The first error popped up because of cv_bridge, and that has been fixed. Now I'm getting this error:
[ERROR] [1673464074.204372, 2767.036000]: bad callback: <function im_callback at 0x7f889d1aed90>
cv2.error: OpenCV(4.4.0) /tmp/pip-req-build-civioau0/opencv/modules/dnn/src/tensorflow/tf_importer.cpp:586:
error: (-2:Unspecified error) Const input blob for weights not found in function 'getConstBlob'
I checked and could not find anything about this error.
Here is my code that I'm trying to rosrun:
#! /usr/bin/env python3
import cv2
import numpy as np
import rospy
import sensor_msgs.msg as sensor
import cv_bridge
rostopic = "/iris/camera/rgb/image_raw"
rosmsg = sensor.Image
configPath = "/home/irene/catkin_ws/src/beginner_tutorials/scripts/model/ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt" #file.pbtxt
modelPath = "/home/irene/catkin_ws/src/beginner_tutorials/scripts/model/frozen_inference_graph.pb" #file.pb
classesPath = "/home/irene/catkin_ws/src/beginner_tutorials/scripts/model/coco.names" #file.names
bridge = cv_bridge.CvBridge()
# Detector and label list are created on first use and then reused; building
# the network and re-reading the label file for every frame is pure overhead.
_detector_state = {}


def _get_detector():
    """Return ``(net, classesList)``, building both on the first call.

    Reads the module-level ``modelPath``, ``configPath`` and ``classesPath``.
    Nothing is cached when construction raises, so the next frame retries.
    """
    if not _detector_state:
        net = cv2.dnn_DetectionModel(modelPath, configPath)
        net.setInputSize(320, 320)
        net.setInputScale(1 / 127.5)
        net.setInputMean((127.5, 127.5, 127.5))
        net.setInputSwapRB(True)
        with open(classesPath, "r") as file:
            classesList = file.read().splitlines()
        _detector_state["net"] = net
        _detector_state["classes"] = classesList
    return _detector_state["net"], _detector_state["classes"]


def _draw_corner_marks(img, x, y, w, h):
    """Draw a thick red tick along both edges at each corner of the box."""
    dx, dy = int(w * .3), int(h * .3)
    for corner_x, step_x in ((x, 1), (x + w, -1)):
        for corner_y, step_y in ((y, 1), (y + h, -1)):
            cv2.line(img, (corner_x, corner_y), (corner_x + step_x * dx, corner_y), (0, 0, 255), 3)
            cv2.line(img, (corner_x, corner_y), (corner_x, corner_y + step_y * dy), (0, 0, 255), 3)


def im_callback(rosmsg):
    """Detect people in one camera frame and display the annotated image.

    Args:
        rosmsg: incoming image message; converted to a BGR array through the
            module-level ``bridge``.

    Detections are filtered with non-maximum suppression; only those whose
    label is "person" are drawn. The frame is shown in the "Image" window.
    """
    img = bridge.imgmsg_to_cv2(rosmsg, "bgr8")
    net, classesList = _get_detector()

    classesLabelIDs, confidences, body_rects = net.detect(img, confThreshold=0.6)
    body_rects = list(body_rects)
    # Flatten to a plain list of Python floats, as passed to NMSBoxes.
    confidences = [float(c) for c in np.array(confidences).reshape(-1)]
    bboxsIDx = cv2.dnn.NMSBoxes(body_rects, confidences, score_threshold=0.5, nms_threshold=0.2)

    for bID in bboxsIDx:
        # Depending on the OpenCV version each index may be wrapped in an array.
        idx = int(np.squeeze(bID))
        classLabel = classesList[int(np.squeeze(classesLabelIDs[idx]))]
        if classLabel != "person":
            continue
        display_text = "{} - {:.1}".format(classLabel, confidences[idx])
        x, y, w, h = (int(v) for v in body_rects[idx])
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1)
        _draw_corner_marks(img, x, y, w, h)
        cv2.putText(img, display_text, (x, y - 8), cv2.FONT_HERSHEY_COMPLEX, .4, (255, 255, 255), 1)

    cv2.imshow("Image", img)
    cv2.waitKey(1)
def main():
    """Start the node, feed the camera topic to ``im_callback`` and spin."""
    # rostopic and rosmsg are only read here, so no ``global`` is required.
    rospy.init_node("webcam_node", anonymous=True)
    rospy.Subscriber(rostopic, rosmsg, im_callback)
    rospy.spin()


if __name__ == "__main__":
    main()
I was trying to use ROS melodic with python3 and I got this error
As you've probably gathered, the error occurs because you're trying to use Python 3. Melodic targets Python 2.7 exclusively, and trying to make it run with Python 3 is strongly discouraged for the reasons you're seeing. If you really want to use Python 3 packages and dependencies in your project, you should instead be running the Noetic distro of ROS.

Why does Tesseract fail to recognize 6 out of 26 of my alphabetic keyboard keys even with several parameter tunings?

TL;DR I'm using:
adaptive thresholding
segmenting by keys (width/height ratio) - see green boxes in image result
psm 10 to treat each key as a character
but it fails to recognize some keys, falsely identifies others or identifies 2 for 1 char (see the L character in the image result, it's an L and P), etc.
Note: I cropped the image and re-ran the results to get it to fit on this site, but before cropping it did slightly better (recognized more keys, fewer false positives, etc).
I just want it to recognize the alphabet keys. Ultimately I will want it to work for realtime video.
config:
'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
I've tried scaling the image differently, scaling the individual key segments, using opening/closing/etc but it doesn't recognize all the keys.
original image
image result
Update: new results if I make the image straighter (bird's eye) and remove the whitelisting, it manages to detect all for the most part (although it thinks the O is a 0 and the I is a |, which is understandable). Why is this and how could I make this adaptive enough for a dynamic video when it is so sensitive to these conditions?
Code:
import pytesseract
import numpy as np
try:
from PIL import Image
except ImportError:
import Image
import cv2
from tqdm import tqdm
from collections import defaultdict
def get_missing_chars(dict):
    """Return the capital letters A-Z that are not contained in *dict*.

    *dict* may be any container supporting ``in``; the result is in
    alphabetical order.
    """
    missing = []
    for code in range(ord("A"), ord("Z") + 1):
        letter = chr(code)
        if letter not in dict:
            missing.append(letter)
    return missing
def draw_box_and_char(img, contour_dims, c, box_col, text_col):
    """Return a copy of *img* with a box and the label *c* drawn on it.

    Args:
        img: image to annotate; it is copied, not modified.
        contour_dims: ``(x, y, w, h)`` of the box.
        c: text to draw.
        box_col: colour of the rectangle.
        text_col: colour of the text.
    """
    left, top, width, height = contour_dims
    annotated = img.copy()
    cv2.rectangle(annotated, (left, top), (left + width, top + height), box_col, 2)
    # Text baseline sits 3 px above the bottom edge; the horizontal position
    # is derived from the box height.
    label_origin = (left + height // 2 + 12, top + height - 3)
    cv2.putText(annotated, c, label_origin, cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=.5, color=text_col, thickness=1, lineType=cv2.LINE_AA)
    return annotated
def detect_keys(img, scaling=.25, offset=10, show_each_char=False):
    """Find key-shaped regions in *img* and OCR one capital letter in each.

    Args:
        img: BGR image (it is converted with ``COLOR_BGR2GRAY``).
        scaling: resize factor applied before any processing.
        offset: margin in pixels trimmed from every side of a key's bounding
            box before the region is passed to Tesseract.
        show_each_char: when true, display every OCR segment in a window and
            wait for a key press (debugging aid).

    Returns:
        The thresholded image converted to 3 channels, with a green box and
        the recognised letter drawn for every letter that was detected.
    """
    img = cv2.resize(img, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_AREA)
    print("img shape", img.shape)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    ratio_min = 0.7
    area_min = 1000
    nbrhood_size = 1001
    bias = 2
    # adapt to different lighting
    bin_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, nbrhood_size, bias)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    items = cv2.findContours(bin_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = items[0] if len(items) == 2 else items[1]

    # Keep bounding boxes of contours that are large and not too flat.
    key_rects = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if h / w > ratio_min and cv2.contourArea(contour) > area_min:
            key_rects.append((x, y, w, h))

    # psm 10: treats the segment as a single character
    custom_config = r'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
    img_copy = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2RGB)
    let_to_contour = {}

    for x, y, w, h in tqdm(key_rects):
        # Smaller rectangle inside the key segment for easier recognition.
        new_x, new_y = x + offset, y + offset
        new_w, new_h = w - 2 * offset, h - 2 * offset
        if new_w <= 0 or new_h <= 0:
            # Nothing is left after trimming, so there is nothing to OCR.
            continue

        # Full-size black canvas holding only the inverted key interior.
        segment = np.zeros(bin_img.shape, dtype=np.uint8)
        rows = slice(new_y, new_y + new_h)
        cols = slice(new_x, new_x + new_w)
        segment[rows, cols] = cv2.bitwise_not(bin_img[rows, cols])

        d = pytesseract.image_to_data(segment, config=custom_config, output_type='dict')
        text = d['text'][-1] if d['text'] else ""
        if text:
            # Confidence of the same (last) entry the text was taken from.
            # Previously the whole confidence list was stored and compared.
            conf = float(d['conf'][-1])
            # sometimes recognizes multiple keys even though there is only 1
            for sub_c in text:
                best = let_to_contour.get(sub_c)
                if best is None or conf > best['conf']:
                    let_to_contour[sub_c] = {'conf': conf, 'cont': (new_x, new_y, new_w, new_h)}
        else:
            text = "?"

        if show_each_char:
            segment_with_boxes = draw_box_and_char(
                segment, (new_x, new_y, new_w, new_h), text, (0, 255, 0), (0, 0, 0))
            cv2.imshow('segment', segment_with_boxes)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    # draw boxes around recognized keys
    for letter, data in let_to_contour.items():
        img_copy = draw_box_and_char(img_copy, data['cont'], letter, (0, 255, 0), (0, 0, 0))

    for det in let_to_contour:
        print(det, let_to_contour[det])
    print("total detected: ", let_to_contour.keys())
    missing = get_missing_chars(let_to_contour)
    print(f"n_missing: {len(missing)}")
    print(f"chars missing: {missing}")
    return img_copy
if __name__ == "__main__":
    img_file = "keyboard.jpg"
    img = cv2.imread(img_file)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    if img is None:
        raise SystemExit(f"could not read image file: {img_file}")
    img_with_detected_keys = detect_keys(img)
    cv2.imshow("detected", img_with_detected_keys)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Logistic Regression not able to find value of theta

I have hundred Entries in csv file.
Physics,Maths,Status_class0or1
30,40,0
90,70,1
Using above data i am trying to build logistic (binary) classifier.
Please advise me where I am going wrong. Why am I getting the answer as a 3×3 matrix (9 values of theta), whereas there should be only 3?
Here is code:
importing the libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
reading data from csv file.
# NOTE(review): header=None makes pandas treat the first CSV row as data; if
# the file really starts with a "Physics,Maths,..." header row, that row ends
# up in the arrays below -- confirm against the actual file.
df = pd.read_csv("LogisticRegressionFirstBinaryClassifier.csv", header=None)
feature_cols = ["Maths", "Physics"]
df.columns = feature_cols + ["AdmissionStatus"]

# NOTE(review): preprocessing.normalize presumably rescales each row (sample)
# rather than each feature column -- confirm this is the intended scaling.
X = preprocessing.normalize(np.array(df[feature_cols]))
y = np.array(df[["AdmissionStatus"]])

# Prepend a column of ones so that theta[0] acts as the intercept.
X = np.column_stack((np.ones(X.shape[0]), X))
theta = np.ones((X.shape[1], 1))

print(X.shape)  # (100, 3)
print(y.shape)  # (100, 1)
print(theta.shape)  # (3, 1)
calc_z to calculate the dot product of X and theta
def calc_z(X, theta):
    """Return the linear score: the dot product of *X* and *theta*."""
    z = np.dot(X, theta)
    return z
Sigmoid function
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + e**-z)`` element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
Cost_function
def cost_function(X, y, theta):
    """Mean binary cross-entropy of ``sigmoid(X . theta)`` against *y*."""
    h = sigmoid(calc_z(X, theta))
    positive_term = -y * np.log(h)
    negative_term = (1 - y) * np.log(1 - h)
    return (positive_term - negative_term).mean()
print("cost_function =" , cost_function(X, y, theta))
def derivativeofcostfunction(X, y, theta):
    """Return ``(h - y).T . X`` with ``h = sigmoid(X . theta)``.

    NOTE(review): for X of shape (m, n) and y of shape (m, 1) the result is
    a row of shape (1, n). Subtracting that from a column theta of shape
    (n, 1) broadcasts to an (n, n) matrix instead of updating n values.
    """
    residual = sigmoid(calc_z(X, theta)) - y
    return np.dot(residual.T, X)
print("derivativeofcostfunction=", derivativeofcostfunction(X, y, theta))
def grad_desc(X, y, theta, lr=.001, converge_change=.001):
    """Run gradient descent until the cost improves by at most *converge_change*.

    Args:
        X, y: inputs forwarded to the cost and gradient functions.
        theta: starting parameters.
        lr: step size applied to the gradient.
        converge_change: the loop stops once the cost decrease per step is
            no longer greater than this value.

    Returns:
        ``(theta, num_iter)``; ``num_iter`` starts at 1 and is incremented
        once per update.
    """
    cost = cost_function(X, y, theta)
    num_iter = 1
    change_cost = 1
    while change_cost > converge_change:
        previous_cost = cost
        # Debug output: current parameters and their gradient.
        print(theta)
        gradient = derivativeofcostfunction(X, y, theta)
        print(gradient)
        theta = theta - lr * gradient
        cost = cost_function(X, y, theta)
        change_cost = previous_cost - cost
        num_iter += 1
    return theta, num_iter
Here is the output :
[[ 0.4185146 -0.56877556 0.63999433]
[15.39722864 9.73995197 11.07882445]
[12.77277463 7.93485324 9.24909626]]
[[0.33944777 0.58199037 0.52493407]
[0.02106587 0.36300629 0.30297278]
[0.07040604 0.3969297 0.33737757]]
[[-0.05856159 -0.89826735 0.30849185]
[15.18035041 9.59004868 10.92827046]
[12.4804775 7.73302024 9.04599788]]
[[0.33950634 0.58288863 0.52462558]
[0.00588552 0.35341624 0.29204451]
[0.05792556 0.38919668 0.32833157]]
[[-5.17526527e-01 -1.21534937e+00 -1.03387571e-02]
[ 1.49729502e+01 9.44663458e+00 1.07843504e+01]
[ 1.21978140e+01 7.53778010e+00 8.84964495e+00]]
(array([[ 0.34002386, 0.58410398, 0.52463592],
[-0.00908743, 0.34396961, 0.28126016],
[ 0.04572775, 0.3816589 , 0.31948193]]), 46)
I changed this code , just added Transpose while returning the matrix and it fixed my issue.
def derivativeofcostfunction(X, y, theta):
    """Return the transpose of ``(h - y).T . X`` with ``h = sigmoid(X . theta)``.

    For X of shape (m, n) and y of shape (m, 1) the result has shape (n, 1),
    so it can be subtracted from a column theta element by element.
    """
    residual = sigmoid(calc_z(X, theta)) - y
    gradient_row = np.dot(residual.T, X)
    return gradient_row.T

Pixels regions comparision

I'm trying to write a Python script for GIMP whose aim is to slice a picture into a tileset (identify each unique 16x16 tile in a picture).
So far, I'm able to read tiles (in fact a 16x16 pixels region) and write it somewhere.
But all my attempts at comparing tiles failed.
Did I miss Something ?
My script is as follow:
#!/usr/bin/env python
from gimpfu import *
# Compare two tiles.
# Returns 1 when they are equal, 0 otherwise.
def tileCompare(tile1, tile2):
    return 1 if tile1 == tile2 else 0
# Read the 16x16 tile whose top-left corner is (x, y) from the layer.
def readTile(layer, x, y):
    region = layer.get_pixel_rgn(x, y, 16, 16)
    columns = slice(x, x + 16)
    rows = slice(y, y + 16)
    return region[columns, rows]
# Store a 16x16 tile at (x, y) in the given layer.
def writeTile(layer, x, y, tile):
    region = layer.get_pixel_rgn(x, y, 16, 16)
    columns = slice(x, x + 16)
    rows = slice(y, y + 16)
    region[columns, rows] = tile
def TilesSlicer(sourceLayer, targetLayer):
    # For every 16x16 tile of the source layer, scan the layer again and
    # write the tile out the first time an equal tile is met.
    for x in range(0, sourceLayer.width, 16):
        for y in range(0, sourceLayer.height, 16):
            tileSource = readTile(sourceLayer, x, y)
            found = 0
            for a in range(0, sourceLayer.width, 16):
                for b in range(0, sourceLayer.height, 16):
                    # NOTE(review): this reads position (x, y) again, not
                    # (a, b), so the comparison below is between two reads
                    # of the same tile.
                    tileIterator = readTile(sourceLayer, x, y)
                    if tileCompare(tileSource, tileIterator) != 1:
                        continue
                    if found != 0:
                        continue
                    # NOTE(review): the first argument is a tile, whereas
                    # writeTile calls get_pixel_rgn on it like a layer;
                    # targetLayer is never used in this function.
                    writeTile(tileIterator, a, b, tileSource)
                    found = 1
register(
"TilesSlicer",
"Tiles slicer",
"Slice a picture into tiles",
"Fabrice Lambert",
"Fabrice Lambert",
"April 2019",
"Tiles slicer...",
"RGB*",
[
(PF_DRAWABLE, "sourceLayer", "Source Layer: ", None),
(PF_DRAWABLE, "targetLayer", "Target Layer: ", None),
],
[],
TilesSlicer,
menu="<Image>/Filters/My Scripts")
main()
Thanks for your suggestions.
Nvm,
I found the problem:
tileIterator = readTile(sourceLayer, a, b)
instead of:
tileIterator = readTile(sourceLayer, x, y)
Alright,
After refining a bit, the script is as follow:
- Added tiles width and height to handle any tile size.
- Removed target layer parameter, the script now create it.
- Added real time display to give feedbacks to the user (sadly, progress bar doesn't work).
- Improved speed.
#!/usr/bin/env python
from gimpfu import *
# Tell whether two tiles are equal.
# Result is 1 for equal tiles and 0 for different ones.
def tileCompare(tile1, tile2):
    identical = 0
    if tile1 == tile2:
        identical = 1
    return identical
# Read the width x height tile whose top-left corner is (x, y).
def readTile(layer, x, y, width, height):
    region = layer.get_pixel_rgn(x, y, width, height)
    columns = slice(x, x + width)
    rows = slice(y, y + height)
    return region[columns, rows]
# Store a width x height tile at (x, y) in the given layer, then refresh
# that area and the displays so the change becomes visible.
def writeTile(layer, x, y, width, height, tile):
    region = layer.get_pixel_rgn(x, y, width, height)
    columns = slice(x, x + width)
    rows = slice(y, y + height)
    region[columns, rows] = tile
    layer.update(x, y, width, height)
    gimp.displays_flush()
def TilesSlicer(sourceLayer, tileWidth, tileHeight):
    """Copy the first occurrence of every distinct tile into a new layer.

    A transparent layer named "Target" is added on top of the source image.
    For each tile of ``sourceLayer`` the layer is scanned for the first tile
    with equal content, and that tile is written to the target layer at its
    own position.

    Args:
        sourceLayer: layer to slice; its size must be a multiple of the
            tile size.
        tileWidth: tile width in pixels, greater than zero.
        tileHeight: tile height in pixels, greater than zero.
    """
    # A zero size would otherwise raise ZeroDivisionError in the checks below.
    if tileWidth <= 0 or tileHeight <= 0:
        gimp.message("The tile width and height must be greater than 0")
        gimp.quit()
        return
    if (sourceLayer.width % tileWidth) != 0:
        gimp.message("The layer width is not multiple of " + str(tileWidth))
        gimp.quit()
        return
    # The height has to be checked against the tile height, not the width.
    if (sourceLayer.height % tileHeight) != 0:
        gimp.message("The layer height is not multiple of " + str(tileHeight))
        gimp.quit()
        return

    totalTiles = (sourceLayer.width // tileWidth) * (sourceLayer.height // tileHeight)
    tilesProcessed = 0
    gimp.progress_init("Processing...")
    gimp.progress_update(0.0)

    sourceImage = sourceLayer.image
    targetLayer = pdb.gimp_layer_new(sourceImage, sourceLayer.width, sourceLayer.height, sourceImage.base_type, "Target", 100.0, sourceLayer.mode)
    targetLayer.add_alpha()
    targetLayer.fill(TRANSPARENT_FILL)
    sourceImage.add_layer(targetLayer, 0)

    # iterate tiles (result in tileSource)
    for x in range(0, sourceLayer.width, tileWidth):
        for y in range(0, sourceLayer.height, tileHeight):
            tileSource = readTile(sourceLayer, x, y, tileWidth, tileHeight)
            found = 0
            # iterate tiles again (result in tileIterator) and stop at the
            # first identical one
            for a in range(0, sourceLayer.width, tileWidth):
                for b in range(0, sourceLayer.height, tileHeight):
                    tileIterator = readTile(sourceLayer, a, b, tileWidth, tileHeight)
                    if tileCompare(tileSource, tileIterator) == 1:
                        writeTile(targetLayer, a, b, tileWidth, tileHeight, tileIterator)
                        found = 1
                        break
                if found == 1:
                    break
            tilesProcessed = tilesProcessed + 1
            # float() forces true division; dividing two ints truncates under
            # Python 2, which kept the reported fraction at 0 until the end.
            gimp.progress_update(float(tilesProcessed) / totalTiles)
            gimp.displays_flush()
register(
"TilesSlicer",
"Tiles slicer",
"Slice a picture into tiles",
"Fabrice Lambert",
"Fabrice Lambert",
"April 2019",
"Tiles slicer...",
"RGB*",
[
(PF_DRAWABLE, "sourceLayer", "Source Layer: ", None),
(PF_INT8, "tileWidth", "Tile width: ", 16),
(PF_INT8, "tileHeight", "Tile height: ", 16),
],
[],
TilesSlicer,
menu="<Image>/Filters/My Scripts")
main()
It can probably be refined further, and if anyone has any suggestions for dealing with the progress bar, let me know.
I'm open to suggestions.

USB mapping with python

While reading some CTF write-ups I came across this script
#!/usr/bin/env python
import struct
import Image
import dpkt
INIT_X, INIT_Y = 100, 400
def print_map(pcap, device):
    """Plot the movements of one device from a capture and save them as a PNG.

    Args:
        pcap: iterable of ``(timestamp, buffer)`` pairs.
        device: only packets whose byte at offset 0x0B equals this value
            are used.

    The last four bytes of each packet are read as signed bytes: the first
    is a status value, the next two are added to the running x and y.
    Status 1 paints a black 10x10 square at the position, anything else a
    single red pixel. The image is written to "riverside-map.png".

    NOTE(review): positions are not checked against the 1200x500 canvas, so
    the pixel assignments raise IndexError once the position leaves it.
    """
    picture = Image.new("RGB", (1200, 500), "white")
    pixels = picture.load()
    x, y = INIT_X, INIT_Y
    for ts, buf in pcap:
        (device_id,) = struct.unpack("b", buf[0x0B])
        if device_id == device:
            status, dx, dy = struct.unpack("bbbb", buf[-4:])[:3]
            x += dx
            y += dy
            if status != 1:
                pixels[x, y] = (255, 0, 0, 0)
            else:
                for i in range(-5, 5):
                    for j in range(-5, 5):
                        pixels[x + i, y + j] = (0, 0, 0, 0)
    picture.save("riverside-map.png", "PNG")
if __name__ == "__main__":
    # The context manager closes the capture file even when print_map raises.
    with open("usb.pcap", "rb") as f:
        pcap = dpkt.pcap.Reader(f)
        print_map(pcap, 5)
And when I run it on my usb.pcap I get this error:
Traceback (most recent call last):
File "test.py", line 39, in <module>
print_map(pcap, n)
File "test.py", line 31, in print_map
pixels[x, y] = (255, 0, 0, 0)
IndexError: image index out of range
Why it is happening?
Depending on the dataset in your usb.pcap file, you may need to adjust the INIT_X and INIT_Y variables. The problem is that struct.unpack returns a signed value, so if the data is over 127 then it appears negative and you are exceeding the array boundaries. If the data is really always positive, you can test for that and force it to a positive value. Something like:
data = [item + 256 if item < 0 else item for item in data]
As Steve Cohen noticed, your data is being read as signed bytes in the range -128...127, but if these are indexes into the array then most probably they should be unsigned.
Python's struct has format characters for most cases, use the right ones:
data = struct.unpack("BBBB", buf[-4:]) # tuple of four unsigned bytes

Resources