TL;DR I'm using:
adaptive thresholding
segmenting by keys (width/height ratio) - see green boxes in image result
psm 10 to treat each key as a character
but it fails to recognize some keys, falsely identifies others or identifies 2 for 1 char (see the L character in the image result, it's an L and P), etc.
Note: I cropped the image and re-ran the results to get it to fit on this site, but before cropping it did slightly better (recognized more keys, fewer false positives, etc).
I just want it to recognize the alphabet keys. Ultimately I will want it to work for realtime video.
config:
'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
I've tried scaling the image differently, scaling the individual key segments, using opening/closing/etc but it doesn't recognize all the keys.
original image
image result
Update: new results if I make the image straighter (bird's eye) and remove the whitelisting, it manages to detect all for the most part (although it thinks the O is a 0 and the I is a |, which is understandable). Why is this and how could I make this adaptive enough for a dynamic video when it is so sensitive to these conditions?
Code:
import pytesseract
import numpy as np
try:
from PIL import Image
except ImportError:
import Image
import cv2
from tqdm import tqdm
from collections import defaultdict
def get_missing_chars(dict):
capital_alphabet = [chr(ascii) for ascii in range(65, 91)]
return [let for let in capital_alphabet if let not in dict]
def draw_box_and_char(img, contour_dims, c, box_col, text_col):
x, y, w, h = contour_dims
top_left = (x, y)
bot_right = (x + w, y+h)
font_offset = 3
text_pos = (x+h//2+12, y+h-font_offset)
img_copy = img.copy()
cv2.rectangle(img_copy, top_left, bot_right, box_col, 2)
cv2.putText(img_copy, c, text_pos, cv2.FONT_HERSHEY_SIMPLEX, fontScale=.5, color=text_col, thickness=1, lineType=cv2.LINE_AA)
return img_copy
def detect_keys(img):
scaling = .25
img = cv2.resize(img, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_AREA)
print("img shape", img.shape)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ratio_min = 0.7
area_min = 1000
nbrhood_size = 1001
bias = 2
# adapt to different lighting
bin_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY_INV, nbrhood_size, bias)
items = cv2.findContours(bin_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]
key_contours = []
for c in contours:
x, y, w, h = cv2.boundingRect(c)
ratio = h/w
area = cv2.contourArea(c)
# square-like ratio, try to get character
if ratio > ratio_min and area > area_min:
key_contours.append(c)
detected = defaultdict(int)
n_kept = 0
img_copy = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2RGB)
let_to_contour = {}
n_contours = len(key_contours)
# offset to get smaller square within the key segment for easier char recognition
offset = 10
show_each_char = False
for _, c in tqdm(enumerate(key_contours), total=n_contours):
x, y, w, h = cv2.boundingRect(c)
ratio = h/w
area = cv2.contourArea(c)
base = np.zeros(bin_img.shape, dtype=np.uint8)
base.fill(255)
n_kept += 1
new_y = y+offset
new_x = x+offset
new_h = h-2*offset
new_w = w-2*offset
base[new_y:new_y+new_h, new_x:new_x+new_w] = bin_img[new_y:new_y+new_h, new_x:new_x+new_w]
segment = cv2.bitwise_not(base)
# try scaling up individual keys
# scaling = 2
# segment = cv2.resize(segment, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_CUBIC)
# psm 10: treats the segment as a single character
custom_config = r'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
d = pytesseract.image_to_data(segment, config=custom_config, output_type='dict')
conf = d['conf']
c = d['text'][-1]
if c:
# sometimes recognizes multiple keys even though there is only 1
for sub_c in c:
# save character and contour to draw on image and show bounds/detection
if sub_c not in let_to_contour or (sub_c in let_to_contour and conf > let_to_contour[sub_c]['conf']):
let_to_contour[sub_c] = {'conf': conf, 'cont': (new_x, new_y, new_w, new_h)}
else:
c = "?"
text_col = (0, 0, 255)
if show_each_char:
contour_dims = (new_x, new_y, new_w, new_h)
box_col = (0, 255, 0)
text_col = (0, 0, 0)
segment_with_boxes = draw_box_and_char(segment, contour_dims, c, box_col, text_col)
cv2.imshow('segment', segment_with_boxes)
cv2.waitKey(0)
cv2.destroyAllWindows()
# draw boxes around recognized keys
for c, data in let_to_contour.items():
box_col = (0, 255, 0)
text_col = (0, 0, 0)
img_copy = draw_box_and_char(img_copy, data['cont'], c, box_col, text_col)
detected = {k: 1 for k in let_to_contour}
for det in let_to_contour:
print(det, let_to_contour[det])
print("total detected: ", let_to_contour.keys())
missing = get_missing_chars(detected)
print(f"n_missing: {len(missing)}")
print(f"chars missing: {missing}")
return img_copy
if __name__ == "__main__":
img_file = "keyboard.jpg"
img = cv2.imread(img_file)
img_with_detected_keys = detect_keys(img)
cv2.imshow("detected", img_with_detected_keys)
cv2.waitKey(0)
cv2.destroyAllWindows()
I've been trying to understand how bmp files work so I can render some Mandelbrot set pictures and output them as bmp files since that seems to be one of the easiest methods but for some reason when I use an aspect ratio that isn't 1:1 even though its something to the power of 4 (so no padding is needed) I get weird artifacts like these 200:100 48:100 what I'm trying to do is turning an array of pixels that has white for even numbers and black for odd numbers into a bmp, this (100:100) is what it looks like with 1:1 aspect ratio.
I've tried reading through the wikipedia article to see if I can figure out what I'm doing wrong but I still don't get what I'm missing.
This is the script I've written in Lua so far:
ResolutionX = 100
ResolutionY = 100
local cos, atan, sin, atan2, sqrt, floor = math.cos, math.atan, math.sin, math.atan2, math.sqrt, math.floor
local insert, concat = table.insert, table.concat
local sub, char, rep = string.sub, string.char, string.rep
io.output("Test.bmp")
function Basen(n,b)
n = floor(n)
if not b or b == 10 then return tostring(n) end
local digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
local t = {}
repeat
local d = (n % b) + 1
n = floor(n / b)
insert(t, 1, digits:sub(d,d))
until n == 0
return rep("0",32-#t)..concat(t,"")
end
FileSize = Basen(ResolutionY*ResolutionX*3 + 54,2)
FileSize4 = tonumber(sub(FileSize,1,8),2) or 0
FileSize3 = tonumber(sub(FileSize,9,16),2) or 0
FileSize2 = tonumber(sub(FileSize,17,24),2) or 0
FileSize1 = tonumber(sub(FileSize,25,32),2) or 0
Width = Basen(ResolutionX,2)
print("Width: ",Width)
Width4 = tonumber(sub(Width,1,8),2) or 0
Width3 = tonumber(sub(Width,9,16),2) or 0
Width2 = tonumber(sub(Width,17,24),2) or 0
Width1 = tonumber(sub(Width,25,32),2) or 0
Height = Basen(ResolutionY,2)
print("Height: ",Height)
Height4 = tonumber(sub(Height,1,8),2) or 0
Height3 = tonumber(sub(Height,9,16),2) or 0
Height2 = tonumber(sub(Height,17,24),2) or 0
Height1 = tonumber(sub(Height,25,32),2) or 0
BMPSize = Basen(ResolutionY*ResolutionX*3,2)
BMPSize4 = tonumber(sub(BMPSize,1,8),2) or 0
BMPSize3 = tonumber(sub(BMPSize,9,16),2) or 0
BMPSize2 = tonumber(sub(BMPSize,17,24),2) or 0
BMPSize1 = tonumber(sub(BMPSize,25,32),2) or 0
print("TotalSize: ",FileSize1,FileSize2,FileSize3,FileSize4,"\nWidth: ",Width1,Width2,Width3,Width4,"\nHeight: ",Height1,Height2,Height3,Height4,"\nImage data: ",BMPSize1,BMPSize2,BMPSize3,BMPSize4)
Result = {"BM"..char( --File type
FileSize1,FileSize2,FileSize3,FileSize4,--File size
0,0,0,0, --Reserved
54,0,0,0, --Where the pixel data starts
40,0,0,0, --DIB header
Width1,Width2,Width3,Width4, --Width
Height1,Height2,Height3,Height4, --Height
1,0, --Color planes
24,00, --Bit depth
0,0,0,0, --Compression
BMPSize1,BMPSize2,BMPSize3,BMPSize4, --The amount of bytes pixel data will consume
Width1,Width2,Width3,Width4,
Height1,Height2,Height3,Height4,
0,0,0,0, --Number of colors in palatte
0,0,0,0
)}
for X = 0, ResolutionX - 1 do
for Y = 0, ResolutionY - 1 do
insert(Result,rep(char(255 * ((X + 1) % 2) * ((Y + 1) % 2)),3))
end
end
io.write(table.concat(Result))
Ok, here's a BMP version. I've put things in a module so it might be easier to use.
local writeBMP = {}
local floor = math.floor
local insert, concat = table.insert, table.concat
local sub, char, rep = string.sub, string.char, string.rep
local function Basen(n,b)
n = floor(n)
if not b or b == 10 then return tostring(n) end
local digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
local t = {}
repeat
local d = (n % b) + 1
n = floor(n / b)
insert(t, 1, digits:sub(d,d))
until n == 0
return rep("0",32-#t)..concat(t,"")
end
local function nextMul4(x)
if ( x % 4 == 0 ) then
return x
else
return x+4-(x%4)
end
end
local function clamp(x)
local y = x
if ( x > 255 ) then
y = 255
elseif ( x < 0 ) then
y = 0
end
return floor(y)
end
-- Accepts array of type pixelsXYC[X][Y][C] of numbers 0-255
-- C=1,2,3 are the red, green and blue channels respectively
-- X increases left to right, and Y increases top to bottom
function writeBMP.data(pixelsXYC, resolutionX, resolutionY)
local Pixels = pixelsXYC
local ResolutionX = resolutionX
local ResolutionY = resolutionY
assert(#Pixels == ResolutionX, "Table size and X resolution mismatch")
assert(#Pixels[1] == ResolutionY, "Table size and Y resolution mismatch")
local FileSize = Basen(ResolutionY*nextMul4(3*ResolutionX) + 54,2)
local FileSize4 = tonumber(sub(FileSize,1,8),2) or 0
local FileSize3 = tonumber(sub(FileSize,9,16),2) or 0
local FileSize2 = tonumber(sub(FileSize,17,24),2) or 0
local FileSize1 = tonumber(sub(FileSize,25,32),2) or 0
local Width = Basen(ResolutionX,2)
local Width4 = tonumber(sub(Width,1,8),2) or 0
local Width3 = tonumber(sub(Width,9,16),2) or 0
local Width2 = tonumber(sub(Width,17,24),2) or 0
local Width1 = tonumber(sub(Width,25,32),2) or 0
local Height = Basen(ResolutionY,2)
local Height4 = tonumber(sub(Height,1,8),2) or 0
local Height3 = tonumber(sub(Height,9,16),2) or 0
local Height2 = tonumber(sub(Height,17,24),2) or 0
local Height1 = tonumber(sub(Height,25,32),2) or 0
local BMPSize = Basen(ResolutionY*nextMul4(3*ResolutionX),2)
local BMPSize4 = tonumber(sub(BMPSize,1,8),2) or 0
local BMPSize3 = tonumber(sub(BMPSize,9,16),2) or 0
local BMPSize2 = tonumber(sub(BMPSize,17,24),2) or 0
local BMPSize1 = tonumber(sub(BMPSize,25,32),2) or 0
local Result = {}
Result[1] = "BM" .. char( --File type
FileSize1,FileSize2,FileSize3,FileSize4,--File size
0,0, --Reserved
0,0, --Reserved
54,0,0,0, --Where the pixel data starts
40,0,0,0, --DIB header
Width1,Width2,Width3,Width4, --Width
Height1,Height2,Height3,Height4, --Height
1,0, --Color planes
24,0, --Bit depth
0,0,0,0, --Compression
BMPSize1,BMPSize2,BMPSize3,BMPSize4, --The amount of bytes pixel data will consume
37,22,0,0, --Pixels per meter horizontal
37,22,0,0, --Pixels per meter vertical
0,0,0,0, --Number of colors in palatte
0,0,0,0
)
local Y = ResolutionY
while( Y >= 1 ) do
for X = 1, ResolutionX do
local r = clamp( Pixels[X][Y][1] )
local g = clamp( Pixels[X][Y][2] )
local b = clamp( Pixels[X][Y][3] )
Result[#Result+1] = char(b)
Result[#Result+1] = char(g)
Result[#Result+1] = char(r)
end
-- byte alignment
if ( ( (3*ResolutionX) % 4 ) ~= 0 ) then
local Padding = 4 - ((3*ResolutionX) % 4)
Result[#Result+1] = rep(char(0),Padding)
end
Y = Y - 1
end
return table.concat(Result)
end
function writeBMP.write(pixelsXYC, resolutionX, resolutionY, filename)
local file = io.open(filename,"wb")
local data = writeBMP.data(pixelsXYC, resolutionX, resolutionY)
file:write(data)
end
return writeBMP
A simple test:
-- writeBMP example
local writeBMP = require "writeBMP"
local resolutionX = 100
local resolutionY = 100
-- Pixel data
local pixels = {}
for x=1,resolutionX do
pixels[x] = {}
for y=1, resolutionY do
pixels[x][y] = {}
local red = 255*(resolutionX-x+resolutionY-y)/(resolutionX+resolutionY)
local green = 255*y/resolutionY
local blue = 255*x/resolutionX
pixels[x][y][1] = red
pixels[x][y][2] = green
pixels[x][y][3] = blue
end
end
writeBMP.write(pixels,resolutionX,resolutionY,"testwritebmp.bmp")
return
Note: In BMP, the Y axis starts on the bottom. I am more used to Y axis going from top down in computer graphics (so I wrote it that way).
Thanks HAX for the code.
Welcome to Stack Exchange :)
I suggest having a look at PPM files, they are easy. They can be converted with other tools to png or bmp with other tools.
Wikipedia PPM Specification
Here is a PPM solution:
ResolutionX = 48
ResolutionY = 100
local cos, atan, sin, atan2, sqrt, floor = math.cos, math.atan, math.sin, math.atan2, math.sqrt, math.floor
local insert, concat = table.insert, table.concat
local sub, char, rep = string.sub, string.char, string.rep
local file = io.open("Test.ppm","w")
-- PPM File headers
local Output = { }
Output[1] = "P3"
Output[2] = tostring(ResolutionX) .. " " .. tostring(ResolutionY)
Output[3] = "255"
-- Image body
for Y = 0, ResolutionY - 1 do
for X = 0, ResolutionX - 1 do
local value = 255 * ((X + 1) % 2) * ((Y + 1) % 2)
Output[#Output+1] = rep(tostring(floor(value)),3," ")
end
end
-- Join lines together
local Result = table.concat(Output,"\n")
file:write(Result)
Notes: I could not get your code to write to file (see my usage of file). The inner loop must be X (columns) and the outer loop must be Y (rows) if the write order is English reading order (left-right down-up).
I'm trying to generate 40 circles with text and I want to be able to use them and handle them later.
According to several post I found here and on corona forums, I'm doing it well but, in all the examples, is only with one "display" object... not with several so I'm not sure if it's possible or maybe it needs something different.
After some introduction... here the code:
function _.spawn(params)
local orb = display.newCircle( display.contentWidth/2, display.contentHeight/2, 40 ) -- creates the orb shape
orb.orbTable = params.orbTable --assign the internal table
orb.index = #orb.orbTable + 1 -- takes control of the index
orb.orbTable[orb.index] = orb -- assign a orb to the internal table
orb:setFillColor( unpack(params.color) ) -- change the color according to the rules above
orb.x = params.x -- control the X position of the orb
orb.y = params.y --control the Y position of the orb
orb.alpha = 1 -- borns with alpha = 0
orb.value = params.value -- assign the internal value of the orb (1-10)
--local orbText = display.newText( orb.value, orb.x+2, orb.y-5, "Aniron", 40 ) -- generates the value on the ball
--orbText.orbTable = params.orbTable
--orbText.index = #orbText.orbTable + 1
--orbText.orbTable[orbText.index] = orbText
--orbText:setFillColor( 0,0,0 )
--orbText.alpha = 1
--The objects group
orb.group = params.group or nil
--If the function call has a parameter named group then insert it into the specified group
orb.group:insert(orb)
--orb.group:insert(orbText)
--Insert the object into the table at the specified index
orb.orbTable[orb.index] = orb
return orb -- returns the orb to have control of it
end
The function to generate all the objects I need is:
function _.generateDeck()
for i = 1, 40 do -- loop to generate the 40 orbs
internalValue = internalValue + 1 -- counter to assigns the right values
if (internalValue > 10) then -- if the internal value is more than 10 starts again (only 1-10)
internalValue = 1
end
if (i >= 1 and i <= 10) then -- assign RED color to the first 10 orbs
completeDeck[i] = _.spawn({color = {196/255,138/255,105/255}, group = orbsGroup, x = 100, y = display.contentHeight / 2, value = internalValue, orbTable = completeDeck})
elseif (i >= 11 and i <= 20) then -- assigns YELLOW color to the next 10 orbs
completeDeck[i] = _.spawn({color = {229/255,214/255,142/255}, group = orbsGroup, x = 100, y = display.contentHeight / 2, value = internalValue, orbTable = completeDeck})
elseif (i >= 11 and i <= 30) then -- assigns BLUE color to the next 10 orbs
completeDeck[i] = _.spawn({color = {157/255,195/255,212/255}, group = orbsGroup, x = 100, y = display.contentHeight / 2, value = internalValue, orbTable = completeDeck})
elseif (i >= 11 and i <= 40) then -- assigns GREEN color to the next 10 balls
completeDeck[i] = _.spawn({color = {175/255,181/255,68/255}, group = orbsGroup, x = 100, y = display.contentHeight / 2, value = internalValue, orbTable = completeDeck})
end
end
end
After calling the function, it generates the circles correctly and I can read the value:
for i = 1, #completeDeck do
print("Complete Deck Value ("..i.."): "..completeDeck[i].value)
end
But if I uncomment the lines regarding the newText (orbText) then I'm not able to read the values... (value is a nil value)
I guess I need to create a displayGroup with the objects I want inside (circle and text), then "spawn" it, modifying the values? In that case... how can I refer to the object inside the displayGroup?
I think I'm mixing different concepts.
In Lua, objects are tables and to create an object composed of existing objects, we create a table with and add the objects to it as values. The code below creates an objects with orb and orbText which can be accessed by object.orb or object.orbText
function _.spawn(params)
local orb = display.newCircle(display.contentWidth/2, display.contentHeight/2, 40) -- creates the orb shape
orb:setFillColor(unpack(params.color))
orb.x = params.x
orb.y = params.y
orb.alpha = 1
local orbText = display.newText(param.value, param.x + 2, param.y - 5, "Aniron", 40)
orbText:setFillColor(0, 0, 0)
orbText.alpha = 1
-- create an object with orb and orbText as values
local object = {orb = orb, orbText = orbText}
-- add more values to the object
object.value = params.value
object.index = #params.orbTable + 1
params.orbTable[object.index] = object
return object
end
Now to use the object:
for i, object in ipairs(completeDeck) do
print(object.value)
end