Plt Plot to OpenCV Image/Numpy Array - opencv

I have a piece of code which I use to visualize a graph:
if visualize == True:
    # One colour per cluster label, spread evenly over the Spectral colormap.
    unique_labels = set(db.labels_)
    palette = [plt.cm.Spectral(fraction)
               for fraction in np.linspace(0, 1, len(unique_labels))]
    for label, colour in zip(unique_labels, palette):
        # Label -1 is drawn in black: black is reserved for noise.
        face_colour = (0, 0, 0, 1) if label == -1 else tuple(colour)
        in_cluster = (db.labels_ == label)
        # Core samples of this cluster get large markers ...
        core_xy = scaled_points[in_cluster & core_samples_mask]
        plt.plot(core_xy[:, 0], core_xy[:, 1], 'o',
                 markerfacecolor=face_colour,
                 markeredgecolor='k', markersize=14)
        # ... and its non-core samples get small ones.
        other_xy = scaled_points[in_cluster & ~core_samples_mask]
        plt.plot(other_xy[:, 0], other_xy[:, 1], 'o',
                 markerfacecolor=face_colour,
                 markeredgecolor='k', markersize=6)
    # display the graph
    plt.title('Estimated number of clusters: %d' % n_clusters_)
    plt.show()
    # get the image into a variable that OpenCV likes
    # uh, how?
While this works, I want to have the end result (whatever is being shown) as an OpenCV image.
Since I don't even have the variable -image-, I have no idea how to achieve this.
Did anyone do something similar?
EDIT: I am actually getting close. Now I can create an OpenCV image out of a fig, but the contents are not right. The fig is empty. I wonder where I go wrong? Why doesn't it get the plt object from above and draw the actual content?
# NOTE: plt.figure() starts a brand-new figure, so `fig` only contains what
# is plotted after this line.
fig = plt.figure()
canvas = FigureCanvas(fig)
canvas.draw()
# convert canvas to image: raw RGB bytes -> flat uint8 array -> (height, width, 3).
# np.frombuffer replaces the deprecated binary mode of np.fromstring(..., sep='').
graph_image = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
# get_width_height() is (width, height); image arrays are (rows, cols, channels)
graph_image = graph_image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
# it still is rgb, convert to opencv's default bgr
graph_image = cv2.cvtColor(graph_image, cv2.COLOR_RGB2BGR)

Okay, I finally got it! One has to create the fig object at the very beginning, then use the necessary plotting functions, then convert to canvas and then to OpenCV image.
EDIT: Thanks to the suggestion of #ImportanceOfBeingErnest, now the code is even more straightforward!
Here is the full code:
if visualize:
    # create a figure first, so every plt.plot call below draws onto it
    fig = plt.figure()
    # Black removed and is used for noise instead.
    unique_labels = set(db.labels_)
    colors = [plt.cm.Spectral(each)
              for each in np.linspace(0, 1, len(unique_labels))]
    for k, col in zip(unique_labels, colors):
        if k == -1:
            # Black used for noise.
            col = [0, 0, 0, 1]
        class_member_mask = (db.labels_ == k)
        # core samples of this cluster: large markers
        xy = scaled_points[class_member_mask & core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=14)
        # remaining samples of this cluster: small markers
        xy = scaled_points[class_member_mask & ~core_samples_mask]
        plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
                 markeredgecolor='k', markersize=6)
    # convert it to an OpenCV image/numpy array:
    # render the figure off-screen through the canvas
    # (assumes FigureCanvas is an Agg-based canvas, as the renderer access
    # in the earlier version of this code already required -- TODO confirm import)
    canvas = FigureCanvas(fig)
    canvas.draw()
    # copy the rendered RGBA pixels through the public canvas API instead of
    # reaching into the private renderer attribute
    graph_image = np.array(canvas.buffer_rgba())
    # the buffer is RGBA: drop alpha and reorder to opencv's default bgr
    graph_image = cv2.cvtColor(graph_image, cv2.COLOR_RGBA2BGR)

Related

Why does Tesseract fail to recognize 6 out of 26 of my alphabetic keyboard keys even with several parameter tunings?

TL;DR I'm using:
adaptive thresholding
segmenting by keys (width/height ratio) - see green boxes in image result
psm 10 to treat each key as a character
but it fails to recognize some keys, falsely identifies others or identifies 2 for 1 char (see the L character in the image result, it's an L and P), etc.
Note: I cropped the image and re-ran the results to get it to fit on this site, but before cropping it did slightly better (recognized more keys, fewer false positives, etc).
I just want it to recognize the alphabet keys. Ultimately I will want it to work for realtime video.
config:
'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
I've tried scaling the image differently, scaling the individual key segments, using opening/closing/etc but it doesn't recognize all the keys.
original image
image result
Update: new results if I make the image straighter (bird's eye) and remove the whitelisting, it manages to detect all for the most part (although it thinks the O is a 0 and the I is a |, which is understandable). Why is this and how could I make this adaptive enough for a dynamic video when it is so sensitive to these conditions?
Code:
import pytesseract
import numpy as np
try:
from PIL import Image
except ImportError:
import Image
import cv2
from tqdm import tqdm
from collections import defaultdict
def get_missing_chars(dict):
    """Return the capital letters A-Z, in order, that are not in *dict*.

    *dict* may be any container supporting ``in`` (the caller passes a
    mapping keyed by detected letters).
    """
    # code points 65..90 are 'A'..'Z'
    return [letter for letter in map(chr, range(65, 91)) if letter not in dict]
def draw_box_and_char(img, contour_dims, c, box_col, text_col):
    """Return a copy of *img* with a rectangle and the text *c* drawn on it.

    Args:
        img: image to annotate; it is copied, not modified.
        contour_dims: ``(x, y, w, h)`` of the rectangle to draw.
        c: text to write next to the rectangle.
        box_col: colour of the rectangle outline.
        text_col: colour of the text.
    """
    x, y, w, h = contour_dims
    font_offset = 3
    # NOTE(review): the horizontal text offset is derived from h, not w -- confirm intended
    text_origin = (x + h // 2 + 12, y + h - font_offset)
    annotated = img.copy()
    cv2.rectangle(annotated, (x, y), (x + w, y + h), box_col, 2)
    cv2.putText(
        annotated,
        c,
        text_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=.5,
        color=text_col,
        thickness=1,
        lineType=cv2.LINE_AA,
    )
    return annotated
def detect_keys(img):
    """Find key-shaped regions in a keyboard photo and OCR one letter per region.

    The image is downscaled and adaptively thresholded, and its external
    contours are filtered by height/width ratio and area. The interior of
    each remaining contour is handed to Tesseract in single-character mode.
    For every recognised letter the highest-confidence region is kept and
    drawn on the result. The detected and missing letters are printed.

    Args:
        img: BGR image, e.g. as returned by ``cv2.imread``.

    Returns:
        A 3-channel copy of the thresholded image with a box and the
        recognised letter drawn for each detected character.
    """
    scaling = .25
    img = cv2.resize(img, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_AREA)
    print("img shape", img.shape)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    ratio_min = 0.7
    area_min = 1000
    nbrhood_size = 1001
    bias = 2
    # adapt to different lighting
    bin_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, nbrhood_size, bias)

    # findContours returns 2 or 3 values depending on the OpenCV version
    items = cv2.findContours(bin_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = items[0] if len(items) == 2 else items[1]

    # square-like ratio and big enough: try to get a character from it
    key_contours = []
    for contour in contours:
        _, _, w, h = cv2.boundingRect(contour)
        if h / w > ratio_min and cv2.contourArea(contour) > area_min:
            key_contours.append(contour)

    img_copy = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2RGB)
    # best detection so far per recognised letter: {'conf': ..., 'cont': (x, y, w, h)}
    let_to_contour = {}
    # offset to get smaller square within the key segment for easier char recognition
    offset = 10
    show_each_char = False
    # psm 10: treats the segment as a single character (loop-invariant, built once)
    custom_config = r'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'

    for contour in tqdm(key_contours, total=len(key_contours)):
        x, y, w, h = cv2.boundingRect(contour)
        new_x, new_y = x + offset, y + offset
        new_w, new_h = w - 2 * offset, h - 2 * offset

        # white canvas containing only the inner part of this key, then inverted
        base = np.full(bin_img.shape, 255, dtype=np.uint8)
        base[new_y:new_y+new_h, new_x:new_x+new_w] = bin_img[new_y:new_y+new_h, new_x:new_x+new_w]
        segment = cv2.bitwise_not(base)

        d = pytesseract.image_to_data(segment, config=custom_config, output_type='dict')
        text = d['text'][-1]
        # Confidence of the same (last) entry the text was taken from, as a
        # number. Previously the whole d['conf'] list was stored and lists
        # were compared against each other.
        conf = float(d['conf'][-1])

        # sometimes recognizes multiple keys even though there is only 1
        for sub_c in text:
            # keep only the most confident region for each character
            best = let_to_contour.get(sub_c)
            if best is None or conf > best['conf']:
                let_to_contour[sub_c] = {'conf': conf, 'cont': (new_x, new_y, new_w, new_h)}

        if show_each_char:
            contour_dims = (new_x, new_y, new_w, new_h)
            box_col = (0, 255, 0)
            text_col = (0, 0, 0)
            # "?" marks a segment in which nothing was recognised
            segment_with_boxes = draw_box_and_char(segment, contour_dims, text or "?", box_col, text_col)
            cv2.imshow('segment', segment_with_boxes)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

    # draw boxes around recognized keys
    box_col = (0, 255, 0)
    text_col = (0, 0, 0)
    for letter, data in let_to_contour.items():
        img_copy = draw_box_and_char(img_copy, data['cont'], letter, box_col, text_col)

    for det in let_to_contour:
        print(det, let_to_contour[det])
    print("total detected: ", let_to_contour.keys())
    missing = get_missing_chars(let_to_contour)
    print(f"n_missing: {len(missing)}")
    print(f"chars missing: {missing}")
    return img_copy
if __name__ == "__main__":
    img_file = "keyboard.jpg"
    img = cv2.imread(img_file)
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising, which would otherwise surface as an opaque resize error.
    if img is None:
        raise FileNotFoundError(f"could not read image: {img_file}")
    img_with_detected_keys = detect_keys(img)
    cv2.imshow("detected", img_with_detected_keys)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Resizing inputs for torch model

I keep running into this error and could not find an exact solution or a general recipe for it. My inputs are 48x48, which does not match the input shape expected by resnet101. How can I adapt my input to fit resnet101? My code is below; it should help you understand my problem.
if __name__ == "__main__":
    vid = cv2.VideoCapture(0)
    emotions = []
    while vid.isOpened():
        image = cv2.imread("/home/berkay/Desktop/angry_man.jpg")
        _, frame = vid.read()
        # grayscale version of the still image
        # NOTE(review): faces are detected on `frame` below, but the region
        # is cropped from this still image -- confirm that is intended
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # run the haarcascade face detector
        faces = face_cascade.detectMultiScale(frame)
        for (x, y, w, h) in faces:
            # takes the region of interest of the face only in gray
            # (columns span the box width w; this previously used h)
            roi_gray = gray[y:y+h, x:x+w]
            resized = cv2.resize(roi_gray, (48, 48))  # resizes to 48x48 sized image
            # predict the mood
            img = img2tensor(resized)
            prediction = predict(img)
At that point, I'm getting this error:
weight of size [64, 3, 7, 7], expected input[1, 1, 229, 229] to have 3 channels, but got 1 channels instead
How can I fix this? Thanks in advance
You can modify the input layer of the resnet so that it accepts single-channel tensor inputs, using:
In [1]: model = resnet101()
In [2]: model.conv1 = nn.Conv2d(1, 64, kernel_size=(2, 2))
In [3]: model(torch.rand(10, 1, 48, 48))
Out[3]:
tensor([[-0.5015, 0.6124, 0.1370, ..., 1.2181, -0.4707, 0.3285],
[-0.4776, 1.1027, 0.0161, ..., 0.6363, -0.4733, 0.6218],
[-0.3935, 0.8276, -0.0316, ..., 0.6853, -0.4735, 0.6424],
...,
[-0.2986, 1.1758, 0.0158, ..., 0.7422, -0.4422, 0.4792],
[-0.2668, 0.7884, -0.1205, ..., 1.1445, -0.6249, 0.6697],
[-0.2139, 1.0412, 0.2326, ..., 0.8332, -0.8744, 0.4827]],
grad_fn=<AddmmBackward0>)
(you will probably need to modify the kernel size accordingly too)

error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize' OpenCV

I have this old code that used to run fine in Python 2.7 a while ago. I just updated the code to run in Python 3.8, but when I try to execute it in Python 3.8 and OpenCV 3.4 I get a resize error and a warning (below)!
Here is the link to the two tif images that are required to run this code.
It's worth noting that both tif images are in the same folder as the Python code
import cv2
import matplotlib.pyplot as plt
import numpy as np
## Code for C_preferred Mask and C_images##
## There are three outputs to this code:
#"Block_order_C.PNG"
#"Out_img.PNG"
#"Output_C.txt"
## Change the image name here
filename_image = '2.tif'
filename_mask = '1.tif'
## OpenCV verison Checking
#print 'OpenCV version used', cv2.__version__
filename = open("Output_C.txt","w")
filename.write("Processing Image : " + str(filename_image) + '\n\n')
## Function to sort the contours : Parameters that you can tune : tolerance_factor and size 0f the image.Here, I have used a fix size of
## (800,800)
def get_contour_precedence(contour, cols):
    """Return a sort key that orders contours by row band, then by x.

    The top edge of the contour's bounding rectangle is rounded down to a
    multiple of ``tolerance_factor``, so contours whose tops fall in the
    same band compare by their left edge only. *cols* is the multiplier
    applied to the banded row.
    """
    tolerance_factor = 10
    left, top = cv2.boundingRect(contour)[:2]
    row_band = top - top % tolerance_factor
    return row_band * cols + left
## Loading the colored mask, resizing it to (800,800) and converting it from RGB to HSV space, so that the color values are emphasized
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
# Loading the original Image
b_image_1 = cv2.resize(cv2.imread(filename_image),(800,800));
cv2.imshow("c_mask_preferred",p_mask_c)
cv2.waitKey();
# convert the target color to HSV, As our target mask portion to be considered is green. So I have chosen target color to be green
b = 0;
g = 255;
r = 0;
# Converting target color to HSV space
target_color = np.uint8([[[b, g, r]]])
target_color_hsv = cv2.cvtColor(target_color, cv2.COLOR_BGR2HSV)
# boundaries for Hue define the proper color boundaries, saturation and values can vary a lot
target_color_h = target_color_hsv[0,0,0]
tolerance = 20
lower_hsv = np.array([max(0, target_color_h - tolerance), 10, 10])
upper_hsv = np.array([min(179, target_color_h + tolerance), 250, 250])
# apply threshold on hsv image
mask = cv2.inRange(p_mask_c, lower_hsv, upper_hsv)
cv2.imshow("mask",mask)
cv2.waitKey()
# Eroding the binary mask, such that every white portion (grids) are seperated from each other, to avoid overlapping and mixing of
# adjacent grids
b_mask = mask;
kernel = np.ones((5,5))
#kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
sharp = cv2.erode(b_mask,kernel, iterations=2)
# Finding all the grids (from binary image).
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x and
# (contours, hierarchy) on other versions; the last two items are the same in both.
contours, hierarchy = cv2.findContours(sharp, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
print (' Number of contours', len(contours))
# Sorting contours. sorted() is used instead of .sort() because some OpenCV
# builds return the contours as a tuple, which has no in-place sort.
contours = sorted(contours, key=lambda x: get_contour_precedence(x, np.shape(b_mask)[0]))
#cv2.drawContours(b_image_1, contours, -1, (0,255,0), 1)
# Untouched copy used for colour sampling, a scratch mask, and the output image.
b_image = b_image_1.copy()
temp = np.zeros(np.shape(b_image_1), np.uint8)
print (' size of temp',np.shape(temp), np.shape(b_image))
out_img = b_image_1.copy()
# Process the contours one by one; `label` numbers each grid/panel from 1.
for label, cnt in enumerate(contours, start=1):
    cv2.drawContours(b_image_1, [cnt], 0, (255, 255, 0), 1)
    # Text anchor: slightly left of the centre of the minimum enclosing circle.
    (centre_x, centre_y), _radius = cv2.minEnclosingCircle(cnt)
    anchor = (int(centre_x) - 10, int(centre_y))
    cv2.putText(b_image_1, "#{}".format(label), anchor,
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
    # Filled contour on the scratch mask.
    cv2.drawContours(temp, [cnt], 0, (255, 255, 255), -1)
    # Mean colour over the contour's bounding rectangle in the untouched image.
    left, top, width, height = cv2.boundingRect(cnt)
    crop_img = b_image[top:top + height, left:left + width]
    mean = np.array(cv2.mean(crop_img)).reshape(-1, 1)
    print (' Mean color', mean, np.shape(mean))
    # A low second channel is reported as magenta, anything else as gray.
    if mean[1] < 50:
        tag, colour_name = "M", 'Magenta'
    else:
        tag, colour_name = "G", 'Gray'
    cv2.putText(out_img, tag, anchor,
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
    filename.write("Block number #" + str(label) + ' is : ' + colour_name + '\n')
cv2.imwrite("Block_order_C.PNG", b_image_1)
cv2.imwrite("Out_img.PNG", out_img)
filename.close()
cv2.imshow("preferred", b_image_1)
cv2.waitKey()
Error
[ WARN:0] global C:\projects\opencv-python\opencv\modules\imgcodecs\src\grfmt_tiff.cpp (449) cv::TiffDecoder::readData OpenCV TIFF: TIFFRGBAImageOK: Sorry, can not handle images with IEEE floating-point samples
Traceback (most recent call last):
File "Processing_C_preferred.py", line 32, in
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask),(800,800)),cv2.COLOR_RGB2HSV);
cv2.error: OpenCV(4.2.0) C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:4045: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'
When you read in the image pass the cv::IMREAD_ANYDEPTH = 2 parameter as the second parameter in cv2.imread().
Changing your lines to
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2),(800,800)),cv2.COLOR_RGB2HSV);
and
b_image_1 = cv2.resize(cv2.imread(filename_image, 2),(800,800));
removes the resize error you're seeing.
But you get another error when changing the color since your TIFF image apparently has only one channel so cv2.COLOR_RGB2HSV won't work..
You could also use multiple flags like cv::IMREAD_COLOR = 1,
p_mask_c = cv2.cvtColor(cv2.resize(cv2.imread(filename_mask, 2 | 1),(800,800)),cv2.COLOR_BGR2HSV);
to read in a color image. But you get a different error. Perhaps you understand this image better than I do and can solve the problem from here on out.

Conversion between keypoints Coco and open pose?

Hi, I am currently struggling to convert between popular 2D keypoint formats, from COCO keypoints to OpenPose. I have the keypoints in COCO order, x1,y1,c1 ... x17,y17,c17, where x and y are the coordinates and c is the confidence score of the joint being detected. I was wondering if anyone has successfully mapped between COCO and OpenPose.
def convert_coco_to_openpose_cords(coco_keypoints_list):
    """Reorder flat COCO keypoints into OpenPose order and add a 'Neck' joint.

    COCO keypoints: [x1,y1,v1,...,xk,yk,vk] (k=17), in the order
        ['Nose', 'Leye', 'Reye', 'Lear', 'Rear', 'Lsho', 'Rsho', 'Lelb',
         'Relb', 'Lwri', 'Rwri', 'Lhip', 'Rhip', 'Lkne', 'Rkne', 'Lank', 'Rank']
    OpenPose keypoints (k=18, with Neck), in the order
        ['Nose', 'Neck', 'Rsho', 'Relb', 'Rwri', 'Lsho', 'Lelb', 'Lwri', 'Rhip',
         'Rkne', 'Rank', 'Lhip', 'Lkne', 'Lank', 'Leye', 'Reye', 'Lear', 'Rear']

    Keypoints with v == 0 (not labeled) get MISSING_VALUE for both
    coordinates; v == 1 or v == 2 keep their coordinates. The neck is the
    integer-truncated midpoint of the two shoulders when both have v > 0,
    otherwise MISSING_VALUE.

    Returns:
        Array of shape (18, 2) whose columns are (y, x).

    Raises:
        ValueError: if a keypoint's v is not 0, 1 or 2.
    """
    def keypoint(index):
        # (x, y, v) triple of the COCO keypoint at `index`
        start = 3 * index
        return coco_keypoints_list[start:start + 3]

    # COCO index of every OpenPose joint, in OpenPose order, Neck left out
    coco_index_per_joint = [0, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 1, 2, 3, 4]
    y_cords, x_cords = [], []
    for coco_index in coco_index_per_joint:
        xi, yi, vi = keypoint(coco_index)
        if vi == 0:  # not labeled
            xi = yi = MISSING_VALUE
        elif vi not in (1, 2):  # 1: labeled but not visible, 2: labeled and visible
            raise ValueError("vi value: {}".format(vi))
        y_cords.append(yi)
        x_cords.append(xi)

    # Get 'Neck' keypoint by interpolating between 'Lsho' and 'Rsho' keypoints
    l_shoulder = keypoint(5)
    r_shoulder = keypoint(6)
    if l_shoulder[2] > 0 and r_shoulder[2] > 0:
        neck_y = int((l_shoulder[1] + r_shoulder[1]) / 2.)
        neck_x = int((l_shoulder[0] + r_shoulder[0]) / 2.)
    else:
        neck_y = neck_x = MISSING_VALUE

    open_pose_neck_index = 1
    y_cords.insert(open_pose_neck_index, neck_y)
    x_cords.insert(open_pose_neck_index, neck_x)
    return np.stack((y_cords, x_cords), axis=1)

imshow seems to show the wrong image (not duplicate)

I am trying to show an image using cv2.imshow(), but batch_data (the original image) appears altered after I concatenate it with data (the set of all images). I am not sure how the original image is altered.
data = np.array([]).reshape([0, IMG_WIDTH , IMG_HEIGHT ,IMG_DEPTH ])
label = np.array([])
batch_label = np.array([255]).reshape(1) #label number represent background
x = True
for (n,address) in enumerate(address_list):
print("Reading all images with background from ", address)
batch_data = cv2.imread(image_dir + address)
dim = (IMG_WIDTH ,IMG_HEIGHT)
if batch_data is not None:
batch_data = cv2.resize(batch_data,dim, interpolation = cv2.INTER_NEAREST)
else:
print("batch_data is not read.")
batch_data = np.expand_dims(batch_data, axis= 0)
data = np.concatenate((data,batch_data))
label = np.concatenate((label,batch_label))
while x:
print("batch_data.shape",batch_data.shape)
print("data.shape", data.shape)
print((np.squeeze(batch_data, axis=0) == data[n,...]).all()) # return true
cv2.imshow('image', np.squeeze(batch_data, axis= 0)) # show original image
cv2.imshow('image2', data[n,...]) #show original image but color is alter to white and red
cv2.waitKey(0)
cv2.destroyAllWindows()
x = False
I think cv2.imshow('image2', data[n,...]) show the original image because I tried using transpose to swap axis=1 to axis=2, and the red spot is moved accordingly. I may be wrong.
Can anyone spot the mistake? I feel like it is going to be a very stupid mistake, but I just could not find it.
I think this is a data type problem.
Try to change data from float64 to uint8:
data = np.array([], dtype=np.uint8).reshape([0, IMG_WIDTH , IMG_HEIGHT ,IMG_DEPTH])
The white and red are signs that indicate saturation. float64 range is expected to be [0, 1], while uint8 is expected to be [0, 255]. You can find more about this problem here.

Resources