I am given 4 camera extrinsic parameter matrices, and I wrote some code to display those cameras and their coordinate-axis vectors in 3D.
Here is the code:
# Plot every camera in `views` in a 3D matplotlib figure: a text label at the
# camera position plus (apparently) one line segment per camera axis.
# NOTE(review): this excerpt has lost its indentation and several expressions
# (right-hand sides, the plotting calls); the gaps are flagged below.
def plot_cameras(views):
fig = plt.figure()
# NOTE(review): fig.gca(projection=...) is rejected by recent Matplotlib releases;
# fig.add_subplot(projection='3d') is the documented replacement — confirm the target version.
ax = fig.gca(projection='3d')
for name, view in views.items():
#for name, view in {'test_cam': 0}.items():
# NOTE(review): right-hand side missing — presumably the extrinsic matrix obtained from `view`; confirm.
m =
#m = Camera.make_lookat_m(
# colvec([10, 10, 10]),
# colvec( [0,0,0] ),
# colvec([0, 0, -1])
# )
# Upper-left 3x3 block of m and its transpose.
r = m[:3, :3].copy()
r_t = r.T
# First three entries of m's fourth column.
t = m[:3, 3].copy()
# NOTE(review): right-hand side missing. If m is a world-to-camera [R|t] matrix,
# the camera centre in world coordinates would be -r_t @ t — confirm against the original.
pos =
x_cam, y_cam, z_cam = pos # Camera pose
# Columns of r.T (i.e. rows of r), each scaled by 100.
u = 100*r_t[:, 0]
v = 100*r_t[:, 1]
w = 100*r_t[:, 2] # Camera u,v,w vectors
ax.text(x_cam, y_cam, z_cam, name)
# NOTE(review): the nine endpoint lists below look like the arguments of three
# ax.plot(...) calls (one segment per axis u, v, w) whose call lines were lost.
[x_cam, x_cam + u[0]],
[y_cam, y_cam + u[1]],
[z_cam, z_cam + u[2]],
[x_cam, x_cam + v[0]],
[y_cam, y_cam + v[1]],
[z_cam, z_cam + v[2]],
[x_cam, x_cam + w[0]],
[y_cam, y_cam + w[1]],
[z_cam, z_cam + w[2]],
I have 4 cameras at 0, +-25 and +90 degrees of the target.
I am told that these cameras are in OpenCV convention but my function clearly shows they are in OpenGL convention (looking down negative z axis).
Am I properly decomposing the camera matrix and extracting the vectors?
If so, is there a way of transforming my OpenGL-style camera matrices into OpenCV-style?


Why does Tesseract fail to recognize 6 out of 26 of my alphabetic keyboard keys even with several parameter tunings?

TL;DR I'm using:
adaptive thresholding
segmenting by keys (width/height ratio) - see green boxes in image result
psm 10 to treat each key as a character
but it fails to recognize some keys, falsely identifies others or identifies 2 for 1 char (see the L character in the image result, it's an L and P), etc.
Note: I cropped the image and re-ran the results to get it to fit on this site, but before cropping it did slightly better (recognized more keys, fewer false positives, etc).
I just want it to recognize the alphabet keys. Ultimately I will want it to work for realtime video.
'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
I've tried scaling the image differently, scaling the individual key segments, using opening/closing/etc but it doesn't recognize all the keys.
original image
image result
Update: new results if I make the image straighter (bird's eye) and remove the whitelisting, it manages to detect all for the most part (although it thinks the O is a 0 and the I is a |, which is understandable). Why is this and how could I make this adaptive enough for a dynamic video when it is so sensitive to these conditions?
import pytesseract
import numpy as np
from PIL import Image
except ImportError:
import Image
import cv2
from tqdm import tqdm
from collections import defaultdict
def get_missing_chars(dict):
    """Return the capital letters A-Z that are not contained in ``dict``.

    Args:
        dict: Any container supporting ``in`` (the caller passes a mapping
            keyed by detected letters). The parameter name shadows the
            builtin; it is kept unchanged so keyword callers keep working.

    Returns:
        A list of the missing single-character strings, in alphabetical order.
    """
    # Local import: only this block needs it, so the file's import section
    # stays untouched.
    import string

    return [letter for letter in string.ascii_uppercase if letter not in dict]
def draw_box_and_char(img, contour_dims, c, box_col, text_col):
    """Return a copy of ``img`` annotated with a rectangle and the text ``c``.

    Args:
        img: Image array; it is copied, the input itself is not drawn on.
        contour_dims: ``(x, y, w, h)`` of the box to draw.
        c: Text to render next to the box.
        box_col: Colour passed to ``cv2.rectangle``.
        text_col: Colour passed to ``cv2.putText``.

    Returns:
        The annotated copy.
    """
    left, top, width, height = contour_dims
    # Text anchor: shifted right by half the box height plus 12 px, and
    # placed 3 px above the bottom edge of the box.
    label_anchor = (left + height // 2 + 12, top + height - 3)

    annotated = img.copy()
    cv2.rectangle(
        annotated, (left, top), (left + width, top + height), box_col, 2
    )
    cv2.putText(
        annotated,
        c,
        label_anchor,
        cv2.FONT_HERSHEY_SIMPLEX,
        fontScale=.5,
        color=text_col,
        thickness=1,
        lineType=cv2.LINE_AA,
    )
    return annotated
# Detect key-like regions in a BGR keyboard image, OCR each region with
# Tesseract (single-character mode) and return an RGB copy of the thresholded
# image with a box and label drawn for every recognised letter.
# NOTE(review): this excerpt has lost its indentation and some short lines
# (an `if` body, an `else:`); the affected spots are flagged below.
def detect_keys(img):
# Work on a quarter-size copy of the input.
scaling = .25
img = cv2.resize(img, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_AREA)
print("img shape", img.shape)
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Filters applied to each contour: minimum height/width ratio and minimum area.
ratio_min = 0.7
area_min = 1000
# Block size and constant handed to adaptiveThreshold below.
nbrhood_size = 1001
bias = 2
# adapt to different lighting
bin_img = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY_INV, nbrhood_size, bias)
# findContours may return 2 or 3 values; pick the contour list in either case.
items = cv2.findContours(bin_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]
key_contours = []
for c in contours:
x, y, w, h = cv2.boundingRect(c)
ratio = h/w
area = cv2.contourArea(c)
# square-like ratio, try to get character
if ratio > ratio_min and area > area_min:
# NOTE(review): the body of this `if` is missing in the excerpt — presumably
# `key_contours.append(c)`; as written nothing is ever added to key_contours. Confirm.
# `detected` is rebound to a plain dict further down, so this defaultdict is never read.
detected = defaultdict(int)
n_kept = 0
img_copy = cv2.cvtColor(bin_img, cv2.COLOR_GRAY2RGB)
# Maps each recognised character to {'conf': ..., 'cont': (x, y, w, h)}.
let_to_contour = {}
n_contours = len(key_contours)
# offset to get smaller square within the key segment for easier char recognition
offset = 10
show_each_char = False
for _, c in tqdm(enumerate(key_contours), total=n_contours):
x, y, w, h = cv2.boundingRect(c)
ratio = h/w
area = cv2.contourArea(c)
# Black canvas the size of the whole image; only the inset key region is copied in.
base = np.zeros(bin_img.shape, dtype=np.uint8)
n_kept += 1
new_y = y+offset
new_x = x+offset
new_h = h-2*offset
new_w = w-2*offset
base[new_y:new_y+new_h, new_x:new_x+new_w] = bin_img[new_y:new_y+new_h, new_x:new_x+new_w]
# Invert the canvas before handing it to Tesseract.
segment = cv2.bitwise_not(base)
# try scaling up individual keys
# scaling = 2
# segment = cv2.resize(segment, None, fx=scaling, fy=scaling, interpolation=cv2.INTER_CUBIC)
# psm 10: treats the segment as a single character
custom_config = r'-l eng --oem 1 --psm 10 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ"'
d = pytesseract.image_to_data(segment, config=custom_config, output_type='dict')
# NOTE(review): `conf` is stored as returned (presumably a list with one entry per
# detected element), so the `>` comparison below would compare whole lists — confirm.
conf = d['conf']
# Only the last text entry is used; note that this rebinds the loop variable `c`.
c = d['text'][-1]
if c:
# sometimes recognizes multiple keys even though there is only 1
for sub_c in c:
# save character and contour to draw on image and show bounds/detection
if sub_c not in let_to_contour or (sub_c in let_to_contour and conf > let_to_contour[sub_c]['conf']):
let_to_contour[sub_c] = {'conf': conf, 'cont': (new_x, new_y, new_w, new_h)}
# NOTE(review): an `else:` appears to have been lost before the next two lines;
# as written they run unconditionally and overwrite the recognised text.
c = "?"
text_col = (0, 0, 255)
if show_each_char:
contour_dims = (new_x, new_y, new_w, new_h)
box_col = (0, 255, 0)
text_col = (0, 0, 0)
segment_with_boxes = draw_box_and_char(segment, contour_dims, c, box_col, text_col)
cv2.imshow('segment', segment_with_boxes)
# draw boxes around recognized keys
for c, data in let_to_contour.items():
box_col = (0, 255, 0)
text_col = (0, 0, 0)
img_copy = draw_box_and_char(img_copy, data['cont'], c, box_col, text_col)
# Report which letters were found and which of A-Z are still missing.
detected = {k: 1 for k in let_to_contour}
for det in let_to_contour:
print(det, let_to_contour[det])
print("total detected: ", let_to_contour.keys())
missing = get_missing_chars(detected)
print(f"n_missing: {len(missing)}")
print(f"chars missing: {missing}")
return img_copy
# Script entry point: load keyboard.jpg, run detect_keys on it and display the annotated image.
if __name__ == "__main__":
img_file = "keyboard.jpg"
img = cv2.imread(img_file)
img_with_detected_keys = detect_keys(img)
cv2.imshow("detected", img_with_detected_keys)
# NOTE(review): no cv2.waitKey() call follows in this excerpt — presumably lost;
# without it the window may close immediately. Confirm against the original.

how to draw a correct hyper plane in python

my code:
My Drawing Function:
def draw_hyper_plane(coef,intercept,y_max,y_min):
    """Plot the 2-D decision boundary ``w0*x + w1*y + b = 0`` on the current axes.

    The line is drawn between the heights ``y_min`` and ``y_max`` by solving
    the boundary equation for x: ``x = (-w1*y - b) / w0``.

    The original version used one and the same ``coef`` for both weights, so
    the slope was always -1 regardless of the fitted model. A weight vector
    is now split into its two components.

    Args:
        coef: Weight vector with two entries ``(w0, w1)`` in any array shape
            (e.g. a ``(1, 2)`` array). A single scalar is still accepted and
            is used for both weights, which reproduces the old behaviour.
        intercept: Bias term ``b``; a scalar or a one-element array.
        y_max: Upper y value of the drawn segment.
        y_min: Lower y value of the drawn segment.

    Raises:
        ValueError: If ``coef`` has neither one nor two entries.
        ZeroDivisionError: If ``w0`` is zero (the boundary is horizontal and
            cannot be expressed as x = f(y)).
    """
    weights = np.ravel(coef).astype(float)
    if weights.size == 1:
        # Backward-compatible scalar form: same weight on both features.
        w0 = w1 = weights[0]
    elif weights.size == 2:
        w0, w1 = weights
    else:
        raise ValueError(
            "coef must hold one or two weights, got %d" % weights.size
        )
    bias = float(np.ravel(intercept)[0])

    if w0 == 0:
        raise ZeroDivisionError(
            "w0 is zero: the boundary is horizontal and has no x = f(y) form"
        )

    points = np.array([
        [(-w1 * y_min - bias) / w0, y_min],
        [(-w1 * y_max - bias) / w0, y_max],
    ])
    plt.plot(points[:, 0], points[:, 1])
Actual Output:
Desired Output:
With my code I am not able to draw the hyperplane that correctly separates the points, as shown in the desired-output plot. Could anybody help me with this?
One way is to use the classifier's decision_function and plot a level line (level 0 corresponds to your hyperplane). Here is some code.
def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None):
    """Draw the decision boundary of a fitted 2-D classifier.

    The classifier is evaluated on a 100 x 100 grid spanning the range of
    ``X`` (padded by ``eps``) and the boundary is drawn as a contour line,
    or as two filled regions when ``fill`` is true.

    The excerpt this function came from had lost its ``try:`` and ``else:``
    lines and the tails of both contour calls; they are restored here so the
    function is valid Python. Any extra keyword arguments those truncated
    calls may have carried are unknown, so Matplotlib defaults apply.

    Args:
        classifier: Object with ``decision_function(X)``; if that attribute
            is missing, ``predict_proba(X)[:, 1]`` is used instead.
        X: Array indexed as ``X[:, 0]`` / ``X[:, 1]``; only its value range
            is used, to size the grid.
        fill: If true draw filled regions (``contourf``), otherwise draw the
            boundary line only (``contour``).
        ax: Axes to draw on; defaults to ``plt.gca()``.
        eps: Padding added around the data range; defaults to ``X.std() / 2``.
    """
    if eps is None:
        eps = X.std() / 2.

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    try:
        decision_values = classifier.decision_function(X_grid)
    except AttributeError:
        # no decision_function: fall back to the probability of the second
        # class, whose boundary sits at 0.5
        decision_values = classifier.predict_proba(X_grid)[:, 1]
        levels = [.5]
        fill_levels = [0, .5, 1]
    else:
        # signed decision scores: the boundary is the zero level
        levels = [0]
        fill_levels = [decision_values.min(), 0, decision_values.max()]

    if ax is None:
        ax = plt.gca()
    if fill:
        ax.contourf(X1, X2, decision_values.reshape(X1.shape),
                    levels=fill_levels, colors=['tab:blue', 'tab:orange'])
    else:
        ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
This code was developed there

How to count vehicles using opencv in python?

I am working on a VCS (vehicle counting system) project. The scope of the project is to classify and count vehicles. I have built a custom model using Faster R-CNN with the TensorFlow Object Detection API. The model contains only 7 classes, such as car, motorbike, bicycle, etc. The model works perfectly, but the problem is counting: it is very hard to count vehicles across video frames. I did some preliminary research on the internet and tried a lot, but I could not find any useful information. There are some projects on GitHub, but they use tracking methods.
I want the following: draw a horizontal line in the frame, and when a vehicle touches it, the count should be incremented. How can I do this? I don't know the algorithm behind it. I heard that centroid tracking would help me.
My question is: how do I count vehicles when they touch the horizontal line? I have linked a sample image below.
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
# This is needed since the notebook is stored in the object_detection folder.
# Import utilites
from utils import label_map_util
from utils import visualization_utils as vis_util
# Name of the directory containing the object detection module we're using
MODEL_NAME = 'inference_graph'
VIDEO_NAME = 'Video_105.mp4'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'training','labelmap.pbtxt')
# Path to video
# Number of classes the object detector can identify
# Load the label map.
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `king`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph =
tf.import_graph_def(od_graph_def, name='')
sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection classifier
# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Open video file
video = cv2.VideoCapture(PATH_TO_VIDEO)
# Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value
ret, frame =
frame_expanded = np.expand_dims(frame, axis=0)
# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) =
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: frame_expanded})
# Draw the results of the detection (aka 'visulaize the results')
# All the results have been drawn on the frame, so it's time to display it.
final_score = np.squeeze(scores)
count = 0
cv2.line(frame, (1144, 568), (1723,664), (0,0,255), 2) #Line
for i in range(100):
if scores is None or final_score[i] > 0.90:
min_score_thresh = 0.90
bboxes = boxes[scores > min_score_thresh]
im_width = video.get(cv2.CAP_PROP_FRAME_WIDTH)
im_height = video.get(cv2.CAP_PROP_FRAME_HEIGHT)
final_box = []
for box in bboxes:
ymin, xmin, ymax, xmax = box
final_box.append([xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height])
cv2.imshow('Object detector', frame)
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
# Clean up
# import the necessary packages
from import VideoStream
from import FPS
import argparse
import imutils
import time
import cv2
tracker = cv2.TrackerCSRT_create()
vs = cv2.VideoCapture("Video.mp4")
initBB = None
detec = []
def pega_centro(x, y, w, h):
    """Return the centre ``(cx, cy)`` of the box with corner (x, y) and size w x h.

    Each half-extent is truncated to an integer (towards zero) before it is
    added to the corner coordinate.
    """
    half_w, half_h = int(w / 2), int(h / 2)
    return x + half_w, y + half_h
roi = 480
counter = 0
offset = 6
# loop over frames from the video stream
while vs.isOpened():
ret,frame =
cv2.line(frame, (769 , roi), (1298 , roi), (255,0,0), 3)
# check to see if we are currently tracking an object
if initBB is not None:
# grab the new bounding box coordinates of the object
(success, box) = tracker.update(frame)
# check to see if the tracking was a success
if success:
(x, y, w, h) = [int(v) for v in box]
cv2.rectangle(frame, (x, y), (x + w, y + h),
(0, 255, 0), 2)
cX = int((x + x+w) / 2.0)
cY = int((y + y+h) / 2.0), (cX, cY), 3, (0, 0, 255), -1)
c=pega_centro(x, y, w, h)
for (x,y) in detec:
if y<(roi+offset) and y>(roi-offset):
cv2.line(frame, (769 , roi), (1298 , roi), (0,0,255), 3)
# show the output frame
cv2.imshow("Frame", frame)
key = cv2.waitKey(1) & 0xFF
if key == ord("s"):
# select the bounding box of the object we want to track (make
# sure you press ENTER or SPACE after selecting the ROI)
initBB = cv2.selectROI("Frame", frame, fromCenter=False,
# start OpenCV object tracker using the supplied bounding box
# coordinates, then start the FPS throughput estimator as well
tracker.init(frame, initBB)
fps = FPS().start()
# if the `q` key was pressed, break from the loop
elif key == ord("q"):

OpenCV 2.4 estimateAffine3D in Python

I'm trying to use the method cv2.estimateAffine3D but without success. Here is my code sample :
import numpy as np
import cv2
# Minimal reproduction: four 3-D points stored as a (1, 4, 3) float32 array,
# a target that is the source scaled by 10, and the estimateAffine3D call.
shape = (1, 4, 3)
source = np.array(
    [[
        # [x, y, z]
        [857, 120, 854],
        [254, 120, 855],
        [256, 120, 255],
        [858, 120, 255],
    ]],
    dtype=np.float32,
)
target = 10 * source
retval, M, inliers = cv2.estimateAffine3D(source, target)
When I try to run this sample, I obtain the same error as this other post here.
I'm using OpenCV 2.4.3 and Python 2.7.3
Please help me!
This is a known bug that is fixed in 2.4.4.
If you just need rigid (rotation + translation) alignment, here's the standard method:
def get_rigid(src, dst): # Assumes both are Nx3 matrices
    """Estimate the rigid transform (rotation + translation) mapping src onto dst.

    Least-squares alignment of corresponding 3-D points via the SVD of their
    cross-covariance (Kabsch method), so that ``dst ~= src @ R.T + T``.

    Changes from the original snippet:
    * the tuple-parameter lambda (Python 2 only) is replaced by a single
      matrix product that yields the same covariance matrix;
    * the ``R`` and ``T`` expressions, which were truncated in the excerpt,
      are restored to the standard Kabsch formulas;
    * a reflection guard makes sure a proper rotation (det = +1) is returned,
      which matters for planar or noisy point sets.

    Args:
        src: Source points, N x 3 (any array-like whose last axis has
            length 3; leading axes are flattened).
        dst: Target points in the same order and shape as ``src``.

    Returns:
        A 3 x 4 array ``[R | T]``.
    """
    src = np.asarray(src, dtype=float).reshape(-1, 3)
    dst = np.asarray(dst, dtype=float).reshape(-1, 3)

    src_mean = src.mean(0)
    dst_mean = dst.mean(0)

    # Compute covariance: sum over points of outer(src_i - mean, dst_i - mean)
    H = (src - src_mean).T.dot(dst - dst_mean)

    # numpy returns V transposed as the third value
    u, s, vt = np.linalg.svd(H)
    R = vt.T.dot(u.T) # Rotation
    if np.linalg.det(R) < 0:
        # Improper solution (reflection): flip the axis belonging to the
        # smallest singular value to get the closest proper rotation.
        vt[-1, :] *= -1
        R = vt.T.dot(u.T)

    T = -R.dot(src_mean) + dst_mean # Translation
    return np.hstack((R, T[:, np.newaxis]))
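Change the covariance line to H = reduce(lambda s, a: s + np.outer(a[0], a[1]), zip(src - src_mean, dst - dst_mean), np.zeros((3,3)))
for Python 3 in the previous post (tuple-parameter lambdas no longer exist there, and reduce has to be imported from functools). I can't comment because of my reputation score.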

Cropping A Detected Object On A Video With Tensorflow Api And Opencv

-Python 3.6
-Tensorflow 1.11 with GPU support.
-Opencv 3.4.2
I am working with the TensorFlow Object Detection API, and I have already trained a model on my dataset. It works fine. But I have to crop the detected object and do some preprocessing on it. It seems easy, because TensorFlow already draws a green box around the detected object. When I try to find the coordinates of the object, it gives me numbers in the range 0 to 1. When I use these coordinates to crop the image with OpenCV, I have to multiply them by the picture's height and width, but the crop comes out wrong. I have read that I can use the "tf.image.crop_and_resize" function, but I can't get it to run in my own code.
This is my run_inference_for_single_image function and returns output_dict:
# Run the detection graph on one image and return a dict with the keys
# 'num_detections', 'detection_classes', 'detection_boxes', 'detection_scores'
# (and 'detection_masks' when the graph provides it), with the batch axis removed.
# NOTE(review): this excerpt has lost its indentation and several expression
# tails; the truncated spots are flagged below.
def run_inference_for_single_image(image, graph):
with graph.as_default():
#with tf.Session() as sess:
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
# NOTE(review): the element expression of this set comprehension is missing — presumably `output.name`; confirm.
all_tensor_names = { for op in ops for output in op.outputs}
tensor_dict = {}
# NOTE(review): the closing `]:` of this list literal is missing in the excerpt.
for key in [
'num_detections', 'detection_boxes', 'detection_scores',
'detection_classes', 'detection_masks'
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
# NOTE(review): the argument and closing parenthesis are missing — presumably `tensor_name)`; confirm.
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
if 'detection_masks' in tensor_dict:
# The following processing is only for single image
detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
# Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
# Keep only the first real_num_detection boxes / masks.
detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image.shape[0], image.shape[1])
# Binarise the reframed masks at 0.5.
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# Follow the convention by adding back the batch dimension
tensor_dict['detection_masks'] = tf.expand_dims(
detection_masks_reframed, 0)
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
# Run inference
# NOTE(review): the call head is missing — presumably `sess.run(tensor_dict,`, with `sess`
# coming from module scope since the local `with tf.Session()` is commented out; confirm.
output_dict =,
feed_dict={image_tensor: np.expand_dims(image, 0)})
# all outputs are float32 numpy arrays, so convert types as appropriate
output_dict['num_detections'] = int(output_dict['num_detections'][0])
# NOTE(review): the continuation of the next statement is missing in the excerpt.
output_dict['detection_classes'] = output_dict[
output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
output_dict['detection_scores'] = output_dict['detection_scores'][0]
if 'detection_masks' in output_dict:
output_dict['detection_masks'] = output_dict['detection_masks'][0]
return output_dict
This is my video capture function. It crops at the wrong coordinates.
video = cv2.VideoCapture(0)
ret = video.set(3,1080)
ret = video.set(4,720)
# Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value
ret,frame =
frame = cv2.flip(frame, 1)
frame_expanded = np.expand_dims(frame, axis=0)
# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) =
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: frame_expanded})
# Draw the results of the detection (aka 'visulaize the results')
output_dict = run_inference_for_single_image(frame, detection_graph)
max_boxes_to_draw = output_dict['detection_boxes'].shape[0]
for i in range(min(max_boxes_to_draw, output_dict['detection_boxes'].shape[0])):
if output_dict['detection_scores'][i] > 0.95:
if output_dict['detection_classes'][i] in category_index.keys():
class_name = category_index[output_dict['detection_classes'][i]]['name']
crop_img = frame[int((output_dict['detection_boxes'][i][0]) * 720): int(
(output_dict['detection_boxes'][i][2]) * 720),
int((output_dict['detection_boxes'][i][1]) * 1080):int(
(output_dict['detection_boxes'][i][3]) * 1080)]
cv2.imshow("asdasd", crop_img)
cv2.imshow('Object detector', frame)
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
It might be about output_dict.
class_name = category_index[output_dict['detection_classes'][i]]['name'] => This codes give me the name of the class. It works well.
I found an answer for my question. This is the solution code:
# Acquire frame and expand frame dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value
ret,frame =
frame = cv2.flip(frame, 1)
frame_expanded = np.expand_dims(frame, axis=0)
# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) =
[detection_boxes, detection_scores, detection_classes, num_detections],
feed_dict={image_tensor: frame_expanded})
# Draw the results of the detection (aka 'visulaize the results')
output_dict = run_inference_for_single_image(frame, detection_graph)
max_boxes_to_draw = output_dict['detection_boxes'].shape[0]
for i in range(min(max_boxes_to_draw, output_dict['detection_boxes'].shape[0])):
if output_dict['detection_scores'][i] > 0.80:
if output_dict['detection_classes'][i] in category_index.keys():
class_name = category_index[output_dict['detection_classes'][i]]['name']
ymin = boxes[0, i, 0]
xmin = boxes[0, i, 1]
ymax = boxes[0, i, 2]
xmax = boxes[0, i, 3]
im_width = 1280
im_height = 720
(xminn, xmaxx, yminn, ymaxx) = (xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height)
crop_img=tf.image.crop_to_bounding_box(frame,int(yminn), int(xminn), int(ymaxx-yminn), int(xmaxx-xminn))
# print(
"""crop_img = frame[int((output_dict['detection_boxes'][i][0]) * 720): int(
(output_dict['detection_boxes'][i][2]) * 720),
int((output_dict['detection_boxes'][i][1]) * 1080):int(
(output_dict['detection_boxes'][i][3]) * 1080)]"""
cv2.imshow('Object detector', frame)
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
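im_width = 1280 means nothing to me, but it works in my project. (Presumably the camera did not honor the requested 1080-pixel width and actually delivers 1280x720 frames, so the normalized box coordinates have to be scaled by 1280; reading the size from frame.shape[1] and frame.shape[0] instead of hard-coding it would avoid this kind of mismatch.) Thanks for the help.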
