OpenCV TypeError: Expected Ptr<cv::UMat> for argument 'img' - what does this mean?

I am not able to understand what this error means. I have written a Python script that accepts an image from the user and runs emotion detection.
Here is the code:
import numpy as np
import os
import sys
import tensorflow as tf
import json
from PIL import Image
sys.path.append("..")
from object_detection.utils import ops as utils_ops
from utils import label_map_util
from utils import visualization_utils as vis_util
def Image_tensorflow(xa,ya):
PATH_TO_FROZEN_GRAPH = 'frozen_inference_graph.pb'
PATH_TO_LABELS = 'object-detection.pbtxt'
NUM_CLASSES = 4
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
use_display_name=True)
category_index = label_map_util.create_category_index(categories)
def load_image_into_numpy_array(image):
(im_width, im_height) = image.size
return np.array(image.getdata()).reshape(
(im_height, im_width, 3)).astype(np.uint8)
def image_url(xa, ya):
file_path = 'images/'
file_name = ya
image = xa
f = open((file_path + str(file_name) + ".json"), "w")
f.close()  # close() must actually be called; a bare f.close does nothing
return_dict = {'image': image, 'file': f};
return return_dict
get_image_data = image_url(xa,ya)
image_path= get_image_data['image']
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
with graph.as_default():
with tf.Session() as sess:
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
all_tensor_names = {output.name for op in ops for output in op.outputs}
tensor_dict = {}
for key in [
'num_detections', 'detection_boxes', 'detection_scores',
'detection_classes', 'detection_masks'
]:
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
tensor_name)
if 'detection_masks' in tensor_dict:
# The following processing is only for single image
detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
# Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image.shape[0], image.shape[1])
detection_masks_reframed = tf.cast(
tf.greater(detection_masks_reframed, 0.5), tf.uint8)
tensor_dict['detection_masks'] = tf.expand_dims(
detection_masks_reframed, 0)
image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
output_dict = sess.run(tensor_dict,
feed_dict={image_tensor: np.expand_dims(image, 0)})
output_dict['num_detections'] = int(output_dict['num_detections'][0])
output_dict['detection_classes'] = output_dict[
'detection_classes'][0].astype(np.uint8)
output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
output_dict['detection_scores'] = output_dict['detection_scores'][0]
if 'detection_masks' in output_dict:
output_dict['detection_masks'] = output_dict['detection_masks'][0]
return output_dict
for img in xa:
image = Image.open(img)
image_np = load_image_into_numpy_array(image)
image_np_expanded = np.expand_dims(image_np, axis=0)
# Actual detection.
output_dict = run_inference_for_single_image(image_np, detection_graph)
# Visualization of the results of a detection.
vis_util.visualize_boxes_and_labels_on_image_array(
image_np,
output_dict['detection_boxes'],
output_dict['detection_classes'],
output_dict['detection_scores'],
category_index,
instance_masks=output_dict.get('detection_masks'),
use_normalized_coordinates=True,
line_thickness=8)
# get_image_data = image_url(sys.argv[1],sys.argv[2])
# image_file = get_image_data['image']
# pass values
import cv2 as cv
image_file = image_path
img = cv.imread('image_file')
i = 0
j = 0
limiter = 0.3
while (i < 100):
if (output_dict['detection_scores'][i] > limiter):
j = j + 1
i = i + 1
# In[17]:
# store the pass values in lists
i = 0
detection_classes = []
detection_boxes = [[]] * j
detection_scores = []
while (i < j):
detection_classes.append(output_dict['detection_classes'][i])
detection_scores.append(output_dict['detection_scores'][i])
detection_boxes[i].append(output_dict['detection_boxes'][i])
i = i + 1
list1 = []
for items in detection_classes:
if items == 1:
list1.append("Angry")
elif items == 2:
list1.append("Sad")
elif items == 3:
list1.append("Neutral")
elif items == 4:
list1.append("Happy")
final_dict = {'DETECTION': list1}
file_to_write_to = get_image_data['file'].name
file_to_write_to = str(file_to_write_to)
text_file = open(file_to_write_to, "w")
text_file.write(json.dumps(final_dict))
text_file.close()
final_path = "images/" + str(ya) + "_annotated" + ".jpg"
# draw bounding boxes
img = cv.imread('xa')
i = 0
for item in detection_classes:
width, height = image.size
ymin = int(detection_boxes[0][i][0] * height)
xmin = int(detection_boxes[0][i][1] * width)
ymax = int(detection_boxes[0][i][2] * height)
xmax = int(detection_boxes[0][i][3] * width)
font = cv.FONT_HERSHEY_SIMPLEX
panel_colour = (182, 182, 42)
bumper_colour = (241, 239, 236)
damage_colour = (0, 255, 0)
text_colour = (255, 255, 255)
bumper_text = (0, 0, 0)
buffer = int(5 * width / 1000)
if (detection_classes[i] == 1):
img = cv.rectangle(img, (xmin, ymin), (xmax, ymax), panel_colour, int(2 * (height / 600)))
cv.rectangle(img, (xmin, (ymin + (buffer * 8))), (xmax, ymin), panel_colour, -1)
cv.putText(img, 'angry', (xmin, (ymin + (buffer * 6))), font, 0.8 * (height / 500), text_colour,
int(2 * (height / 400)), cv.LINE_AA)
elif (detection_classes[i] == 2):
img = cv.rectangle(img, (xmin, ymin), (xmax, ymax), panel_colour, int(2 * (height / 600)))
cv.rectangle(img, (xmin, (ymin + (buffer * 8))), (xmax, ymin), panel_colour, -1)
cv.putText(img, 'sad', (xmin, (ymin + (buffer * 6))), font, 0.8 * (height / 500), text_colour,
int(2 * (height / 400)), cv.LINE_AA)
elif (detection_classes[i] == 3):
img = cv.rectangle(img, (xmin, ymin), (xmax, ymax), bumper_colour, int(2 * (height / 600)))
cv.rectangle(img, (xmin, (ymin + (buffer * 8))), (xmax, ymin), bumper_colour, -1)
cv.putText(img, 'neutral', (xmin, (ymin + (buffer * 6))), font, 0.8 * (height / 500), bumper_text,
int(2 * (height / 400)), cv.LINE_AA)
elif (detection_classes[i] == 4):
img = cv.rectangle(img, (xmin, ymin), (xmax, ymax), panel_colour, int(2 * (height / 600)))
cv.rectangle(img, (xmin, (ymin + (buffer * 8))), (xmax, ymin), panel_colour, -1)
cv.putText(img, 'happy', (xmin, (ymin + (buffer * 6))), font, 0.8 * (height / 500), text_colour,
int(2 * (height / 400)), cv.LINE_AA)
i = i + 1
final_path = "/home/mayureshk/PycharmProjects/ImageDetection/venv/models/research/object_detection/images/" + str(ya) + "_annotated" + ".jpg"
cv.imwrite(final_path, img)
I have been stuck on this problem for two days and cannot solve it by myself. I need help from an OpenCV expert. What exactly am I doing wrong here?

Try printing your image and check whether it is None or corrupted.
import cv2 as cv
image_file = image_path
img = cv.imread('image_file') # here is the mistake: image_file is a variable, but you have passed it as a string literal
Instead of
cv2.imread('image_file')
try
cv2.imread(image_file)
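In other words, cv.imread silently returns None when the path does not exist (the literal string 'image_file' is not a real file), and passing that None to cv.rectangle or cv.putText is what raises "Expected Ptr<cv::UMat> for argument 'img'". The same mistake appears again later with cv.imread('xa'). A minimal sketch of the corrected read, using the names from the question:
import cv2 as cv

image_file = image_path          # the actual path string built earlier
img = cv.imread(image_file)      # pass the variable, not the literal 'image_file'
if img is None:
    raise FileNotFoundError("Could not read image: " + str(image_file))
# cv.rectangle(img, ...) and cv.putText(img, ...) now receive a real array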

Related

Can't grab frame 2: object has no attribute

I want to make separate tracking per camera, with the total inside being the sum of each track, but it keeps failing at frame2 = imutils.resize(frame2, width = 500):
File "D:\pyli\lib\site-packages\imutils\convenience.py", line 69, in resize
(h, w) = image.shape[:2]
together with the capture warning: videoio(MSMF): can't grab frame. Error: -2147023901 (from CvCapture_MSMF::grabFrame).
from mylib.centroidtracker import CentroidTracker
from mylib.trackableobject import TrackableObject
from imutils.video import VideoStream
from imutils.video import FPS
from mylib.mailer import Mailer
from mylib import config, thread
import time, schedule, csv
import numpy as np
import argparse, imutils
import time, dlib, cv2, datetime
from itertools import zip_longest
t0 = time.time()
def run():
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", required=False,
help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
help="path to Caffe pre-trained model")
ap.add_argument("-i", "--input", type=str,
help="path to optional input video file")
ap.add_argument("-o", "--output", type=str,
help="path to optional output video file")
ap.add_argument("-c", "--confidence", type=float, default=0.4,
help="minimum probability to filter weak detections")
ap.add_argument("-s", "--skip-frames", type=int, default=30,
help="# of skip frames between detections")
args = vars(ap.parse_args())
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
net2 = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
if not args.get("input", False):
print("[INFO] Starting live cam 1 & 2..")
vs = VideoStream(config.url).start()
vs2 = VideoStream(config.url1).start()
time.sleep(2.0)
writer = None
W = None
H = None
ct = CentroidTracker(maxDisappeared=10, maxDistance=100)
trackers = []
trackableObjects = {}
ct2 = CentroidTracker(maxDisappeared=10, maxDistance=100)
trackers2 = []
trackableObjects2 = {}
totalFrames = 0
totalDown = 0
totalUp = 0
x = []
empty=[]
empty1=[]
totalFrames2 = 0
totalDown2 = 0
totalUp2 = 0
x2 = []
empty2=[]
empty3=[]
fps = FPS().start()
if config.Thread:
vs = thread.ThreadingClass(config.url)
vs2 = thread.ThreadingClass(config.url1)
while True:
frame = vs.read()
frame = frame[1] if args.get("input", False) else frame
frame2 = vs2.read()
frame2 = frame2[1] if args.get("input", False) else frame2
frame = imutils.resize(frame, width = 500)
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame2 = imutils.resize(frame2, width = 500)
rgb2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB)
if W is None or H is None:
(H, W) = frame.shape[:2]
(H, W) = frame2.shape[:2]
status = "Waiting"
rects = []
status2 = "Waiting"
rects2 = []
if totalFrames % args["skip_frames"] == 0:
status = "Detecting"
trackers = []
blob = cv2.dnn.blobFromImage(frame, 0.007843, (W, H), 127.5)
net.setInput(blob)
detections = net.forward()
for i in np.arange(0, detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > args["confidence"]:
idx = int(detections[0, 0, i, 1])
if CLASSES[idx] != "person":
continue
box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
(startX, startY, endX, endY) = box.astype("int")
tracker = dlib.correlation_tracker()
rect = dlib.rectangle(startX, startY, endX, endY)
tracker.start_track(rgb, rect)
trackers.append(tracker)
else:
for tracker in trackers:
status = "Tracking"
tracker.update(rgb)
pos = tracker.get_position()
startX = int(pos.left())
startY = int(pos.top())
endX = int(pos.right())
endY = int(pos.bottom())
rects.append((startX, startY, endX, endY))
if totalFrames2 % args["skip_frames"] == 0:
status2 = "Detecting"
trackers2 = []
blob2 = cv2.dnn.blobFromImage(frame2, 0.007843, (W, H), 127.5)
net2.setInput(blob2)
detections2 = net2.forward()
for i in np.arange(0, detections2.shape[2]):
confidence2 = detections2[0, 0, i, 2]
if confidence2 > args["confidence"]:
idx2 = int(detections2[0, 0, i, 1])
if CLASSES[idx2] != "person":
continue
box2 = detections2[0, 0, i, 3:7] * np.array([W, H, W, H])
(startX2, startY2, endX2, endY2) = box2.astype("int")
tracker2 = dlib.correlation_tracker()  # note: tracker2/rect2, not trackers2/rects2, otherwise the lists are clobbered
rect2 = dlib.rectangle(startX2, startY2, endX2, endY2)
tracker2.start_track(rgb2, rect2)
trackers2.append(tracker2)
else:
for tracker2 in trackers2:
status2 = "Tracking"
tracker2.update(rgb2)
pos2 = tracker2.get_position()
startX2 = int(pos2.left())
startY2 = int(pos2.top())
endX2 = int(pos2.right())
endY2 = int(pos2.bottom())
rects2.append((startX2, startY2, endX2, endY2))
cv2.line(frame, (0, H // 2), (W, H // 2), (0, 0, 0), 3)
cv2.putText(frame, "-Prediction border - Entrance-", (10, H - ((i * 20) + 200)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
cv2.line(frame2, (0, H // 2), (W, H // 2), (0, 0, 0), 3)
cv2.putText(frame2, "-Prediction border - Entrance-", (10, H - ((i * 20) + 200)),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
objects = ct.update(rects)
objects2 = ct2.update(rects2)
for (objectID, centroid) in objects.items():
to = trackableObjects.get(objectID, None)
if to is None:
to = TrackableObject(objectID, centroid)
else:
y = [c[1] for c in to.centroids]
direction = centroid[1] - np.mean(y)
to.centroids.append(centroid)
if not to.counted:
if direction < 0 and centroid[1] < H // 2:
totalUp += 1
empty.append(totalUp)
to.counted = True
elif direction > 0 and centroid[1] > H // 2:
totalDown += 1
empty1.append(totalDown)
if sum(x) >= config.Threshold:
cv2.putText(frame, "-ALERT: People limit exceeded-", (10, frame.shape[0] - 80),
cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 2)
if config.ALERT:
print("[INFO] Sending email alert..")
Mailer().send(config.MAIL)
print("[INFO] Alert sent")
to.counted = True
x = []
x.append(len(empty1)-len(empty))
trackableObjects[objectID] = to
text = "ID {}".format(objectID)
cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
cv2.circle(frame, (centroid[0], centroid[1]), 4, (255, 255, 255), -1)
for (objectID2, centroid2) in objects2.items():
to2 = trackableObjects2.get(objectID2, None)
if to2 is None:
to2 = TrackableObject(objectID2, centroid2)
else:
y2 = [c[1] for c in to2.centroids]
direction2 = centroid2[1] - np.mean(y2)
to2.centroids.append(centroid2)
if not to2.counted2:
if direction2 < 0 and centroid2[1] < H // 2:
totalUp2 += 1
empty2.append(totalUp2)
to2.counted2 = True
elif direction2 > 0 and centroid2[1] > H // 2:
totalDown2 += 1
empty3.append(totalDown2)
if sum(x) >= config.Threshold:
cv2.putText(frame2, "-ALERT: People limit exceeded-", (10, frame2.shape[0] - 80),
cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 2)
if config.ALERT:
print("[INFO] Sending email alert..")
Mailer().send(config.MAIL)
print("[INFO] Alert sent")
to2.counted2 = True
x2 = []
x2.append(len(empty3)-len(empty2))
trackableObjects2[objectID2] = to2
text2 = "ID2 {}".format(objectID2)
cv2.putText(frame2, text2, (centroid2[0] - 10, centroid2[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
cv2.circle(frame2, (centroid2[0], centroid2[1]), 4, (255, 255, 255), -1)
info = [
("Exit", totalUp),
("Enter", totalDown),
("Status", status),
]
info3 = [
("Total people inside", x+x2),
]
info2 = [
("Exit", totalUp2),
("Enter", totalDown2),
("Status", status2),
]
for (i, (k, v)) in enumerate(info):
text = "{}: {}".format(k, v)
cv2.putText(frame, text, (10, H - ((i * 20) + 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
for (i, (k, v)) in enumerate(info3):
text = "{}: {}".format(k, v)
cv2.putText(frame, text, (265, H - ((i * 20) + 60)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
text2 = "{}: {}".format(k, v)
cv2.putText(frame2, text2, (265, H - ((i * 20) + 60)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
for (i, (k, v)) in enumerate(info2):
text2 = "{}: {}".format(k, v)
cv2.putText(frame2, text2, (265, H - ((i * 20) + 60)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
if config.Log:
datetimee = [datetime.datetime.now()]
d = [datetimee, empty1+empty3, empty+empty2, x+x2]
export_data = zip_longest(*d, fillvalue = '')
with open('Log.csv', 'w', newline='') as myfile:
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(("End Time", "In", "Out", "Total Inside"))
wr.writerows(export_data)
if writer is not None:
writer.write(frame)
writer.write(frame2)
cv2.imshow("Real-Time Monitoring/Analysis Window", frame)
cv2.imshow("Real-Time Monitoring 2", frame2)
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
break
totalFrames += 1
totalFrames2 += 1
fps.update()
if config.Timer:
t1 = time.time()
num_seconds=(t1-t0)
if num_seconds > 28800:
break
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
if config.Thread:
vs.release()
vs2.release()
cv2.destroyAllWindows()
if config.Scheduler:
schedule.every().day.at("09:00").do(run)
while 1:
schedule.run_pending()
else:
run()
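The "can't grab frame" warning means vs.read() (or vs2.read()) returned None, and imutils.resize then fails on image.shape. A minimal guard (a sketch assuming the same vs/vs2 streams and args as in the code above) would skip empty frames before resizing:
# inside the while True loop, before resizing
frame = vs.read()
frame = frame[1] if args.get("input", False) else frame
frame2 = vs2.read()
frame2 = frame2[1] if args.get("input", False) else frame2

# skip frames the capture could not grab instead of crashing in
# imutils.resize, which immediately calls image.shape on the frame
if frame is None or frame2 is None:
    print("[INFO] empty frame received, skipping..")
    continue

frame = imutils.resize(frame, width=500)
frame2 = imutils.resize(frame2, width=500)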

How can I convert point cloud data `(x, y, z)` into a depth map where `(x, y)` has depth `z`?

I have point cloud data in the form [(x, y, z), (norm_x, norm_y, norm_z)] in a text file. I am trying to convert this into a png or jpg image file where each point's intensity corresponds to its depth (z).
This is how the STL 3D file looks (left); on the right is what I am trying to make.
Thank you all for taking the time to read this.
x_min = -2
x_max = 2
y_min = -1
y_max = 1
z_min = 0
z_max = 1
dx = x_max - x_min
dy = y_max - y_min
dz = z_max - z_min
Ps = []
for (i = 0; i < 1000; ++i) Ps.push([x_min + Math.random()*dx, y_min + Math.random()*dy, z_min + Math.random()*dz])
width = canvas.width
height = canvas.height
context = canvas.getContext('2d')
context.fillStyle = '#000000'
context.fillRect(0, 0, width, height)
imagedata = context.getImageData(0, 0, width, height)
data = imagedata.data
w = width - 1
h = height - 1
for (P of Ps) {
col = Math.round(((P[0] - x_min)/dx)*w)
row = Math.round(((y_max - P[1])/dy)*h)
val = ((P[2] - z_min)/dz)*255
i = 4*(width*row + col)
if (data[i] < val) data[i] = data[i + 1] = data[i + 2] = val
}
context.putImageData(imagedata, 0, 0)
a.href = canvas.toDataURL()
<canvas id=canvas>HTML5</canvas><br><a id=a>Download</a>
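Since the question asks for a png or jpg, here is a rough NumPy/OpenCV sketch of the same idea as the snippet above, keeping the largest z that lands on each pixel; the output resolution and the plain "x y z" text format are assumptions:
import numpy as np
import cv2

def depth_map_from_points(points, width=640, height=480):
    # points: (N, 3) array of (x, y, z); returns an 8-bit depth image
    pts = np.asarray(points, dtype=np.float64)
    x, y, z = pts[:, 0], pts[:, 1], pts[:, 2]

    # ranges, guarded against degenerate (flat) clouds
    dx = (x.max() - x.min()) or 1.0
    dy = (y.max() - y.min()) or 1.0
    dz = (z.max() - z.min()) or 1.0

    # map x/y to pixel coordinates and z to 0..255 intensity
    cols = np.round((x - x.min()) / dx * (width - 1)).astype(int)
    rows = np.round((y.max() - y) / dy * (height - 1)).astype(int)
    vals = ((z - z.min()) / dz * 255).astype(np.uint8)

    depth = np.zeros((height, width), np.uint8)
    np.maximum.at(depth, (rows, cols), vals)  # keep the highest z per pixel
    return depth

# usage, assuming one "x y z ..." row per point in points.txt
cv2.imwrite("depth_map.png", depth_map_from_points(np.loadtxt("points.txt")[:, :3]))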

Unexpected roll and pitch extracted from GLKQuaternion

I use the quaternion from CMAttitude to rotate a SceneKit camera.
I also need the Y rotation angle extracted from the quaternion.
I expected that it would be the roll, but after extraction the Y rotation angle corresponds to the pitch, which has a -90:90 range.
How can I convert this range to 0:180 or 0:360?
- (SCNQuaternion)SCNQuaternionFromCMQuaternion:(CMQuaternion)q {
GLKQuaternion Q = GLKQuaternionMake(q.x, q.y, q.z, q.w);
GLKQuaternion xRotQ = GLKQuaternionMakeWithAngleAndAxis(-M_PI_2, 1, 0, 0);
Q = GLKQuaternionMultiply(xRotQ, Q);
double roll = atan2(2.0 * (Q.y * Q.z - Q.w * Q.x), 1.0 - 2.0 * (Q.x * Q.x + Q.y * Q.y)); // 0:180 but around X
double pitch = RADIANS_TO_DEGREES(asin(-2.0f * (Q.x * Q.z + Q.w * Q.y))); // 0:90 around Y
NSLog(@"%f", pitch);
// ...
CMQuaternion rq = {.x = Q.x, .y = Q.y, .z = Q.z, .w = Q.w};
return SCNVector4Make(rq.x, rq.y, rq.z, rq.w);
}
I found this solution:
- (SCNQuaternion)SCNQuaternionFromCMQuaternion:(CMQuaternion)q {
GLKQuaternion Q = GLKQuaternionMake(q.x, q.y, q.z, q.w);
GLKQuaternion xRotQ = GLKQuaternionMakeWithAngleAndAxis(-M_PI_2, 1, 0, 0);
Q = GLKQuaternionMultiply(xRotQ, Q);
double gx = 2.0 * (Q.y * Q.w - Q.x * Q.z);
//double gy = 2.0 * (Q.x * Q.y + Q.z * Q.w);
double gz = Q.x * Q.x - Q.y * Q.y - Q.z * Q.z + Q.w * Q.w;
double pitch = RADIANS_TO_DEGREES(-asin( -2.0 * (Q.y * Q.w - Q.x * Q.z)));
if (gx >= 0 && gz < 0)
pitch = 180 - pitch;
else if (gx < 0 && gz < 0)
pitch = 180 - pitch;
else if (gx < 0 && gz >= 0)
pitch = 360 + pitch;
NSLog(@"%f", pitch); // now it has 0-360 range
CMQuaternion rq = {.x = Q.x, .y = Q.y, .z = Q.z, .w = Q.w};
return SCNVector4Make(rq.x, rq.y, rq.z, rq.w);
}
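The three sign checks above amount to recovering the full-range angle that atan2 gives from the same two gravity components. A tiny sketch of that idea (in Python for brevity, and assuming gx and gz are computed as above with negligible rotation about the remaining axis):
import math

def pitch_0_360(gx, gz):
    # atan2 resolves the quadrant, so no explicit sign checks are needed
    return math.degrees(math.atan2(gx, gz)) % 360.0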

OpenCV SVM Kernel Sample

The OpenCV docs give the following SVM kernel type example:
A comparison of different kernels on the following 2D test case with four classes. Four SVM::C_SVC SVMs have been trained (one against rest) with auto_train. Evaluation on three different kernels (SVM::CHI2, SVM::INTER, SVM::RBF). The color depicts the class with max score. Bright means max-score > 0, dark means max-score < 0.
Where can I find the sample code that generates this example?
Specifically, the SVM predict() method presumably returns a label value and not a max-score. How can it return a max-score?
Note that the quote states that it uses SVM::C_SVC which is a classification, not a regression, type.
You can get the score from a 2-class SVM if you pass RAW_OUTPUT to predict:
// svm.cpp, SVMImpl::predict(...) , line 1917
bool returnDFVal = (flags & RAW_OUTPUT) != 0;
// svm.cpp, PredictBody::operator(), line 1896,
float result = returnDFVal && class_count == 2 ?
(float)sum : (float)(svm->class_labels.at<int>(k));
Then you need to train 4 different 2-class SVMs, one against rest.
These are the results I get on these samples:
INTER with trainAuto
CHI2 with trainAuto
RBF with train (C = 0.1, gamma = 0.001) (trainAuto overfits in this case)
Here is the code. You can enable trainAuto with the AUTO_TRAIN_ENABLED boolean variable, and you can set the KERNEL as well as the image dimensions, etc.
#include <opencv2/opencv.hpp>
#include <vector>
#include <algorithm>
using namespace std;
using namespace cv;
using namespace cv::ml;
int main()
{
const int WIDTH = 512;
const int HEIGHT = 512;
const int N_SAMPLES_PER_CLASS = 10;
const float NON_LINEAR_SAMPLES_RATIO = 0.1;
const int KERNEL = SVM::CHI2;
const bool AUTO_TRAIN_ENABLED = false;
int N_NON_LINEAR_SAMPLES = N_SAMPLES_PER_CLASS * NON_LINEAR_SAMPLES_RATIO;
int N_LINEAR_SAMPLES = N_SAMPLES_PER_CLASS - N_NON_LINEAR_SAMPLES;
vector<Scalar> colors{Scalar(255,0,0), Scalar(0,255,0), Scalar(0,0,255), Scalar(0,255,255)};
vector<Vec3b> colorsv{ Vec3b(255, 0, 0), Vec3b(0, 255, 0), Vec3b(0, 0, 255), Vec3b(0, 255, 255) };
vector<Vec3b> colorsv_shaded{ Vec3b(200, 0, 0), Vec3b(0, 200, 0), Vec3b(0, 0, 200), Vec3b(0, 200, 200) };
Mat1f data(4 * N_SAMPLES_PER_CLASS, 2);
Mat1i labels(4 * N_SAMPLES_PER_CLASS, 1);
RNG rng(0);
////////////////////////
// Set training data
////////////////////////
// Class 1
Mat1f class1 = data.rowRange(0, 0.5 * N_LINEAR_SAMPLES);
Mat1f x1 = class1.colRange(0, 1);
Mat1f y1 = class1.colRange(1, 2);
rng.fill(x1, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y1, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT / 8));
class1 = data.rowRange(0.5 * N_LINEAR_SAMPLES, 1 * N_LINEAR_SAMPLES);
x1 = class1.colRange(0, 1);
y1 = class1.colRange(1, 2);
rng.fill(x1, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y1, RNG::UNIFORM, Scalar(7*HEIGHT / 8), Scalar(HEIGHT));
class1 = data.rowRange(N_LINEAR_SAMPLES, 1 * N_SAMPLES_PER_CLASS);
x1 = class1.colRange(0, 1);
y1 = class1.colRange(1, 2);
rng.fill(x1, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y1, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Class 2
Mat1f class2 = data.rowRange(N_SAMPLES_PER_CLASS, N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES);
Mat1f x2 = class2.colRange(0, 1);
Mat1f y2 = class2.colRange(1, 2);
rng.fill(x2, RNG::NORMAL, Scalar(3 * WIDTH / 4), Scalar(WIDTH/16));
rng.fill(y2, RNG::NORMAL, Scalar(HEIGHT / 2), Scalar(HEIGHT/4));
class2 = data.rowRange(N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES, 2 * N_SAMPLES_PER_CLASS);
x2 = class2.colRange(0, 1);
y2 = class2.colRange(1, 2);
rng.fill(x2, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y2, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Class 3
Mat1f class3 = data.rowRange(2 * N_SAMPLES_PER_CLASS, 2 * N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES);
Mat1f x3 = class3.colRange(0, 1);
Mat1f y3 = class3.colRange(1, 2);
rng.fill(x3, RNG::NORMAL, Scalar(WIDTH / 4), Scalar(WIDTH/8));
rng.fill(y3, RNG::NORMAL, Scalar(HEIGHT / 2), Scalar(HEIGHT/8));
class3 = data.rowRange(2*N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES, 3 * N_SAMPLES_PER_CLASS);
x3 = class3.colRange(0, 1);
y3 = class3.colRange(1, 2);
rng.fill(x3, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y3, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Class 4
Mat1f class4 = data.rowRange(3 * N_SAMPLES_PER_CLASS, 3 * N_SAMPLES_PER_CLASS + 0.5 * N_LINEAR_SAMPLES);
Mat1f x4 = class4.colRange(0, 1);
Mat1f y4 = class4.colRange(1, 2);
rng.fill(x4, RNG::NORMAL, Scalar(WIDTH / 2), Scalar(WIDTH / 16));
rng.fill(y4, RNG::NORMAL, Scalar(HEIGHT / 4), Scalar(HEIGHT / 16));
class4 = data.rowRange(3 * N_SAMPLES_PER_CLASS + 0.5 * N_LINEAR_SAMPLES, 3 * N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES);
x4 = class4.colRange(0, 1);
y4 = class4.colRange(1, 2);
rng.fill(x4, RNG::NORMAL, Scalar(WIDTH / 2), Scalar(WIDTH / 16));
rng.fill(y4, RNG::NORMAL, Scalar(3 * HEIGHT / 4), Scalar(HEIGHT / 16));
class4 = data.rowRange(3 * N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES, 4 * N_SAMPLES_PER_CLASS);
x4 = class4.colRange(0, 1);
y4 = class4.colRange(1, 2);
rng.fill(x4, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y4, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Labels
labels.rowRange(0*N_SAMPLES_PER_CLASS, 1*N_SAMPLES_PER_CLASS).setTo(1);
labels.rowRange(1*N_SAMPLES_PER_CLASS, 2*N_SAMPLES_PER_CLASS).setTo(2);
labels.rowRange(2*N_SAMPLES_PER_CLASS, 3*N_SAMPLES_PER_CLASS).setTo(3);
labels.rowRange(3*N_SAMPLES_PER_CLASS, 4*N_SAMPLES_PER_CLASS).setTo(4);
// Draw training data
Mat3b samples(HEIGHT, WIDTH, Vec3b(0,0,0));
for (int i = 0; i < labels.rows; ++i)
{
circle(samples, Point(data(i, 0), data(i, 1)), 3, colors[labels(i,0) - 1], CV_FILLED);
}
//////////////////////////
// SVM
//////////////////////////
// SVM label 1
Ptr<SVM> svm1 = SVM::create();
svm1->setType(SVM::C_SVC);
svm1->setKernel(KERNEL);
Mat1i labels1 = (labels != 1) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td1 = TrainData::create(data, ROW_SAMPLE, labels1);
svm1->trainAuto(td1);
}
else
{
svm1->setC(0.1);
svm1->setGamma(0.001);
svm1->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm1->train(data, ROW_SAMPLE, labels1);
}
// SVM label 2
Ptr<SVM> svm2 = SVM::create();
svm2->setType(SVM::C_SVC);
svm2->setKernel(KERNEL);
Mat1i labels2 = (labels != 2) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td2 = TrainData::create(data, ROW_SAMPLE, labels2);
svm2->trainAuto(td2);
}
else
{
svm2->setC(0.1);
svm2->setGamma(0.001);
svm2->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm2->train(data, ROW_SAMPLE, labels2);
}
// SVM label 3
Ptr<SVM> svm3 = SVM::create();
svm3->setType(SVM::C_SVC);
svm3->setKernel(KERNEL);
Mat1i labels3 = (labels != 3) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td3 = TrainData::create(data, ROW_SAMPLE, labels3);
svm3->trainAuto(td3);
}
else
{
svm3->setC(0.1);
svm3->setGamma(0.001);
svm3->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm3->train(data, ROW_SAMPLE, labels3);
}
// SVM label 4
Ptr<SVM> svm4 = SVM::create();
svm4->setType(SVM::C_SVC);
svm4->setKernel(KERNEL);
Mat1i labels4 = (labels != 4) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td4 = TrainData::create(data, ROW_SAMPLE, labels4);
svm4->trainAuto(td4);
}
else
{
svm4->setC(0.1);
svm4->setGamma(0.001);
svm4->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm4->train(data, ROW_SAMPLE, labels4);
}
//////////////////////////
// Show regions
//////////////////////////
Mat3b regions(HEIGHT, WIDTH);
Mat1f R(HEIGHT, WIDTH);
Mat1f R1(HEIGHT, WIDTH);
Mat1f R2(HEIGHT, WIDTH);
Mat1f R3(HEIGHT, WIDTH);
Mat1f R4(HEIGHT, WIDTH);
for (int r = 0; r < HEIGHT; ++r)
{
for (int c = 0; c < WIDTH; ++c)
{
Mat1f sample = (Mat1f(1,2) << c, r);
vector<float> responses(4);
responses[0] = svm1->predict(sample, noArray(), StatModel::RAW_OUTPUT);
responses[1] = svm2->predict(sample, noArray(), StatModel::RAW_OUTPUT);
responses[2] = svm3->predict(sample, noArray(), StatModel::RAW_OUTPUT);
responses[3] = svm4->predict(sample, noArray(), StatModel::RAW_OUTPUT);
int best_class = distance(responses.begin(), max_element(responses.begin(), responses.end()));
float best_response = responses[best_class];
// View responses for each SVM, and the best responses
R(r,c) = best_response;
R1(r, c) = responses[0];
R2(r, c) = responses[1];
R3(r, c) = responses[2];
R4(r, c) = responses[3];
if (best_response >= 0) {
regions(r, c) = colorsv[best_class];
}
else {
regions(r, c) = colorsv_shaded[best_class];
}
}
}
imwrite("svm_samples.png", samples);
imwrite("svm_x.png", regions);
imshow("Samples", samples);
imshow("Regions", regions);
waitKey();
return 0;
}
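For reference, a rough Python equivalent of the raw-score predict step, assuming four already-trained one-vs-rest cv2.ml SVMs like the ones built in the C++ code above (the flag constant name follows OpenCV's usual Python naming and may vary between versions):
import numpy as np
import cv2

def best_class_and_score(svms, x, y):
    sample = np.array([[x, y]], dtype=np.float32)
    # with RAW_OUTPUT a 2-class SVM returns the decision-function value
    # (signed distance to the separating surface) instead of the label
    scores = [s.predict(sample, flags=cv2.ml.STAT_MODEL_RAW_OUTPUT)[1][0, 0]
              for s in svms]
    best = int(np.argmax(scores))
    return best, scores[best]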

How to get a color range from a "Drag Box" in OpenCV 2.4

I'm using Python and OpenCV 2.4. I'm trying to get an average HSV value from an area selected by dragging the mouse, much like in the camShift example provided by OpenCV, and then the X, Y locations of that color's instances in a video feed.
I've been hacking at the onmouse function from camShift. I feel it is close to what I want; I just can't seem to extract the mean HSV values of the selected area. I know I could probably get this done with a for loop, but I am trying to keep it as responsive as possible.
def onmouse(self, event, x, y, flags, param):
x, y = np.int16([x, y]) # BUG
if event == cv2.EVENT_LBUTTONDOWN:
self.drag_start = (x, y)
self.tracking_state = 0
if self.drag_start:
if flags & cv2.EVENT_FLAG_LBUTTON:
h, w = 480, 640 # self.frame.shape[:2]
xo, yo = self.drag_start
x0, y0 = np.maximum(0, np.minimum([xo, yo], [x, y]))
x1, y1 = np.minimum([w, h], np.maximum([xo, yo], [x, y]))
self.selection = None
if x1-x0 > 0 and y1-y0 > 0:
self.selection = (x0, y0, x1, y1)
else:
self.drag_start = None
if self.selection is not None:
self.tracking_state = 1
OK, it's crude, but this seems to be a lot closer than I was:
import numpy as np
import cv2
import video
class App(object):
def __init__(self, video_src):
#self.cam = video.create_capture(video_src)
self.cam = cv2.VideoCapture(0)
ret, self.frame = self.cam.read()
cv2.namedWindow('camshift')
cv2.setMouseCallback('camshift', self.onmouse)
self.selection = None
self.drag_start = None
self.tracking_state = 0
self.show_backproj = False
def onmouse(self, event, x, y, flags, param):
x, y = np.int16([x, y]) # BUG
if event == cv2.EVENT_LBUTTONDOWN:
self.drag_start = (x, y)
self.tracking_state = 0
if self.drag_start:
if flags & cv2.EVENT_FLAG_LBUTTON:
h, w = self.frame.shape[:2]
xo, yo = self.drag_start
x0, y0 = np.maximum(0, np.minimum([xo, yo], [x, y]))
x1, y1 = np.minimum([w, h], np.maximum([xo, yo], [x, y]))
self.selection = None
if x1-x0 > 0 and y1-y0 > 0:
self.selection = (x0, y0, x1, y1)
else:
self.drag_start = None
if self.selection is not None:
self.tracking_state = 1
def show_hist(self):
bin_count = self.hist.shape[0]
bin_w = 24
img = np.zeros((256, bin_count*bin_w, 3), np.uint8)
for i in xrange(bin_count):
h = int(self.hist[i])
cv2.rectangle(img, (i*bin_w+2, 255), ((i+1)*bin_w-2, 255-h), (int(180.0*i/bin_count), 255, 255), -1)
img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
cv2.imshow('hist', img)
def run(self):
while True:
ret, self.frame = self.cam.read()
self.frame = cv2.blur(self.frame,(3,3))
vis = self.frame.copy()
hsv = cv2.cvtColor(self.frame, cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, np.array((0., 60., 32.)), np.array((180., 255., 255.)))
mask2 = mask.copy()
if self.selection:
x0, y0, x1, y1 = self.selection
self.track_window = (x0, y0, x1-x0, y1-y0)
hsv_roi = hsv[y0:y1, x0:x1]
mask_roi = mask[y0:y1, x0:x1]
#cv2.norm(hsv_roi)
dHSV = cv2.mean(hsv_roi)
h, s, v = int(dHSV[0]), int(dHSV[1]), int(dHSV[2])
hist = cv2.calcHist( [hsv_roi], [0], mask_roi, [16], [0, 180] )
cv2.normalize(hist, hist, 0, 255, cv2.NORM_MINMAX);
self.hist = hist.reshape(-1)
self.show_hist()
vis_roi = vis[y0:y1, x0:x1]
cv2.bitwise_not(vis_roi, vis_roi)
vis[mask == 0] = 0
if self.tracking_state == 1:
self.selection = None
cv2.imshow('camshift', vis)
if self.tracking_state == 1:
if h > 159:
h = 159
if s > 235:
s = 235
if v > 235:
v = 235
if h < 20:
h = 20
if s < 20:
s = 20
if v < 20:
v = 20
thresh = cv2.inRange(hsv,np.array(((h-20), (s-20), (v-20))), np.array(((h+20), (s+20), (v+20))))
thresh2 = thresh.copy()
# find contours in the threshold image
contours,hierarchy = cv2.findContours(thresh,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
#best_cnt = 1
max_area = 0
for cnt in contours:
area = cv2.contourArea(cnt)
if area > max_area:
max_area = area
best_cnt = cnt
# finding centroids of best_cnt and draw a circle there
M = cv2.moments(best_cnt)
cx,cy = int(M['m10']/M['m00']), int(M['m01']/M['m00'])
print cx, cy
cv2.circle(thresh2,(cx,cy),20,255,-1)
cv2.imshow('thresh',thresh2)
ch = 0xFF & cv2.waitKey(5)
if ch == 27:
break
if ch == ord('b'):
self.show_backproj = not self.show_backproj
cv2.destroyAllWindows()
if __name__ == '__main__':
import sys
try: video_src = sys.argv[1]
except: video_src = 0
print __doc__
App(video_src).run()
I hack-sawed together Rahman's example and the camShift demo.
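For the mean-HSV part specifically, here is a minimal sketch distilled from the code above (assuming frame_bgr is the current BGR frame and selection is the (x0, y0, x1, y1) box set in onmouse):
import cv2
import numpy as np

def mean_hsv_of_selection(frame_bgr, selection):
    # returns the mean (H, S, V) of the dragged box
    x0, y0, x1, y1 = selection
    hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)
    roi = hsv[y0:y1, x0:x1]
    h, s, v, _ = cv2.mean(roi)  # cv2.mean returns a 4-element tuple
    return int(h), int(s), int(v)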
