Car detection using HOG features and cvsvm - opencv
I am working on a project in which I need to detect the rear of a car using HOG features. After computing the HOG features, I trained a CvSVM on positive and negative samples, and the CvSVM classifies new data correctly. Here is the code I used to train the CvSVM.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml/ml.hpp>
#include "opencv2/opencv.hpp"
#include "LinearSVM.h"
using namespace cv;
using namespace std;
int main(void)
{
LinearSVM *s = new LinearSVM;
vector<float> values, values1, values2, values3, values4;
FileStorage fs2("/home/ubuntu/Desktop/opencv-svm/vecSupport.yml", FileStorage::READ);
FileStorage fs3("/home/ubuntu/Desktop/opencv-svm/vecSupport1.yml", FileStorage::READ);
FileStorage fs4("/home/ubuntu/Desktop/opencv-svm/vecSupport2.yml", FileStorage::READ);
FileStorage fs5("/home/ubuntu/Desktop/opencv-svm/vecSupport3.yml", FileStorage::READ);
FileStorage fs6("/home/ubuntu/Desktop/opencv-svm/vecSupport4.yml", FileStorage::READ);
fs2["vector"]>>values;
fs3["vector"]>>values1;
fs4["vector"]>>values2;
fs5["vector"]>>values3;
fs6["vector"]>>values4;
//fill with data
values.insert(values.end(), values1.begin(), values1.end());
values.insert(values.end(), values2.begin(), values2.end());
fs2.release();
fs3.release();
fs4.release();
float arr[188496];
float car[2772];
float noncar[2772];
// move positive and negative to arr
std::copy(values.begin(), values.end(), arr);
std::copy(values3.begin(), values3.end(), car);
std::copy(values4.begin(), values4.end(), noncar);
float labels[68];
for (unsigned int s = 0; s < 68; s++)
{
if (s<34)
labels[s] = +1;
else
labels[s] = -1;
}
Mat labelsMat(68, 1, CV_32FC1, labels);
Mat trainingDataMat(68,2772, CV_32FC1, arr);
// Set up SVM's parameters
CvSVMParams params;
params.svm_type = CvSVM::C_SVC;
params.kernel_type = CvSVM::LINEAR;
params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 1e-6);
// Train the SVM
LinearSVM SVM;
SVM.train(trainingDataMat, labelsMat, Mat(), Mat(), params);
Mat matinput(1,2772,CV_32FC1,noncar);
//cout<<matinput;
float response = SVM.predict(matinput);
cout<<"Response : "<<response<<endl;
SVM.save("Classifier.xml");
vector<float>primal;
// LinearSVM s;
//s.getSupportVector(primal);
SVM.getSupportVector(primal);
FileStorage fs("/home/ubuntu/Desktop/opencv-svm/test.yml", FileStorage::WRITE);
fs << "dector" << primal;
fs.release();
}
// LinearSVM cpp file
#include "LinearSVM.h"
// Collapses the trained model into one vector: for every feature j it stores
// the sum over all support vectors of (-alpha_r * sv_r[j]), and then appends
// rho of the first decision function as the final element.
//
// support_vector: output; any previous contents are discarded. It is left
//                 empty when no decision function is available.
//
// NOTE(review): the alphas are negated while rho is appended as-is; this is
// presumably the sign convention the consumer of the vector expects -- confirm.
void LinearSVM::getSupportVector(std::vector<float>& support_vector) const {
    support_vector.clear();

    const CvSVMDecisionFunc* df = decision_func;
    if (df == 0)
        return; // nothing to flatten; avoids dereferencing a null pointer

    const int sv_count = get_support_vector_count();
    const int var_count = get_var_count();
    const double* alphas = df[0].alpha;
    const double rho = df[0].rho;

    // assign() instead of resize(): resize() would keep the old values of a
    // non-empty vector and the accumulation below would add on top of them.
    support_vector.assign(var_count > 0 ? static_cast<size_t>(var_count) : 0, 0.f);

    for (int r = 0; r < sv_count; r++) {
        const float neg_alpha = -static_cast<float>(alphas[r]);
        const float* v = get_support_vector(r);
        for (int j = 0; j < var_count; j++) {
            support_vector[j] += neg_alpha * v[j];
        }
    }
    support_vector.push_back(static_cast<float>(rho));
}
// LinearSVM header file
#ifndef LINEAR_SVM_H_
#define LINEAR_SVM_H_
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
// CvSVM subclass that adds one method for exporting the trained model as a
// single flat vector of floats.
class LinearSVM: public CvSVM {
public:
// Fills support_vector with the alpha-weighted sum of the support vectors
// (one value per feature) followed by rho as the last element.
void getSupportVector(std::vector<float>& support_vector) const;
};
#endif /* LINEAR_SVM_H_ */
After this step I have the vector file that I can feed into the setSVMDetector method. Here is my detection code. I used a window size of 96 x 64 and a scale of 1.11.
#include <iostream>
#include <fstream>
#include <string>
#include <time.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <stdexcept>
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace std;
using namespace cv;
bool help_showed = false;
// Command-line options of the detector program. Defaults are set by the
// constructor; read() overrides them from argv.
class Args
{
public:
Args();
// Parses argv into an Args value; throws runtime_error on an unknown key.
static Args read(int argc, char** argv);
// Input path: the value of --video, or the first argument that is not a
// recognised option (treated as an image path).
string src;
// True when --video was given.
bool src_is_video;
// True when --camera was given.
bool src_is_camera;
// Camera index passed with --camera.
int camera_id;
// Output video settings (--write_video, --dst_video, --dst_video_fps).
bool write_video;
string dst_video;
double dst_video_fps;
// Convert frames to grayscale before detection (--make_gray).
bool make_gray;
// Resize frames to width x height before detection (--resize_src, --width, --height).
bool resize_src;
int width, height;
// HOG scale factor (--scale), forwarded to detectMultiScale.
double scale;
// Number of HOG scales (--nlevels).
int nlevels;
// Group threshold (--gr_threshold), forwarded to detectMultiScale.
int gr_threshold;
// Hit threshold (--hit_threshold), forwarded to detectMultiScale.
double hit_threshold;
// Stays true until --hit_threshold is given; while true, App's constructor
// chooses hit_threshold from win_width.
bool hit_threshold_auto;
// Value of --win_width. In this file it is only printed and used to choose
// the automatic hit threshold; App::run uses a fixed window size instead.
int win_width;
// Window stride in pixels (--win_stride_width, --win_stride_height).
int win_stride_width, win_stride_height;
// Gamma correction flag (--gamma_correct), passed to the HOG descriptors.
bool gamma_corr;
};
// Interactive HOG detection application: reads frames from an image, a video
// or a camera, runs the detector, draws the hits and reacts to key presses.
class App
{
public:
// Copies the options, prints the key bindings and the effective settings.
App(const Args& s);
// Main loop: loads the detector, then processes frames until `running` is cleared.
void run();
// Applies the effect of one key press (mode toggles, parameter tuning, exit).
void handleKey(char key);
// Timing of the detection step only.
void hogWorkBegin();
void hogWorkEnd();
string hogWorkFps() const;
// Timing of a whole frame iteration.
void workBegin();
void workEnd();
string workFps() const;
string message() const;
private:
// Declared private; no definition is visible in this file -- presumably
// meant to forbid assignment (TODO confirm).
App operator=(App&);
Args args;
// Loop flag; set to true by run() and cleared by handleKey on ESC.
bool running;
// Runtime-adjustable settings, initialised from args and changed by handleKey.
bool use_gpu;
bool make_gray;
double scale;
int gr_threshold;
int nlevels;
double hit_threshold;
bool gamma_corr;
// Tick count at the start of the last detection step and the resulting rate.
int64 hog_work_begin;
double hog_work_fps;
// Tick count at the start of the last frame iteration and the resulting rate.
int64 work_begin;
double work_fps;
};
// Prints the usage text to stdout and records in help_showed that it was shown.
static void printHelp()
{
    // Adjacent literals are concatenated by the compiler into one usage text.
    const char* const usage =
        "Histogram of Oriented Gradients descriptor and detector sample.\n"
        "\nUsage: hog_gpu\n"
        " (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
        " [--make_gray <true/false>] # convert image to gray one or not\n"
        " [--resize_src <true/false>] # do resize of the source image or not\n"
        " [--width <int>] # resized image width\n"
        " [--height <int>] # resized image height\n"
        " [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
        " [--scale <double>] # HOG window scale factor\n"
        " [--nlevels <int>] # max number of HOG window scales\n"
        " [--win_width <int>] # width of the window (48 or 64)\n"
        " [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
        " [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
        " [--gr_threshold <int>] # merging similar rects constant\n"
        " [--gamma_correct <int>] # do gamma correction or not\n"
        " [--write_video <bool>] # write video or not\n"
        " [--dst_video <path>] # output video path\n"
        " [--dst_video_fps <double>] # output video fps\n";
    cout << usage;
    help_showed = true;
}
int main(int argc, char** argv)
{
try
{
if (argc < 2)
printHelp();
Args args = Args::read(argc, argv);
if (help_showed)
return -1;
App app(args);
app.run();
}
catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch(...) { return cout << "unknown exception" << endl, 1; }
return 0;
}
// Sets every option to its default value. Members are listed in declaration
// order; the two strings start out empty.
Args::Args()
    : src(),
      src_is_video(false),
      src_is_camera(false),
      camera_id(0),
      write_video(false),
      dst_video(),
      dst_video_fps(24.),
      make_gray(false),
      resize_src(false),
      width(640),
      height(480),
      scale(1.11),
      nlevels(13),
      gr_threshold(1),
      hit_threshold(1.4),
      hit_threshold_auto(true),
      win_width(64),
      win_stride_width(8),
      win_stride_height(8),
      gamma_corr(true)
{
}
Args Args::read(int argc, char** argv)
{
Args args;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]);
else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
else if (string(argv[i]) == "--hit_threshold")
{
args.hit_threshold = atof(argv[++i]);
args.hit_threshold_auto = false;
}
else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]);
else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]);
else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
else if (string(argv[i]) == "--help") printHelp();
else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
else if (args.src.empty()) args.src = argv[i];
else throw runtime_error((string("unknown key: ") + argv[i]));
}
return args;
}
// Copies the options, prints the CUDA device summary, the key bindings and
// the effective settings.
//
// All members are given a defined value here. In particular the fps values
// start at 0, because run() asks for workFps() before the first workEnd()
// has been executed.
App::App(const Args& s)
    : args(s),
      running(false),
      use_gpu(true),
      make_gray(s.make_gray),
      scale(s.scale),
      gr_threshold(s.gr_threshold),
      nlevels(s.nlevels),
      hit_threshold(0.),
      gamma_corr(s.gamma_corr),
      hog_work_begin(0),
      hog_work_fps(0.),
      work_begin(0),
      work_fps(0.)
{
    cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
    cout << "\nControls:\n"
         << "\tESC - exit\n"
         << "\tm - change mode GPU <-> CPU\n"
         << "\tg - convert image to gray or not\n"
         << "\t1/q - increase/decrease HOG scale\n"
         << "\t2/w - increase/decrease levels count\n"
         << "\t3/e - increase/decrease HOG group threshold\n"
         << "\t4/r - increase/decrease hit threshold\n"
         << endl;

    // Without an explicit --hit_threshold the value depends on the window width.
    if (args.hit_threshold_auto)
        args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
    hit_threshold = args.hit_threshold;

    cout << "Scale: " << scale << endl;
    if (args.resize_src)
        cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
    cout << "Group threshold: " << gr_threshold << endl;
    cout << "Levels number: " << nlevels << endl;
    cout << "Win width: " << args.win_width << endl;
    cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
    cout << "Hit threshold: " << hit_threshold << endl;
    cout << "Gamma correction: " << gamma_corr << endl;
    cout << endl;
}
void App::run()
{
FileStorage fs("/home/ubuntu/Desktop/implemenatation/vecSupport.yml", FileStorage::READ);
vector<float> detector;
int frameCount;
fs["vector"] >> detector;
for (unsigned int i=0; i<detector.size(); i++)
{
std::cout << std::fixed << std::setprecision(10) << detector[i] << std::endl;
}
fs.release();
running = true;
cv::VideoWriter video_writer;
Size win_size(96,64); //(64, 128) or (48, 96)
Size win_stride(args.win_stride_width, args.win_stride_height);
// Create HOG descriptors and detectors here
/*
vector<float> detector;
if (win_size == Size(64, 128))
detector = cv::gpu::HOGDescriptor::getPeopleDetector64x128();
else
detector = cv::gpu::HOGDescriptor::getPeopleDetector48x96();*/
cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::gpu::HOGDescriptor::DEFAULT_NLEVELS);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
cpu_hog.setSVMDetector(detector);
while (running)
{
VideoCapture vc;
Mat frame;
if (args.src_is_video)
{
vc.open(args.src.c_str());
if (!vc.isOpened())
throw runtime_error(string("can't open video file: " + args.src));
vc >> frame;
}
else if (args.src_is_camera)
{
vc.open(args.camera_id);
if (!vc.isOpened())
{
stringstream msg;
msg << "can't open camera: " << args.camera_id;
throw runtime_error(msg.str());
}
vc >> frame;
}
else
{
frame = imread(args.src);
if (frame.empty())
throw runtime_error(string("can't open image file: " + args.src));
}
Mat img_aux, img, img_to_show;
gpu::GpuMat gpu_img;
// Iterate over all frames
while (running && !frame.empty())
{
workBegin();
// Change format of the image
if (make_gray) cvtColor(frame, img_aux, CV_BGR2GRAY);
else if (use_gpu) cvtColor(frame, img_aux, CV_BGR2BGRA);
else frame.copyTo(img_aux);
// Resize image
if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
else img = img_aux;
img_to_show = img;
gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels;
vector<Rect> found;
// Perform HOG classification
hogWorkBegin();
if (use_gpu)
{
gpu_img.upload(img);
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
}
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
hogWorkEnd();
// Draw positive classified windows
for (size_t i = 0; i < found.size(); i++)
{
Rect r = found[i];
rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3);
}
if (use_gpu)
putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
else
putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencv_gpu_hog", img_to_show);
if (args.src_is_video || args.src_is_camera) vc >> frame;
workEnd();
if (args.write_video)
{
if (!video_writer.isOpened())
{
video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps,
img_to_show.size(), true);
if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer");
}
if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR);
else cvtColor(img_to_show, img, CV_BGRA2BGR);
video_writer << img;
}
handleKey((char)waitKey(3));
}
}
}
// Reacts to one key press: ESC stops the main loop, the other bound keys
// toggle a mode or adjust a detection parameter and print the new value.
// Keys without a binding are ignored.
void App::handleKey(char key)
{
    if (key == 27)
    {
        // ESC
        running = false;
    }
    else if (key == 'm' || key == 'M')
    {
        use_gpu = !use_gpu;
        cout << "Switched to " << (use_gpu ? "CUDA" : "CPU") << " mode\n";
    }
    else if (key == 'g' || key == 'G')
    {
        make_gray = !make_gray;
        cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl;
    }
    else if (key == '1' || key == 'q' || key == 'Q')
    {
        // '1' grows the scale factor, 'q' shrinks it by the same ratio.
        if (key == '1')
            scale *= 1.11;
        else
            scale /= 1.11;
        cout << "Scale: " << scale << endl;
    }
    else if (key == '2' || key == 'w' || key == 'W')
    {
        // The level count never drops below 1.
        if (key == '2')
            nlevels++;
        else
            nlevels = max(nlevels - 1, 1);
        cout << "Levels number: " << nlevels << endl;
    }
    else if (key == '3' || key == 'e' || key == 'E')
    {
        // The group threshold never drops below 0.
        if (key == '3')
            gr_threshold++;
        else
            gr_threshold = max(0, gr_threshold - 1);
        cout << "Group threshold: " << gr_threshold << endl;
    }
    else if (key == '4' || key == 'r' || key == 'R')
    {
        // The hit threshold moves in steps of 0.25 and never drops below 0.
        if (key == '4')
            hit_threshold+=0.25;
        else
            hit_threshold = max(0.0, hit_threshold - 0.25);
        cout << "Hit threshold: " << hit_threshold << endl;
    }
    else if (key == 'c' || key == 'C')
    {
        gamma_corr = !gamma_corr;
        cout << "Gamma correction: " << gamma_corr << endl;
    }
}
// Remembers the tick count at which the detection step starts.
inline void App::hogWorkBegin()
{
    hog_work_begin = getTickCount();
}
inline void App::hogWorkEnd()
{
int64 delta = getTickCount() - hog_work_begin;
double freq = getTickFrequency();
hog_work_fps = freq / delta;
}
// Returns the last detection-step rate formatted as text.
inline string App::hogWorkFps() const
{
    stringstream formatted;
    formatted << hog_work_fps;
    const string text = formatted.str();
    return text;
}
// Remembers the tick count at which a frame iteration starts.
inline void App::workBegin()
{
    work_begin = getTickCount();
}
inline void App::workEnd()
{
int64 delta = getTickCount() - work_begin;
double freq = getTickFrequency();
work_fps = freq / delta;
}
// Returns the last whole-frame rate formatted as text.
inline string App::workFps() const
{
    stringstream formatted;
    formatted << work_fps;
    const string text = formatted.str();
    return text;
}
Problem:
I am not able to detect anything. Can someone look at my work and let me know what I am doing wrong? Any suggestions would be valuable. Thank you. For the last four weeks I have been repeating these steps over and over again.
P.S: You can find yaml files here and test images along with the annotations here
First of all, partition your data for cross-validation, as already suggested. Second, it is a good idea to use an RBF kernel rather than a linear kernel; I highly doubt that a linear kernel can learn complex objects. A brief explanation is given here. Finally, experiment with the parameters. To do that, you need to check the limits of the parameter space. It has been a while since I last used SVMs, so I cannot provide details, but a grid search with 20% cross-validation is a good start.
Related
I am trying to have stable flight with px4 and ros2 offboard control
Hello guys I have a offboard code which give about 50 setpoints to drone. It draws spiral with that setpoints. My problem is I couldnt get smooth travel. In every setpoint drone gives a high roll or pitch instant and then floats to the next setpoint. Is there a way to have stable velocity while passing the setpoints. Here is the code: #include <px4_msgs/msg/offboard_control_mode.hpp> #include <px4_msgs/msg/trajectory_setpoint.hpp> #include <px4_msgs/msg/timesync.hpp> #include <px4_msgs/msg/vehicle_command.hpp> #include <px4_msgs/msg/vehicle_control_mode.hpp> #include <px4_msgs/msg/vehicle_local_position.hpp> #include <rclcpp/rclcpp.hpp> #include <stdint.h> #include <chrono> #include <iostream> #include "std_msgs/msg/string.hpp" #include <math.h> float X; float Y; using namespace std::chrono; using namespace std::chrono_literals; using namespace px4_msgs::msg; class setpoint : public rclcpp::Node { public: setpoint() : Node("setpoint") { offboard_control_mode_publisher_ = this->create_publisher<OffboardControlMode>("fmu/offboard_control_mode/in", 10); trajectory_setpoint_publisher_ = this->create_publisher<TrajectorySetpoint>("fmu/trajectory_setpoint/in", 10); vehicle_command_publisher_ = this->create_publisher<VehicleCommand>("fmu/vehicle_command/in", 10); // get common timestamp timesync_sub_ = this->create_subscription<px4_msgs::msg::Timesync>("fmu/timesync/out", 10, [this](const px4_msgs::msg::Timesync::UniquePtr msg) { timestamp_.store(msg->timestamp); }); offboard_setpoint_counter_ = 0; auto sendCommands = [this]() -> void { if (offboard_setpoint_counter_ == 10) { // Change to Offboard mode after 10 setpoints this->publish_vehicle_command(VehicleCommand::VEHICLE_CMD_DO_SET_MODE, 1, 6); // Arm the vehicle this->arm(); } //------------- subscription_ = this->create_subscription<px4_msgs::msg::VehicleLocalPosition>( "/fmu/vehicle_local_position/out", #ifdef ROS_DEFAULT_API 10, #endif [this](const px4_msgs::msg::VehicleLocalPosition::UniquePtr msg) { X = msg->x; Y 
= msg->y; std::cout << "\n\n\n\n\n\n\n\n\n\n"; std::cout << "RECEIVED VEHICLE GPS POSITION DATA" << std::endl; std::cout << "==================================" << std::endl; std::cout << "ts: " << msg->timestamp << std::endl; //std::cout << "lat: " << msg->x << std::endl; //std::cout << "lon: " << msg->y << std::endl; std::cout << "lat: " << X << std::endl; std::cout << "lon: " << Y << std::endl; std::cout << "waypoint: " << waypoints[waypointIndex][0] << std::endl; std::cout << "waypoint: " << waypoints[waypointIndex][1] << std::endl; if((waypoints[waypointIndex][0] + 0.3 > X && waypoints[waypointIndex][0] - 0.3 < X)&&(waypoints[waypointIndex][1] + 0.3 > Y && waypoints[waypointIndex][1] - 0.3 < Y)){ waypointIndex++; if (waypointIndex >= waypoints.size()) exit(0); //waypointIndex = 0; RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f", waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]); } }); //-------------- // offboard_control_mode needs to be paired with trajectory_setpoint publish_offboard_control_mode(); publish_trajectory_setpoint(); // stop the counter after reaching 11 if (offboard_setpoint_counter_ < 11) { offboard_setpoint_counter_++; } }; /* auto nextWaypoint = [this]() -> void { waypointIndex++; if (waypointIndex >= waypoints.size()) waypointIndex = 0; RCLCPP_INFO(this->get_logger(), "Next waypoint: %.2f %.2f %.2f", waypoints[waypointIndex][0], waypoints[waypointIndex][1], waypoints[waypointIndex][2]); }; */ commandTimer = this->create_wall_timer(100ms, sendCommands); //waypointTimer = this->create_wall_timer(2s, nextWaypoint); //EA } void arm() const; void disarm() const; void topic_callback() const; private: std::vector<std::vector<float>> waypoints = {{0,0,-5,}, {2,0,-5,}, {2.35216,0.476806,-5,}, {2.57897,1.09037,-5,}, {2.64107,1.80686,-5,}, {2.50814,2.58248,-5,}, {2.16121,3.36588,-5,}, {1.59437,4.10097,-5,}, {0.815842,4.73016,-5,}, {-0.151838,5.19778,-5,}, {-1.27233,5.45355,-5,}, 
{-2.49688,5.45578,-5,}, {-3.76641,5.17438,-5,}, {-5.01428,4.59315,-5,}, {-6.1696,3.71161,-5,}, {-7.16089,2.54591,-5,}, {-7.91994,1.12896,-5,}, {-8.38568,-0.490343,-5,}, {-8.50782,-2.24876,-5,}, {-8.25018,-4.07119,-5,}, {-7.59329,-5.87384,-5,}, {-6.53644,-7.56803,-5,}, {-5.09871,-9.06439,-5,}, {-3.31919,-10.2773,-5,}, {-1.25611,-11.1293,-5,}, {1.01499,-11.5555,-5,}, {3.40395,-11.5071,-5,}, {5.8096,-10.9548,-5,}, {8.12407,-9.89139,-5,}, {10.2375,-8.33272,-5,}, {12.0431,-6.31859,-5,}, {13.4424,-3.91182,-5,}, {14.3502,-1.19649,-5,}, {14.6991,1.72493,-5,}, {14.4435,4.73543,-5,}, {13.5626,7.70817,-5,}, {12.0624,10.5118,-5,}, {9.97696,13.0162,-5,}, {7.36759,15.0983,-5,}, {4.32167,16.6482,-5,}, {0.949612,17.5744,-5,}, {-2.619,17.8084,-5,}, {-6.24045,17.3094,-5,}, {-9.76262,16.0665,-5,}, {-13.0314,14.1004,-5,}, {-15.8974,11.4644,-5,}, {-18.2226,8.24237,-5,}, {-19.8868,4.54696,-5,}, {-20.7936,0.515337,-5,}, {-20.8754,-3.69574,-5,}, {-20.0972,-7.91595,-5,}, {-20.8754,-3.69574,-5,}, {-20.7936,0.515337,-5,}, {-19.8868,4.54696,-5,}, {-18.2226,8.24237,-5,}, {-15.8974,11.4644,-5,}, {-13.0314,14.1004,-5,}, {-9.76262,16.0665,-5,}, {-6.24045,17.3094,-5,}, {-2.619,17.8084,-5,}, {0.949612,17.5744,-5,}, {4.32167,16.6482,-5,}, {7.36759,15.0983,-5,}, {9.97696,13.0162,-5,}, {12.0624,10.5118,-5,}, {13.5626,7.70817,-5,}, {14.4435,4.73543,-5,}, {14.6991,1.72493,-5,}, {14.3502,-1.19649,-5,}, {13.4424,-3.91182,-5,}, {12.0431,-6.31859,-5,}, {10.2375,-8.33272,-5,}, {8.12407,-9.89139,-5,}, {5.8096,-10.9548,-5,}, {3.40395,-11.5071,-5,}, {1.01499,-11.5555,-5,}, {-1.25611,-11.1293,-5,}, {-3.31919,-10.2773,-5,}, {-5.09871,-9.06439,-5,}, {-6.53644,-7.56803,-5,}, {-7.59329,-5.87384,-5,}, {-8.25018,-4.07119,-5,}, {-8.50782,-2.24876,-5,}, {-8.38568,-0.490343,-5,}, {-7.91994,1.12896,-5,}, {-7.16089,2.54591,-5,}, {-6.1696,3.71161,-5,}, {-5.01428,4.59315,-5,}, {-3.76641,5.17438,-5,}, {-2.49688,5.45578,-5,}, {-1.27233,5.45355,-5,}, {-0.151838,5.19778,-5,}, {0.815842,4.73016,-5,}, {1.59437,4.10097,-5,}, 
{2.16121,3.36588,-5,}, {2.50814,2.58248,-5,}, {2.64107,1.80686,-5,}, {2.57897,1.09037,-5,}, {2.35216,0.476806,-5,}, {2,0,-5,}, {0,0,-5,}, {0,0,0,} }; // Land int waypointIndex = 0; rclcpp::TimerBase::SharedPtr commandTimer; rclcpp::TimerBase::SharedPtr waypointTimer; rclcpp::Publisher<OffboardControlMode>::SharedPtr offboard_control_mode_publisher_; rclcpp::Publisher<TrajectorySetpoint>::SharedPtr trajectory_setpoint_publisher_; rclcpp::Publisher<VehicleCommand>::SharedPtr vehicle_command_publisher_; rclcpp::Subscription<px4_msgs::msg::Timesync>::SharedPtr timesync_sub_; // rclcpp::Subscription<px4_msgs::msg::VehicleLocalPosition>::SharedPtr subscription_; // std::atomic<uint64_t> timestamp_; //!< common synced timestamped uint64_t offboard_setpoint_counter_; //!< counter for the number of setpoints sent void publish_offboard_control_mode() const; void publish_trajectory_setpoint() const; void publish_vehicle_command(uint16_t command, float param1 = 0.0, float param2 = 0.0) const; }; void setpoint::arm() const { publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 1.0); RCLCPP_INFO(this->get_logger(), "Arm command send"); } void setpoint::disarm() const { publish_vehicle_command(VehicleCommand::VEHICLE_CMD_COMPONENT_ARM_DISARM, 0.0); RCLCPP_INFO(this->get_logger(), "Disarm command send"); } void setpoint::publish_offboard_control_mode() const { OffboardControlMode msg{}; msg.timestamp = timestamp_.load(); msg.position = true; msg.velocity = false; msg.acceleration = false; msg.attitude = false; msg.body_rate = false; offboard_control_mode_publisher_->publish(msg); } void setpoint::publish_trajectory_setpoint() const { TrajectorySetpoint msg{}; msg.timestamp = timestamp_.load(); msg.position = {waypoints[waypointIndex][0],waypoints[waypointIndex][1],waypoints[waypointIndex][2]}; msg.yaw = std::nanf("0"); //-3.14; // [-PI:PI] trajectory_setpoint_publisher_->publish(msg); } void setpoint::publish_vehicle_command(uint16_t command, float param1, 
float param2) const { VehicleCommand msg{}; msg.timestamp = timestamp_.load(); msg.param1 = param1; msg.param2 = param2; msg.command = command; msg.target_system = 1; msg.target_component = 1; msg.source_system = 1; msg.source_component = 1; msg.from_external = true; vehicle_command_publisher_->publish(msg); } int main(int argc, char* argv[]) { std::cout << "Starting setpoint node..." << std::endl; setvbuf(stdout, NULL, _IONBF, BUFSIZ); rclcpp::init(argc, argv); rclcpp::spin(std::make_shared<setpoint>()); rclcpp::shutdown(); return 0; }
We send setpoints to the controller as reference points. The aircraft then tries to maneuver to each given point using its control strategy (usually PID). Therefore, to get a smooth maneuver, it is usually suggested to provide a series of discrete points between two waypoints, i.e., a trajectory that is parameterized by time. This should resolve the abrupt motion of your UAV. I'm no expert, but I hope this helps.
How to Select a Region of Interest on a Video like this
I am working on a queue bypass detection project and I need to select a region of interest, i.e., a boundary. If a person crosses the boundary, we should get an alert. Please help me select a region of interest in a live video similar to the one in the image. Please see this image.
After doing some research I found what you need on github #include "opencv2/opencv.hpp" #include <iostream> using namespace cv; using namespace std; /*~~~~~~~~~~~~~~~~~~*/ char ky; bool got_roi = false; Point points_array[4]; Mat src, ROI_Img,backup,ROI_MASK; Rect2d ROI_Select; int width_roi = 0, height_roi = 0,min_x,min_y,max_x,max_y; Rect ROI_RECT ; vector< vector<Point> > co_ordinates; /*~~~~~~~~~~~~~~~~~~*/ /*~~~~~~~~~~~~~~~~~~*/ //Callback for mousclick event, the x-y coordinate of mouse button-down //are stored array of points [points_array]. void mouse_click(int event, int x, int y, int flags, void *param) { static int count=0; switch (event) { case CV_EVENT_LBUTTONDOWN: { switch (count) // number of set Point { case 0: cout << "Select top-right point" << endl; break; case 1: cout << "Select bottom-right point" << endl; break; case 2: cout << "Select bottom-left point" << endl << endl; break; default: break; } if (!got_roi) // you are not select ROI yet! { points_array[count] = Point(x,y); circle(src, points_array[count], 2, Scalar(0, 255, 0), 2); //show points on image imshow("My_Win", src); count++; if (count == 4) // if select 4 point finished { cout << "ROI x & y points :" << endl; cout << points_array[0] << endl; cout << points_array[1] << endl; cout << points_array[2] << endl; cout << points_array[3] << endl; cout << endl << "ROI Saved You can continue with double press any keys except 'c' " << endl <<"once press 'c' or 'C' to clear points and retry select ROI " << endl << endl; ky = waitKey(0) & 0xFF; if (ky == 99 || ky == 67) // c or C to clear { backup.copyTo(src); points_array[0] = Point(0, 0); points_array[1] = Point(0, 0); points_array[2] = Point(0, 0); points_array[3] = Point(0, 0); imshow("My_Win", src); count = 0; cout << endl << endl << endl << "#--------------------- Clear Points! 
------------------# " << endl << endl << endl ; } else // user accept points & dosn't want to clear them { min_x = std::min(points_array[0].x, points_array[3].x); //find rectangle for minimum ROI surround it! max_x = std::max(points_array[1].x, points_array[2].x); min_y = std::min(points_array[0].y, points_array[1].y); max_y = std::max(points_array[3].y, points_array[2].y); height_roi = max_y - min_y; width_roi = max_x - min_x; ROI_RECT = Rect(min_x, min_y, width_roi, height_roi); got_roi = true; co_ordinates.push_back(vector<Point>()); co_ordinates[0].push_back(points_array[0]); co_ordinates[0].push_back(points_array[1]); co_ordinates[0].push_back(points_array[2]); co_ordinates[0].push_back(points_array[3]); } } } else { // if got_roi se true => select roi before cout << endl << "You Select ROI Before " << endl << "if you want to clear point press 'c' or double press other keys to continue" << endl << endl; } break; } } } /*~~~~~~~~~~~~~~~~~~*/ int main() { // replace all "My_Win" with your window name /*~~~~~~~~~~~~~~~~~~*/ namedWindow("My_Win", 1); /*~~~~~~~~~~~~~~~~~~*/ VideoCapture input_video("Video_path"); // Set source imafe as [src] /*~~~~~~~~~~~~~~~~~~*/ input_video >> src; imshow("My_Win", src); src.copyTo(backup); setMouseCallback("My_Win", mouse_click, 0); waitKey(0); Mat mask(src.rows, src.cols, CV_8UC1, cv::Scalar(0)); drawContours(mask, co_ordinates, 0, Scalar(255), CV_FILLED, 8); /*~~~~~~~~~~~~~~~~~~*/ while (1) { input_video >> src; /*~~~~~~~~~~~~~~~~~~*/ //Need to copy Select ROI as MASK src.copyTo(ROI_MASK, mask); //Creat a rectangle around the Mask to reduce size of mask ROI_Img = ROI_MASK(ROI_RECT); /*~~~~~~~~~~~~~~~~~~*/ //Show Image imshow("My_Win", ROI_Img); // Do remaining processing here on capture roi for every frame if(char (waitKey(1)& 0xFF) == 27) break; } }
Inaccurate tracking when drawing calcOpticalFlow's output feature vector
I have been trying to develop a simple feature tracking program. The user outlines an area on the screen with their mouse, and a mask is created for this area and passed to goodFeaturesToTrack. The features found by the function are then drawn on the screen (represented by blue circles). Next I pass the feature vector returned by the function to calcOpticalFlowPyrLK and draw the resulting vector of points on the screen (represented by green circles). Although the program tracks the direction of flow correctly, for some reason the features output by the calcOpticalFlowPyrLK function do not line up with the object's location on the screen. I suspect it is a small mistake in my logic, but I just can't seem to track it down, and I would really appreciate some help from you. I have posted my code below, and I apologize for the global variables and messy structure. I am just testing at the moment, and plan to clean up and convert to an OOP design as soon as I get it running. Also, here is a link to a YouTube video I have uploaded that demonstrates the behavior I am trying to fix.
bool drawingBox = false; bool destroyBox = false; bool targetAcquired = false; bool featuresFound = false; CvRect box; int boxCounter = 0; cv::Point objectLocation; cv::Mat prevFrame, nextFrame, prevFrame_1C, nextFrame_1C; std::vector<cv::Point2f> originalFeatures, newFeatures, baseFeatures; std::vector<uchar> opticalFlowFeatures; std::vector<float> opticalFlowFeaturesError; cv::TermCriteria opticalFlowTermination = cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.3); cv::Mat mask; cv::Mat clearMask; long currentFrame = 0; void draw(cv::Mat image, CvRect rectangle) { if (drawingBox) { cv::rectangle(image, cv::Point(box.x, box.y), cv::Point(box.x + box.width, box.y + box.height), cv::Scalar(225, 238 , 81), 2); CvRect rectangle2 = cvRect(box.x, box.y, box.width, box.height); } if (featuresFound) { for (int i = 0; i < originalFeatures.size(); i++) { cv::circle(image, baseFeatures[i], 4, cv::Scalar(255, 0, 0), 1, 8, 0); cv::circle(image, newFeatures[i], 4, cv::Scalar(0, 255, 0),1, 8, 0); cv::line(image, baseFeatures[i], newFeatures[i], cv::Scalar(255, 0, 0), 2, CV_AA); } } } void findFeatures(cv::Mat mask) { if (!featuresFound && targetAcquired) { cv::goodFeaturesToTrack(prevFrame_1C, baseFeatures, 200, 0.1, 0.1, mask); originalFeatures= baseFeatures; featuresFound = true; std::cout << "Number of Corners Detected: " << originalFeatures.size() << std::endl; for(int i = 0; i < originalFeatures.size(); i++) { std::cout << "Corner Location " << i << ": " << originalFeatures[i].x << "," << originalFeatures[i].y << std::endl; } } } void trackFeatures() { cv::calcOpticalFlowPyrLK(prevFrame_1C, nextFrame_1C, originalFeatures, newFeatures, opticalFlowFeatures, opticalFlowFeaturesError, cv::Size(30,30), 5, opticalFlowTermination); originalFeatures = newFeatures; } void mouseCallback(int event, int x, int y, int flags, void *param) { cv::Mat frame; frame = *((cv::Mat*)param); switch(event) { case CV_EVENT_MOUSEMOVE: { if(drawingBox) { box.width = x-box.x; box.height = 
y-box.y; } } break; case CV_EVENT_LBUTTONDOWN: { drawingBox = true; box = cvRect (x, y, 0, 0); targetAcquired = false; cv::destroyWindow("Selection"); } break; case CV_EVENT_LBUTTONUP: { drawingBox = false; featuresFound = false; boxCounter++; std::cout << "Box " << boxCounter << std::endl; std::cout << "Box Coordinates: " << box.x << "," << box.y << std::endl; std::cout << "Box Height: " << box.height << std::endl; std::cout << "Box Width: " << box.width << std:: endl << std::endl; if(box.width < 0) { box.x += box.width; box.width *= -1; } if(box.height < 0) { box.y +=box.height; box.height *= -1; } objectLocation.x = box.x; objectLocation.y = box.y; targetAcquired = true; } break; case CV_EVENT_RBUTTONUP: { destroyBox = true; } break; } } int main () { const char *name = "Boundary Box"; cv::namedWindow(name); cv::VideoCapture camera; cv::Mat cameraFrame; int cameraNumber = 0; camera.open(cameraNumber); camera >> cameraFrame; cv::Mat mask = cv::Mat::zeros(cameraFrame.size(), CV_8UC1); cv::Mat clearMask = cv::Mat::zeros(cameraFrame.size(), CV_8UC1); if (!camera.isOpened()) { std::cerr << "ERROR: Could not access the camera or video!" << std::endl; } cv::setMouseCallback(name, mouseCallback, &cameraFrame); while(true) { if (destroyBox) { cv::destroyAllWindows(); break; } camera >> cameraFrame; if (cameraFrame.empty()) { std::cerr << "ERROR: Could not grab a camera frame." << std::endl; exit(1); } camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame); camera >> prevFrame; cv::cvtColor(prevFrame, prevFrame_1C, cv::COLOR_BGR2GRAY); camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame ++); camera >> nextFrame; cv::cvtColor(nextFrame, nextFrame_1C, cv::COLOR_BGR2GRAY); if (targetAcquired) { cv::Mat roi (mask, cv::Rect(box.x, box.y, box.width, box.height)); roi = cv::Scalar(255, 255, 255); findFeatures(mask); clearMask.copyTo(mask); trackFeatures(); } draw(cameraFrame, box); cv::imshow(name, cameraFrame); cv::waitKey(20); } cv::destroyWindow(name); return 0; }
I don't think you can use camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame) on a webcam, but I'm not positive about that. Instead, I suggest you save the previous frame in your prevFrame variable. As an example, here is working code; I only changed the inside of the while loop, and I put a comment before each of my additions: while(true) { if (destroyBox) { cv::destroyAllWindows(); break; } camera >> cameraFrame; if (cameraFrame.empty()) { std::cerr << "ERROR: Could not grab a camera frame." << std::endl; exit(1); } // new lines if(prevFrame.empty()){ prevFrame = cameraFrame; continue; } // end new lines //camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame); //camera >> prevFrame; cv::cvtColor(prevFrame, prevFrame_1C, cv::COLOR_BGR2GRAY); //camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame ++); //camera >> nextFrame; // new line nextFrame = cameraFrame; cv::cvtColor(nextFrame, nextFrame_1C, cv::COLOR_BGR2GRAY); if (targetAcquired) { cv::Mat roi (mask, cv::Rect(box.x, box.y, box.width, box.height)); roi = cv::Scalar(255, 255, 255); findFeatures(mask); clearMask.copyTo(mask); trackFeatures(); } draw(cameraFrame, box); cv::imshow(name, cameraFrame); cv::waitKey(20); // old = new // new line prevFrame = cameraFrame.clone(); }
facedetect.cpp to detect eye in a cropped image
I am using OpenCV for face and eye detection. To start with, I tested the sample program in OpenCV/Samples/c/facedetect.cpp. I gave two images as an input to this facedetect.exe - one is full and the other is cropped face of the same person. Now, the facedetect.cpp works fine with full image whereas it is not even detecting the face with the cropped image as input. Although the cropped image contains only the face which is cropped using OpenCV face detector, In some bad cases I will get only mouth or lips or only part of the face. So my requirement here is to check both the eyes are there in an image or not. The below are the two sample pictures one is full image where I get proper output: The below is the image where I need to detect the eyes using facedetect.cpp: So my question here is how to detect the eyes in the cropped image? The below is the code of sample facedetect.cpp #include "opencv2/objdetect/objdetect.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include <iostream> #include <stdio.h> using namespace std; using namespace cv; static void help() { cout << "\nThis program demonstrates the cascade recognizer. 
Now you can use Haar or LBP features.\n" "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n" "Usage:\n" "./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n" " [--nested-cascade[=nested_cascade_path this an optional secondary classifier such as eyes]]\n" " [--scale=<image scale greater or equal to 1, try 1.3 for example>\n" " [filename|camera_index]\n\n" "see facedetect.cmd for one call:\n" "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --nested-cascade=\"../../data/haarcascades/haarcascade_eye.xml\" --scale=1.3 \n" "Hit any key to quit.\n" "Using OpenCV version " << CV_VERSION << "\n" << endl; } void detectAndDraw( Mat& img, CascadeClassifier& cascade, CascadeClassifier& nestedCascade, double scale); String cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml"; String nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml"; int main( int argc, const char** argv ) { CvCapture* capture = 0; Mat frame, frameCopy, image; const String scaleOpt = "--scale="; size_t scaleOptLen = scaleOpt.length(); const String cascadeOpt = "--cascade="; size_t cascadeOptLen = cascadeOpt.length(); const String nestedCascadeOpt = "--nested-cascade"; size_t nestedCascadeOptLen = nestedCascadeOpt.length(); String inputName; help(); CascadeClassifier cascade, nestedCascade; double scale = 1; for( int i = 1; i < argc; i++ ) { cout << "Processing " << i << " " << argv[i] << endl; if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 ) { cascadeName.assign( argv[i] + cascadeOptLen ); cout << " from which we have cascadeName= " << cascadeName << endl; } else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 ) { if( argv[i][nestedCascadeOpt.length()] == '=' ) nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 ); if( !nestedCascade.load( nestedCascadeName ) ) 
cerr << "WARNING: Could not load classifier cascade for nested objects" << endl; } else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 ) { if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 ) scale = 1; cout << " from which we read scale = " << scale << endl; } else if( argv[i][0] == '-' ) { cerr << "WARNING: Unknown option %s" << argv[i] << endl; } else inputName.assign( argv[i] ); } if( !cascade.load( cascadeName ) ) { cerr << "ERROR: Could not load classifier cascade" << endl; cerr << "Usage: facedetect [--cascade=<cascade_path>]\n" " [--nested-cascade[=nested_cascade_path]]\n" " [--scale[=<image scale>\n" " [filename|camera_index]\n" << endl ; return -1; } if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') ) { capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' ); int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ; if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl; } else if( inputName.size() ) { image = imread( inputName, 1 ); if( image.empty() ) { capture = cvCaptureFromAVI( inputName.c_str() ); if(!capture) cout << "Capture from AVI didn't work" << endl; } } else { image = imread( "lena.jpg", 1 ); if(image.empty()) cout << "Couldn't read lena.jpg" << endl; } cvNamedWindow( "result", 1 ); if( capture ) { cout << "In capture ..." 
<< endl; for(;;) { IplImage* iplImg = cvQueryFrame( capture ); frame = iplImg; if( frame.empty() ) break; if( iplImg->origin == IPL_ORIGIN_TL ) frame.copyTo( frameCopy ); else flip( frame, frameCopy, 0 ); detectAndDraw( frameCopy, cascade, nestedCascade, scale ); if( waitKey( 10 ) >= 0 ) goto _cleanup_; } waitKey(0); _cleanup_: cvReleaseCapture( &capture ); } else { cout << "In image read" << endl; if( !image.empty() ) { detectAndDraw( image, cascade, nestedCascade, scale ); waitKey(0); } else if( !inputName.empty() ) { /* assume it is a text file containing the list of the image filenames to be processed - one per line */ FILE* f = fopen( inputName.c_str(), "rt" ); if( f ) { char buf[1000+1]; while( fgets( buf, 1000, f ) ) { int len = (int)strlen(buf), c; while( len > 0 && isspace(buf[len-1]) ) len--; buf[len] = '\0'; cout << "file " << buf << endl; image = imread( buf, 1 ); if( !image.empty() ) { detectAndDraw( image, cascade, nestedCascade, scale ); c = waitKey(0); if( c == 27 || c == 'q' || c == 'Q' ) break; } else { cerr << "Aw snap, couldn't read image " << buf << endl; } } fclose(f); } } } cvDestroyWindow("result"); return 0; } void detectAndDraw( Mat& img, CascadeClassifier& cascade, CascadeClassifier& nestedCascade, double scale) { int i = 0; double t = 0; vector<Rect> faces; const static Scalar colors[] = { CV_RGB(0,0,255), CV_RGB(0,128,255), CV_RGB(0,255,255), CV_RGB(0,255,0), CV_RGB(255,128,0), CV_RGB(255,255,0), CV_RGB(255,0,0), CV_RGB(255,0,255)} ; Mat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); cvtColor( img, gray, CV_BGR2GRAY ); resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); equalizeHist( smallImg, smallImg ); t = (double)cvGetTickCount(); cascade.detectMultiScale( smallImg, faces, 1.1, 2, 0 //|CV_HAAR_FIND_BIGGEST_OBJECT //|CV_HAAR_DO_ROUGH_SEARCH |CV_HAAR_SCALE_IMAGE , Size(30, 30) ); t = (double)cvGetTickCount() - t; printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) 
); for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) { Mat smallImgROI; vector<Rect> nestedObjects; Point center; Scalar color = colors[i%8]; int radius; center.x = cvRound((r->x + r->width*0.5)*scale); center.y = cvRound((r->y + r->height*0.5)*scale); radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); if( nestedCascade.empty() ) continue; smallImgROI = smallImg(*r); nestedCascade.detectMultiScale( smallImgROI, nestedObjects, 1.1, 2, 0 //|CV_HAAR_FIND_BIGGEST_OBJECT //|CV_HAAR_DO_ROUGH_SEARCH //|CV_HAAR_DO_CANNY_PRUNING |CV_HAAR_SCALE_IMAGE , Size(30, 30) ); for( vector<Rect>::const_iterator nr = nestedObjects.begin(); nr != nestedObjects.end(); nr++ ) { center.x = cvRound((r->x + nr->x + nr->width*0.5)*scale); center.y = cvRound((r->y + nr->y + nr->height*0.5)*scale); radius = cvRound((nr->width + nr->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } } cv::imshow( "result", img ); }
The original example detects faces first by cascade.detectMultiScale, then finds eyes in the detected faces by nestedCascade.detectMultiScale. If you only need to detect the eyes, just use nestedCascade.detectMultiScale on the full image.
If you have already detected a (frontal) face, rough positions of the left and right eye regions can then be estimated using anthropometric relations, as shown below.
Change the cascade classifier name: String cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml"; change this to String cascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml"; and pass the location of the image as the argument.
Face-detection in bash: Simply return number of faces found
I need to count the number of faces in a picture on the commandline (to be used in a bash script: do-something-if-picture-passed-contains-faces, else try next picture). So far I have the facedetect OpenCV example working, but this example continuously displays the picture - all I'd need is a number returned (or, simply an errorcode: 0 if no face found, 1 if a face has been found). Sadly my C++ skills are abysmal, could someone point me in the right direction? I'd use python or ruby, but the bindings to OpenCV seem to be more then broken in Ubuntu 12.04, none of the examples work right (or at all). Thank you! Edit: The example I was talking about as installed by OpenCV under Ubuntu cat /usr/share/doc/opencv-doc/examples/c/facedetect.cpp: #include "opencv2/objdetect/objdetect.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/imgproc/imgproc.hpp" #include <iostream> #include <stdio.h> using namespace std; using namespace cv; void help() { cout << "\nThis program demonstrates the cascade recognizer. 
Now you can use Haar or LBP features.\n" "This classifier can recognize many ~rigid objects, it's most known use is for faces.\n" "Usage:\n" "./facedetect [--cascade=<cascade_path> this is the primary trained classifier such as frontal face]\n" " [--nested-cascade[=nested_cascade_path this an optional secondary classifier such as eyes]]\n" " [--scale=<image scale greater or equal to 1, try 1.3 for example>\n" " [filename|camera_index]\n\n" "see facedetect.cmd for one call:\n" "./facedetect --cascade=\"../../data/haarcascades/haarcascade_frontalface_alt.xml\" --nested-cascade=\"../../data/haarcascades/haarcascade_eye.xml\" --scale=1.3 \n" "Hit any key to quit.\n" "Using OpenCV version " << CV_VERSION << "\n" << endl; } void detectAndDraw( Mat& img, CascadeClassifier& cascade, CascadeClassifier& nestedCascade, double scale); String cascadeName = "../../data/haarcascades/haarcascade_frontalface_alt.xml"; String nestedCascadeName = "../../data/haarcascades/haarcascade_eye_tree_eyeglasses.xml"; int main( int argc, const char** argv ) { CvCapture* capture = 0; Mat frame, frameCopy, image; const String scaleOpt = "--scale="; size_t scaleOptLen = scaleOpt.length(); const String cascadeOpt = "--cascade="; size_t cascadeOptLen = cascadeOpt.length(); const String nestedCascadeOpt = "--nested-cascade"; size_t nestedCascadeOptLen = nestedCascadeOpt.length(); String inputName; help(); CascadeClassifier cascade, nestedCascade; double scale = 1; for( int i = 1; i < argc; i++ ) { cout << "Processing " << i << " " << argv[i] << endl; if( cascadeOpt.compare( 0, cascadeOptLen, argv[i], cascadeOptLen ) == 0 ) { cascadeName.assign( argv[i] + cascadeOptLen ); cout << " from which we have cascadeName= " << cascadeName << endl; } else if( nestedCascadeOpt.compare( 0, nestedCascadeOptLen, argv[i], nestedCascadeOptLen ) == 0 ) { if( argv[i][nestedCascadeOpt.length()] == '=' ) nestedCascadeName.assign( argv[i] + nestedCascadeOpt.length() + 1 ); if( !nestedCascade.load( nestedCascadeName ) ) 
cerr << "WARNING: Could not load classifier cascade for nested objects" << endl; } else if( scaleOpt.compare( 0, scaleOptLen, argv[i], scaleOptLen ) == 0 ) { if( !sscanf( argv[i] + scaleOpt.length(), "%lf", &scale ) || scale < 1 ) scale = 1; cout << " from which we read scale = " << scale << endl; } else if( argv[i][0] == '-' ) { cerr << "WARNING: Unknown option %s" << argv[i] << endl; } else inputName.assign( argv[i] ); } if( !cascade.load( cascadeName ) ) { cerr << "ERROR: Could not load classifier cascade" << endl; cerr << "Usage: facedetect [--cascade=<cascade_path>]\n" " [--nested-cascade[=nested_cascade_path]]\n" " [--scale[=<image scale>\n" " [filename|camera_index]\n" << endl ; return -1; } if( inputName.empty() || (isdigit(inputName.c_str()[0]) && inputName.c_str()[1] == '\0') ) { capture = cvCaptureFromCAM( inputName.empty() ? 0 : inputName.c_str()[0] - '0' ); int c = inputName.empty() ? 0 : inputName.c_str()[0] - '0' ; if(!capture) cout << "Capture from CAM " << c << " didn't work" << endl; } else if( inputName.size() ) { image = imread( inputName, 1 ); if( image.empty() ) { capture = cvCaptureFromAVI( inputName.c_str() ); if(!capture) cout << "Capture from AVI didn't work" << endl; } } else { image = imread( "lena.jpg", 1 ); if(image.empty()) cout << "Couldn't read lena.jpg" << endl; } cvNamedWindow( "result", 1 ); if( capture ) { cout << "In capture ..." 
<< endl; for(;;) { IplImage* iplImg = cvQueryFrame( capture ); frame = iplImg; if( frame.empty() ) break; if( iplImg->origin == IPL_ORIGIN_TL ) frame.copyTo( frameCopy ); else flip( frame, frameCopy, 0 ); detectAndDraw( frameCopy, cascade, nestedCascade, scale ); if( waitKey( 10 ) >= 0 ) goto _cleanup_; } waitKey(0); _cleanup_: cvReleaseCapture( &capture ); } else { cout << "In image read" << endl; if( !image.empty() ) { detectAndDraw( image, cascade, nestedCascade, scale ); waitKey(0); } else if( !inputName.empty() ) { /* assume it is a text file containing the list of the image filenames to be processed - one per line */ FILE* f = fopen( inputName.c_str(), "rt" ); if( f ) { char buf[1000+1]; while( fgets( buf, 1000, f ) ) { int len = (int)strlen(buf), c; while( len > 0 && isspace(buf[len-1]) ) len--; buf[len] = '\0'; cout << "file " << buf << endl; image = imread( buf, 1 ); if( !image.empty() ) { detectAndDraw( image, cascade, nestedCascade, scale ); c = waitKey(0); if( c == 27 || c == 'q' || c == 'Q' ) break; } else { cerr << "Aw snap, couldn't read image " << buf << endl; } } fclose(f); } } } cvDestroyWindow("result"); return 0; } void detectAndDraw( Mat& img, CascadeClassifier& cascade, CascadeClassifier& nestedCascade, double scale) { int i = 0; double t = 0; vector<Rect> faces; const static Scalar colors[] = { CV_RGB(0,0,255), CV_RGB(0,128,255), CV_RGB(0,255,255), CV_RGB(0,255,0), CV_RGB(255,128,0), CV_RGB(255,255,0), CV_RGB(255,0,0), CV_RGB(255,0,255)} ; Mat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); cvtColor( img, gray, CV_BGR2GRAY ); resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); equalizeHist( smallImg, smallImg ); t = (double)cvGetTickCount(); cascade.detectMultiScale( smallImg, faces, 1.1, 2, 0 //|CV_HAAR_FIND_BIGGEST_OBJECT //|CV_HAAR_DO_ROUGH_SEARCH |CV_HAAR_SCALE_IMAGE , Size(30, 30) ); t = (double)cvGetTickCount() - t; printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) 
); for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) { Mat smallImgROI; vector<Rect> nestedObjects; Point center; Scalar color = colors[i%8]; int radius; center.x = cvRound((r->x + r->width*0.5)*scale); center.y = cvRound((r->y + r->height*0.5)*scale); radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); if( nestedCascade.empty() ) continue; smallImgROI = smallImg(*r); nestedCascade.detectMultiScale( smallImgROI, nestedObjects, 1.1, 2, 0 //|CV_HAAR_FIND_BIGGEST_OBJECT //|CV_HAAR_DO_ROUGH_SEARCH //|CV_HAAR_DO_CANNY_PRUNING |CV_HAAR_SCALE_IMAGE , Size(30, 30) ); for( vector<Rect>::const_iterator nr = nestedObjects.begin(); nr != nestedObjects.end(); nr++ ) { center.x = cvRound((r->x + nr->x + nr->width*0.5)*scale); center.y = cvRound((r->y + nr->y + nr->height*0.5)*scale); radius = cvRound((nr->width + nr->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } } cv::imshow( "result", img ); }
Change void detectAndDraw to int detectAndDraw (in both the declaration and the definition), and have it return faces.end()-faces.begin(), which is the same as faces.size(). That gives you the number of faces found in the picture. Hope this helps.