iOS FFmpeg high-level API - ios

I have a video file with subtitles and I'd like to extract all of them. From the terminal it's quite easy to do:
ffmpeg -i video.mkv -map 0:s:0 subs.srt
How can I execute this command on iOS?
Edit
Or maybe you know an easy way to get subtitles out of a video file? The code below fails because av_guess_format returns NULL (a possible direction is sketched after the code).
+ (void)readSubtitles:(NSString *)videoPath saveFolder:(NSString *)saveFolder {
AVFormatContext *pFormatCtx;
av_register_all();
avcodec_register_all();
avformat_network_init();
pFormatCtx = avformat_alloc_context();
if (avformat_open_input(&pFormatCtx, [videoPath UTF8String], NULL, NULL) != 0) {
return;
}
if (avformat_find_stream_info(pFormatCtx, NULL) < 0) {
return;
}
for (int i = 0; i < pFormatCtx->nb_streams; i++) {
if (pFormatCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_SUBTITLE) {
NSString *subPath = [saveFolder stringByAppendingPathComponent:[NSString stringWithFormat:@"sub_%d.srt", i]];
[self parseSubtitles:pFormatCtx streamIdx:i savePath:subPath];
}
}
}
+ (void)parseSubtitles:(AVFormatContext *)context streamIdx:(int)idx savePath:(NSString *)savePath {
const char *filename = [savePath UTF8String];
AVStream *avstream = context->streams[idx];
AVCodec *codec = avcodec_find_decoder( avstream->codec->codec_id );
int result = avcodec_open2( avstream->codec, codec, NULL );
AVOutputFormat *outFormat = av_guess_format( NULL, "sub.mp4", NULL );
NSAssert(outFormat != NULL, @"Error finding format"); // !!! fails !!!
NSLog(@"Found output format: %@ (%@)", [NSString stringWithUTF8String:outFormat->name], [NSString stringWithUTF8String:outFormat->long_name]);
AVFormatContext *outFormatContext;
avformat_alloc_output_context2( &outFormatContext, NULL, NULL, filename );
AVCodec *encoder = avcodec_find_encoder( outFormat->subtitle_codec );
// checkResult( encoder != NULL, "Error finding encoder" );
NSLog(#"Found encoder: %#", [NSString stringWithUTF8String:encoder->name]);
AVStream *outStream = avformat_new_stream( outFormatContext, encoder );
AVCodecContext *c = outStream->codec;
result = avcodec_get_context_defaults3( c, encoder );
// outStream->codecpar
NSLog(#"outstream codec: %#", [NSString stringWithUTF8String:outStream->codec]);
NSLog(#"Opened stream %d, codec=%d", outStream->id, outStream->codec->codec_id);
result = avio_open( &outFormatContext->pb, filename, AVIO_FLAG_WRITE );
// checkResult( result == 0, "Error opening out file" );
// cerr << "out file opened correctly" << endl;
result = avformat_write_header( outFormatContext, NULL );
// checkResult(result==0, "Error writing header");
// cerr << "header wrote correctly" << endl;
result = 0;
AVPacket pkt;
av_init_packet( &pkt );
pkt.data = NULL;
pkt.size = 0;
// cerr << "srt codec id: " << AV_CODEC_ID_SUBRIP << endl;
while( av_read_frame( context, &pkt ) >= 0 )
{
if(pkt.stream_index != idx)
continue;
int gotSubtitle = 0;
AVSubtitle subtitle;
result = avcodec_decode_subtitle2( avstream->codec, &subtitle, &gotSubtitle, &pkt );
uint64_t bufferSize = 1024 * 1024 ;
uint8_t *buffer = (uint8_t *)malloc(bufferSize * sizeof(uint8_t));
memset(buffer, 0, bufferSize);
if( result >= 0 )
{
result = avcodec_encode_subtitle( outStream->codec, buffer, bufferSize, &subtitle );
// cerr << "Encode subtitle result: " << result << endl;
}
// cerr << "Encoded subtitle: " << buffer << endl;
free(buffer);
}
}
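A minimal sketch of the format-guessing step, assuming the target really is an .srt file. This is plain FFmpeg C API, and it only helps if the SRT muxer was compiled into the FFmpeg build linked into the app (trimmed mobile builds often disable muxers, which would also make av_guess_format return NULL):
#include <libavformat/avformat.h>
// Sketch only: find the SRT muxer either from the output file name or by its short name.
// Returns NULL if this FFmpeg build was configured without the muxer.
static AVOutputFormat *guessSrtMuxer(const char *outputPath)
{
    AVOutputFormat *fmt = av_guess_format(NULL, outputPath, NULL); // e.g. ".../sub_2.srt"
    if (fmt == NULL)
        fmt = av_guess_format("srt", NULL, NULL);                  // force it by short name
    return fmt;
}
In parseSubtitles: above, the lookup uses the fixed name "sub.mp4", so even a complete build would not hand back a subtitle muxer for it; guessing from the real .srt path (or passing "srt" as the format name to avformat_alloc_output_context2) would be the first thing to try, and if that still returns NULL the linked FFmpeg was most likely built without the SRT muxer and matching subtitle encoder.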

Related

How to get stack trace for C/C++ program in CYGWIN environment?

I was looking for a backtrace mechanism; I've combined some of the solutions found here into a small program for quick reference.
My answer, with a code snippet:
#if defined(__CYGWIN__)
#include <Windows.h>
#include <dbghelp.h>
#include <psdk_inc/_dbg_common.h>
#include <cxxabi.h>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
using namespace std;
#define MAX_STACKTRACE_SIZE 64 // assumed frame limit; adjust as needed
class Error // Windows version
{
private:
void *stacktrace[MAX_STACKTRACE_SIZE];
size_t stacktrace_size;
public:
const char* message;
Error(const char* m)
: message(m)
, stacktrace_size(0)
{
// Capture the stack, when error is 'hit'
stacktrace_size = CaptureStackBackTrace(0, MAX_STACKTRACE_SIZE, stacktrace, nullptr);
}
void print_backtrace(ostream& out) const
{
SYMBOL_INFO * symbol;
HANDLE process;
size_t length;
process = GetCurrentProcess();
SymInitialize(process, nullptr, TRUE);
symbol = (SYMBOL_INFO *)calloc(sizeof(SYMBOL_INFO) + 256 * sizeof(char), 1);
symbol->MaxNameLen = 255;
symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
length = strlen (symbol->Name);
std::string result;
char tempStr[255] = {0};
for (int i = 0; i < stacktrace_size; i++)
{
int status = 0;
// '_' is missing in symbol->Name , hence prefix it and concat with symbol->Name
char prefixed_symbol [256] = "_" ;
SymFromAddr(process, (DWORD64)(stacktrace[i]), 0, symbol);
auto backtrace_line = string(symbol->Name);
if (backtrace_line.size() == 0) continue;
// https://en.wikipedia.org/wiki/Name_mangling
// Prefix '_' with symbol name, so that __cxa_demangle does the job correctly
// $ c++filt -n _Z9test_ringI12SmallIntegerIhEEvRK4RingIT_E
strcat (prefixed_symbol, symbol->Name);
char * demangled_name = abi::__cxa_demangle(prefixed_symbol, nullptr, nullptr, &status);
if(status < 0)
{
sprintf(tempStr, "%i: %s - 0x%0X\n", stacktrace_size-i-1, symbol->Name, symbol->Address);
// out << symbol->Name << endl;
}
else
{
sprintf(tempStr, "%i: %s - 0x%0X\n", stacktrace_size - i - 1, demangled_name, symbol->Address);
// out << demangled_name << endl;
}
// Append the extracted info to the result
result += tempStr;
// Free the HEAP allocation made by __cxa_demangle
free((void*)demangled_name);
// Restore the prefix '_' string
prefixed_symbol [1] = '\0';
}
std::cout << result << std::endl;
free(symbol);
}
};
int main ()
{
try {
do_something ();
if (false == status) throw Error("SystemError");
}
catch (const Error &error)
{
cout << "NotImplementedError(\"" << error.message << "\")" << endl;
error.print_backtrace(cout);
return 1;
}
return 0;
}
#endif
Command Line Option:
// Use -limagehlp to link the library
g++ -std=c++20 main.cpp -limagehlp

OpenCL Read Back bus speed extensive drop

I have simple OpenCL host code that writes some amount of data into a buffer on an OpenCL-enabled device and then reads it back into host memory. It runs this experiment for different buffer sizes. Here is the code I use:
#include <iostream>
#include "support.h"
#include "Event.h"
#include "ResultDatabase.h"
#include "OptionParser.h"
using namespace std;
void addBenchmarkSpecOptions(OptionParser &op)
{
op.addOption("nopinned", OPT_BOOL, "",
"disable usage of pinned (pagelocked) memory");
}
// Modifications:
// Jeremy Meredith, Wed Dec 1 17:05:27 EST 2010
// Added calculation of latency estimate.
void RunBenchmark(cl_device_id id,
cl_context ctx,
cl_command_queue queue,
ResultDatabase &resultDB,
OptionParser &op)
{
bool verbose = op.getOptionBool("verbose");
bool pinned = !op.getOptionBool("nopinned");
int npasses = op.getOptionInt("passes");
const bool waitForEvents = true;
// Sizes are in kb
int nSizes = 20;
int sizes[20] = {1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,
32768,65536,131072,262144,524288};
// Make sure we don't surpass the OpenCL limit.
cl_long maxAllocSizeBytes = 0;
clGetDeviceInfo(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
sizeof(cl_long), &maxAllocSizeBytes, NULL);
while (sizes[nSizes-1]*1024 > 0.90 * maxAllocSizeBytes)
{
--nSizes;
if (verbose) cout << " - dropping allocation size to keep under reported limit.\n";
if (nSizes < 1)
{
cerr << "Error: OpenCL reported a max allocation size less than 1kB.\b";
return;
}
}
// Create some host memory pattern
if (verbose) cout << ">> creating host mem pattern\n";
int err;
float *hostMem1;
float *hostMem2;
cl_mem hostMemObj1;
cl_mem hostMemObj2;
long long numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
if (pinned)
{
int err1, err2;
hostMemObj1 = clCreateBuffer(ctx,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
sizeof(float)*numMaxFloats, NULL, &err1);
if (err1 == CL_SUCCESS)
{
hostMem1 = (float*)clEnqueueMapBuffer(queue, hostMemObj1, true,
CL_MAP_READ|CL_MAP_WRITE,
0,sizeof(float)*numMaxFloats,0,
NULL,NULL,&err1);
}
hostMemObj2 = clCreateBuffer(ctx,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
sizeof(float)*numMaxFloats, NULL, &err2);
if (err2 == CL_SUCCESS)
{
hostMem2 = (float*)clEnqueueMapBuffer(queue, hostMemObj2, true,
CL_MAP_READ|CL_MAP_WRITE,
0,sizeof(float)*numMaxFloats,0,
NULL,NULL,&err2);
}
while (err1 != CL_SUCCESS || err2 != CL_SUCCESS)
{
// free the first buffer if only the second failed
if (err1 == CL_SUCCESS)
clReleaseMemObject(hostMemObj1);
// drop the size and try again
if (verbose) cout << " - dropping size allocating pinned mem\n";
--nSizes;
if (nSizes < 1)
{
cerr << "Error: Couldn't allocated any pinned buffer\n";
return;
}
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
hostMemObj1 = clCreateBuffer(ctx,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
sizeof(float)*numMaxFloats, NULL, &err1);
if (err1 == CL_SUCCESS)
{
hostMem1 = (float*)clEnqueueMapBuffer(queue, hostMemObj1, true,
CL_MAP_READ|CL_MAP_WRITE,
0,sizeof(float)*numMaxFloats,0,
NULL,NULL,&err1);
}
hostMemObj2 = clCreateBuffer(ctx,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
sizeof(float)*numMaxFloats, NULL, &err2);
if (err2 == CL_SUCCESS)
{
hostMem2 = (float*)clEnqueueMapBuffer(queue, hostMemObj2, true,
CL_MAP_READ|CL_MAP_WRITE,
0,sizeof(float)*numMaxFloats,0,
NULL,NULL,&err2);
}
}
}
else
{
hostMem1 = new float[numMaxFloats];
hostMem2 = new float[numMaxFloats];
}
for (int i=0; i<numMaxFloats; i++) {
hostMem1[i] = i % 77;
hostMem2[i] = -1;
}
// Allocate some device memory
if (verbose) cout << ">> allocating device mem\n";
cl_mem mem1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
sizeof(float)*numMaxFloats, NULL, &err);
while (err != CL_SUCCESS)
{
// drop the size and try again
if (verbose) cout << " - dropping size allocating device mem\n";
--nSizes;
if (nSizes < 1)
{
cerr << "Error: Couldn't allocated any device buffer\n";
return;
}
numMaxFloats = 1024 * (sizes[nSizes-1]) / 4;
mem1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE,
sizeof(float)*numMaxFloats, NULL, &err);
}
if (verbose) cout << ">> filling device mem to force allocation\n";
Event evDownloadPrime("DownloadPrime");
err = clEnqueueWriteBuffer(queue, mem1, false, 0,
numMaxFloats*sizeof(float), hostMem1,
0, NULL, &evDownloadPrime.CLEvent());
CL_CHECK_ERROR(err);
if (verbose) cout << ">> waiting for download to finish\n";
err = clWaitForEvents(1, &evDownloadPrime.CLEvent());
CL_CHECK_ERROR(err);
// Three passes, forward and backward both
for (int pass = 0; pass < npasses; pass++)
{
// store the times temporarily to estimate latency
//float times[nSizes];
// Step through sizes forward on even passes and backward on odd
for (int i = 0; i < nSizes; i++)
{
int sizeIndex;
if ((pass%2) == 0)
sizeIndex = i;
else
sizeIndex = (nSizes-1) - i;
// Read memory back from the device
if (verbose) cout << ">> reading from device "<<sizes[sizeIndex]<<"kB\n";
Event evReadback("Readback");
err = clEnqueueReadBuffer(queue, mem1, false, 0,
sizes[sizeIndex]*1024, hostMem2,
0, NULL, &evReadback.CLEvent());
CL_CHECK_ERROR(err);
// Wait for event to finish
if (verbose) cout << ">> waiting for readback to finish\n";
err = clWaitForEvents(1, &evReadback.CLEvent());
CL_CHECK_ERROR(err);
if (verbose) cout << ">> finish!";
if (verbose) cout << endl;
// Get timings
err = clFlush(queue);
CL_CHECK_ERROR(err);
evReadback.FillTimingInfo();
if (verbose) evReadback.Print(cerr);
double t = evReadback.SubmitEndRuntime() / 1.e6; // in ms
//times[sizeIndex] = t;
// Add timings to database
double speed = (double(sizes[sizeIndex] * 1024.) / (1000.*1000.)) / t;
char sizeStr[256];
sprintf(sizeStr, "% 7dkB", sizes[sizeIndex]);
resultDB.AddResult("ReadbackSpeed", sizeStr, "GB/sec", speed);
// Add timings to database
double delay = evReadback.SubmitStartDelay() / 1.e6;
resultDB.AddResult("ReadbackDelay", sizeStr, "ms", delay);
resultDB.AddResult("ReadbackTime", sizeStr, "ms", t);
}
//resultDB.AddResult("ReadbackLatencyEstimate", "1-2kb", "ms", times[0]-(times[1]-times[0])/1.);
//resultDB.AddResult("ReadbackLatencyEstimate", "1-4kb", "ms", times[0]-(times[2]-times[0])/3.);
//resultDB.AddResult("ReadbackLatencyEstimate", "2-4kb", "ms", times[1]-(times[2]-times[1])/1.);
}
// Cleanup
err = clReleaseMemObject(mem1);
CL_CHECK_ERROR(err);
if (pinned)
{
err = clReleaseMemObject(hostMemObj1);
CL_CHECK_ERROR(err);
err = clReleaseMemObject(hostMemObj2);
CL_CHECK_ERROR(err);
}
else
{
delete[] hostMem1;
delete[] hostMem2;
}
}
While running the code and plotting the speed against the buffer size, I see the following: the performance drops for data sizes over 1024 KB and then levels off at some point.
My device is a Tesla K40 and I'm using NVIDIA's OpenCL driver, but I still do not understand the main reason behind such weird behaviour.

Car detection using HOG features and CvSVM

I am doing a project for which I need to detect the rear of a car using HOG features. Once I calculated the HOG features, I trained CvSVM using positive and negative samples, and CvSVM correctly classifies new data. Here is the code I used to train CvSVM.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml/ml.hpp>
#include "opencv2/opencv.hpp"
#include "LinearSVM.h"
using namespace cv;
using namespace std;
int main(void)
{
LinearSVM *s = new LinearSVM;
vector<float> values, values1, values2, values3, values4;
FileStorage fs2("/home/ubuntu/Desktop/opencv-svm/vecSupport.yml", FileStorage::READ);
FileStorage fs3("/home/ubuntu/Desktop/opencv-svm/vecSupport1.yml", FileStorage::READ);
FileStorage fs4("/home/ubuntu/Desktop/opencv-svm/vecSupport2.yml", FileStorage::READ);
FileStorage fs5("/home/ubuntu/Desktop/opencv-svm/vecSupport3.yml", FileStorage::READ);
FileStorage fs6("/home/ubuntu/Desktop/opencv-svm/vecSupport4.yml", FileStorage::READ);
fs2["vector"]>>values;
fs3["vector"]>>values1;
fs4["vector"]>>values2;
fs5["vector"]>>values3;
fs6["vector"]>>values4;
//fill with data
values.insert(values.end(), values1.begin(), values1.end());
values.insert(values.end(), values2.begin(), values2.end());
fs2.release();
fs3.release();
fs4.release();
float arr[188496];
float car[2772];
float noncar[2772];
// move positive and negative to arr
std::copy(values.begin(), values.end(), arr);
std::copy(values3.begin(), values3.end(), car);
std::copy(values4.begin(), values4.end(), noncar);
float labels[68];
for (unsigned int s = 0; s < 68; s++)
{
if (s<34)
labels[s] = +1;
else
labels[s] = -1;
}
Mat labelsMat(68, 1, CV_32FC1, labels);
Mat trainingDataMat(68,2772, CV_32FC1, arr);
// Set up SVM's parameters
CvSVMParams params;
params.svm_type = CvSVM::C_SVC;
params.kernel_type = CvSVM::LINEAR;
params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 1e-6);
// Train the SVM
LinearSVM SVM;
SVM.train(trainingDataMat, labelsMat, Mat(), Mat(), params);
Mat matinput(1,2772,CV_32FC1,noncar);
//cout<<matinput;
float response = SVM.predict(matinput);
cout<<"Response : "<<response<<endl;
SVM.save("Classifier.xml");
vector<float>primal;
// LinearSVM s;
//s.getSupportVector(primal);
SVM.getSupportVector(primal);
FileStorage fs("/home/ubuntu/Desktop/opencv-svm/test.yml", FileStorage::WRITE);
fs << "dector" << primal;
fs.release();
}
// LinearSVM cpp file
#include "LinearSVM.h"
void LinearSVM::getSupportVector(std::vector<float>& support_vector) const {
int sv_count = get_support_vector_count();
const CvSVMDecisionFunc* df = decision_func;
const double* alphas = df[0].alpha;
double rho = df[0].rho;
int var_count = get_var_count();
support_vector.resize(var_count, 0);
for (unsigned int r = 0; r < (unsigned)sv_count; r++) {
float myalpha = alphas[r];
const float* v = get_support_vector(r);
for (int j = 0; j < var_count; j++,v++) {
support_vector[j] += (-myalpha) * (*v);
}
}
support_vector.push_back(rho);
}
// LinearSVM head file
#ifndef LINEAR_SVM_H_
#define LINEAR_SVM_H_
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
class LinearSVM: public CvSVM {
public:
void getSupportVector(std::vector<float>& support_vector) const;
};
#endif /* LINEAR_SVM_H_ */
After this step I got the vector file that I can feed into the setSVMDetector method. Here is my code. I have used a window size of 96 x 64 and a scale of 1.11.
#include <iostream>
#include <fstream>
#include <string>
#include <time.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <stdexcept>
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace std;
using namespace cv;
bool help_showed = false;
class Args
{
public:
Args();
static Args read(int argc, char** argv);
string src;
bool src_is_video;
bool src_is_camera;
int camera_id;
bool write_video;
string dst_video;
double dst_video_fps;
bool make_gray;
bool resize_src;
int width, height;
double scale;
int nlevels;
int gr_threshold;
double hit_threshold;
bool hit_threshold_auto;
int win_width;
int win_stride_width, win_stride_height;
bool gamma_corr;
};
class App
{
public:
App(const Args& s);
void run();
void handleKey(char key);
void hogWorkBegin();
void hogWorkEnd();
string hogWorkFps() const;
void workBegin();
void workEnd();
string workFps() const;
string message() const;
private:
App operator=(App&);
Args args;
bool running;
bool use_gpu;
bool make_gray;
double scale;
int gr_threshold;
int nlevels;
double hit_threshold;
bool gamma_corr;
int64 hog_work_begin;
double hog_work_fps;
int64 work_begin;
double work_fps;
};
static void printHelp()
{
cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
<< "\nUsage: hog_gpu\n"
<< " (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
<< " [--make_gray <true/false>] # convert image to gray one or not\n"
<< " [--resize_src <true/false>] # do resize of the source image or not\n"
<< " [--width <int>] # resized image width\n"
<< " [--height <int>] # resized image height\n"
<< " [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
<< " [--scale <double>] # HOG window scale factor\n"
<< " [--nlevels <int>] # max number of HOG window scales\n"
<< " [--win_width <int>] # width of the window (48 or 64)\n"
<< " [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
<< " [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
<< " [--gr_threshold <int>] # merging similar rects constant\n"
<< " [--gamma_correct <int>] # do gamma correction or not\n"
<< " [--write_video <bool>] # write video or not\n"
<< " [--dst_video <path>] # output video path\n"
<< " [--dst_video_fps <double>] # output video fps\n";
help_showed = true;
}
int main(int argc, char** argv)
{
try
{
if (argc < 2)
printHelp();
Args args = Args::read(argc, argv);
if (help_showed)
return -1;
App app(args);
app.run();
}
catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch(...) { return cout << "unknown exception" << endl, 1; }
return 0;
}
Args::Args()
{
src_is_video = false;
src_is_camera = false;
camera_id = 0;
write_video = false;
dst_video_fps = 24.;
make_gray = false;
resize_src = false;
width = 640;
height = 480;
scale = 1.11;
nlevels = 13;
gr_threshold = 1;
hit_threshold = 1.4;
hit_threshold_auto = true;
win_width = 64;
win_stride_width = 8;
win_stride_height = 8;
gamma_corr = true;
}
Args Args::read(int argc, char** argv)
{
Args args;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]);
else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
else if (string(argv[i]) == "--hit_threshold")
{
args.hit_threshold = atof(argv[++i]);
args.hit_threshold_auto = false;
}
else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]);
else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]);
else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
else if (string(argv[i]) == "--help") printHelp();
else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
else if (args.src.empty()) args.src = argv[i];
else throw runtime_error((string("unknown key: ") + argv[i]));
}
return args;
}
App::App(const Args& s)
{
cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
args = s;
cout << "\nControls:\n"
<< "\tESC - exit\n"
<< "\tm - change mode GPU <-> CPU\n"
<< "\tg - convert image to gray or not\n"
<< "\t1/q - increase/decrease HOG scale\n"
<< "\t2/w - increase/decrease levels count\n"
<< "\t3/e - increase/decrease HOG group threshold\n"
<< "\t4/r - increase/decrease hit threshold\n"
<< endl;
use_gpu = true;
make_gray = args.make_gray;
scale = args.scale;
gr_threshold = args.gr_threshold;
nlevels = args.nlevels;
if (args.hit_threshold_auto)
args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
hit_threshold = args.hit_threshold;
gamma_corr = args.gamma_corr;
/*
if (args.win_width != 64 && args.win_width != 48)
args.win_width = 64;*/
cout << "Scale: " << scale << endl;
if (args.resize_src)
cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
cout << "Group threshold: " << gr_threshold << endl;
cout << "Levels number: " << nlevels << endl;
cout << "Win width: " << args.win_width << endl;
cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
cout << "Hit threshold: " << hit_threshold << endl;
cout << "Gamma correction: " << gamma_corr << endl;
cout << endl;
}
void App::run()
{
FileStorage fs("/home/ubuntu/Desktop/implemenatation/vecSupport.yml", FileStorage::READ);
vector<float> detector;
int frameCount;
fs["vector"] >> detector;
for (unsigned int i=0; i<detector.size(); i++)
{
std::cout << std::fixed << std::setprecision(10) << detector[i] << std::endl;
}
fs.release();
running = true;
cv::VideoWriter video_writer;
Size win_size(96,64); //(64, 128) or (48, 96)
Size win_stride(args.win_stride_width, args.win_stride_height);
// Create HOG descriptors and detectors here
/*
vector<float> detector;
if (win_size == Size(64, 128))
detector = cv::gpu::HOGDescriptor::getPeopleDetector64x128();
else
detector = cv::gpu::HOGDescriptor::getPeopleDetector48x96();*/
cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::gpu::HOGDescriptor::DEFAULT_NLEVELS);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
cpu_hog.setSVMDetector(detector);
while (running)
{
VideoCapture vc;
Mat frame;
if (args.src_is_video)
{
vc.open(args.src.c_str());
if (!vc.isOpened())
throw runtime_error(string("can't open video file: " + args.src));
vc >> frame;
}
else if (args.src_is_camera)
{
vc.open(args.camera_id);
if (!vc.isOpened())
{
stringstream msg;
msg << "can't open camera: " << args.camera_id;
throw runtime_error(msg.str());
}
vc >> frame;
}
else
{
frame = imread(args.src);
if (frame.empty())
throw runtime_error(string("can't open image file: " + args.src));
}
Mat img_aux, img, img_to_show;
gpu::GpuMat gpu_img;
// Iterate over all frames
while (running && !frame.empty())
{
workBegin();
// Change format of the image
if (make_gray) cvtColor(frame, img_aux, CV_BGR2GRAY);
else if (use_gpu) cvtColor(frame, img_aux, CV_BGR2BGRA);
else frame.copyTo(img_aux);
// Resize image
if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
else img = img_aux;
img_to_show = img;
gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels;
vector<Rect> found;
// Perform HOG classification
hogWorkBegin();
if (use_gpu)
{
gpu_img.upload(img);
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
}
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
hogWorkEnd();
// Draw positive classified windows
for (size_t i = 0; i < found.size(); i++)
{
Rect r = found[i];
rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3);
}
if (use_gpu)
putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
else
putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencv_gpu_hog", img_to_show);
if (args.src_is_video || args.src_is_camera) vc >> frame;
workEnd();
if (args.write_video)
{
if (!video_writer.isOpened())
{
video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps,
img_to_show.size(), true);
if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer");
}
if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR);
else cvtColor(img_to_show, img, CV_BGRA2BGR);
video_writer << img;
}
handleKey((char)waitKey(3));
}
}
}
void App::handleKey(char key)
{
switch (key)
{
case 27:
running = false;
break;
case 'm':
case 'M':
use_gpu = !use_gpu;
cout << "Switched to " << (use_gpu ? "CUDA" : "CPU") << " mode\n";
break;
case 'g':
case 'G':
make_gray = !make_gray;
cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl;
break;
case '1':
scale *= 1.11;
cout << "Scale: " << scale << endl;
break;
case 'q':
case 'Q':
scale /= 1.11;
cout << "Scale: " << scale << endl;
break;
case '2':
nlevels++;
cout << "Levels number: " << nlevels << endl;
break;
case 'w':
case 'W':
nlevels = max(nlevels - 1, 1);
cout << "Levels number: " << nlevels << endl;
break;
case '3':
gr_threshold++;
cout << "Group threshold: " << gr_threshold << endl;
break;
case 'e':
case 'E':
gr_threshold = max(0, gr_threshold - 1);
cout << "Group threshold: " << gr_threshold << endl;
break;
case '4':
hit_threshold+=0.25;
cout << "Hit threshold: " << hit_threshold << endl;
break;
case 'r':
case 'R':
hit_threshold = max(0.0, hit_threshold - 0.25);
cout << "Hit threshold: " << hit_threshold << endl;
break;
case 'c':
case 'C':
gamma_corr = !gamma_corr;
cout << "Gamma correction: " << gamma_corr << endl;
break;
}
}
inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); }
inline void App::hogWorkEnd()
{
int64 delta = getTickCount() - hog_work_begin;
double freq = getTickFrequency();
hog_work_fps = freq / delta;
}
inline string App::hogWorkFps() const
{
stringstream ss;
ss << hog_work_fps;
return ss.str();
}
inline void App::workBegin() { work_begin = getTickCount(); }
inline void App::workEnd()
{
int64 delta = getTickCount() - work_begin;
double freq = getTickFrequency();
work_fps = freq / delta;
}
inline string App::workFps() const
{
stringstream ss;
ss << work_fps;
return ss.str();
}
Problem:
I am not able to detect anything. Can someone look at my work and let me know what I am doing wrong? Any suggestions would be valuable. Thank you. For the last four weeks I have been going through these steps over and over again.
P.S.: You can find the yaml files here and the test images along with the annotations here.
First of all, partition your data for cross-validation as already suggested. Second, it is a good idea to use an RBF kernel rather than a linear kernel; I highly doubt that a linear kernel can learn complex objects. A brief explanation is given here. Finally, experiment with the parameters. To do that you need to check the limits of the parameter space; it's been a while since I last used SVMs, so I cannot provide details, but a grid search with 20% cross-validation is a good start (see the sketch below).
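A minimal sketch of what that could look like with the CvSVM API used above; the iteration count and k_fold value are assumptions, and train_auto searches C and gamma over its built-in default grids:
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
using namespace cv;
// Train an RBF SVM with a built-in grid search over C and gamma.
// k_fold = 5 means each fold holds out ~20% of the data for validation.
void trainRbfSvm(const Mat &trainingDataMat, const Mat &labelsMat)
{
    CvSVMParams params;
    params.svm_type    = CvSVM::C_SVC;
    params.kernel_type = CvSVM::RBF;   // RBF instead of LINEAR
    params.term_crit   = cvTermCriteria(CV_TERMCRIT_ITER, 1000, 1e-6);
    CvSVM svm;
    svm.train_auto(trainingDataMat, labelsMat, Mat(), Mat(), params, 5);
    svm.save("Classifier_rbf.xml");
}
One caveat: setSVMDetector in HOGDescriptor expects a single linear weight vector, so an RBF model only helps for classifying individual windows; for the sliding-window detector itself you would still need a (well-tuned) linear SVM.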

Kinect 2 failing to 'AcquireLatestFrame'

I am using the following code from a sample I found online. It seems to pick up one frame, but subsequently never succeeds with AcquireLatestFrame. I have the exact same problem with the body reader. Can anyone see an issue that might be causing this?
IKinectSensor* pSensor;
HRESULT hResult = S_OK;
hResult = GetDefaultKinectSensor(&pSensor);
if (FAILED(hResult)) {
std::cerr << "Error : GetDefaultKinectSensor" << std::endl;
return -1;
}
hResult = pSensor->Open();
if (FAILED(hResult)) {
std::cerr << "Error : IKinectSensor::Open()" << std::endl;
return -1;
}
// Source
IColorFrameSource* pColorSource;
hResult = pSensor->get_ColorFrameSource(&pColorSource);
if (FAILED(hResult)) {
std::cerr << "Error : IKinectSensor::get_ColorFrameSource()" << std::endl;
return -1;
}
// Reader
IColorFrameReader* pColorReader;
hResult = pColorSource->OpenReader(&pColorReader);
if (FAILED(hResult)) {
std::cerr << "Error : IColorFrameSource::OpenReader()" << std::endl;
return -1;
}
// Description
IFrameDescription* pDescription;
hResult = pColorSource->get_FrameDescription(&pDescription);
if (FAILED(hResult)) {
std::cerr << "Error : IColorFrameSource::get_FrameDescription()" << std::endl;
return -1;
}
int width = 0;
int height = 0;
pDescription->get_Width(&width); // 1920
pDescription->get_Height(&height); // 1080
unsigned int bufferSize = width * height * 4 * sizeof(unsigned char);
cv::Mat bufferMat(height, width, CV_8UC4);
cv::Mat colorMat(height / 2, width / 2, CV_8UC4);
cv::namedWindow("Color");
while (1) {
// Frame
IColorFrame* pColorFrame = nullptr;
hResult = pColorReader->AcquireLatestFrame(&pColorFrame);
if (SUCCEEDED(hResult)) {
hResult = pColorFrame->CopyConvertedFrameDataToArray(bufferSize, reinterpret_cast<BYTE*>(bufferMat.data), ColorImageFormat::ColorImageFormat_Bgra);
if (SUCCEEDED(hResult)) {
cv::resize(bufferMat, colorMat, cv::Size(), 0.5, 0.5);
}
}
else
{
cout << "Can't aquire latest frame.\n";
}
cv::imshow("Color", colorMat);
if (cv::waitKey(30) == VK_ESCAPE) {
break;
}
}
if (pSensor) {
pSensor->Close();
}
cv::destroyAllWindows();
I wasn't releasing pColorFrame. Doing so solved the issue; a sketch of the fixed loop body is below.
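A minimal sketch of the fixed loop body, reusing the variables declared above: the frame is a COM object, so it has to be released every iteration or the reader never hands out another frame.
IColorFrame* pColorFrame = nullptr;
hResult = pColorReader->AcquireLatestFrame(&pColorFrame);
if (SUCCEEDED(hResult)) {
    hResult = pColorFrame->CopyConvertedFrameDataToArray(bufferSize,
        reinterpret_cast<BYTE*>(bufferMat.data),
        ColorImageFormat::ColorImageFormat_Bgra);
    if (SUCCEEDED(hResult)) {
        cv::resize(bufferMat, colorMat, cv::Size(), 0.5, 0.5);
    }
}
if (pColorFrame != nullptr) {
    pColorFrame->Release();   // release the COM object so the next AcquireLatestFrame can succeed
    pColorFrame = nullptr;
}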

OpenCV Error: Assertion failed (ni > 0 && ni == ni1)

I have the following problem: everything works fine, but when I reach the calibrateCamera part I get the following error:
OpenCV Error: Assertion failed (ni > 0 && ni == ni1) in unknown function, file ......\src\opencv\modules\calib3d\src\calibration.cpp, line 3197
I am new here, and maybe my code isn't as clear as it could be, but please be friendly.
Thank you very much in advance.
My code is below (I deleted the include and pragma parts because they showed up as really big letters here):
int _tmain()
{
printf("Everything loaded. Press Enter to continue.\n\a");
getch();
system("cls"); //bildschirm clearen
int numBoards = 0;
int numCornersHor;
int numCornersVer;
char stCurPath[200];
int numFiles = 0;
char stRemFileNum[200];
int curNum;
vector<string> file_names;
string s;
bool pathok = false;
#pragma region ask user for path to load images and list them
// ask user for path to load images and list them
while(pathok == false)
{
fflush(stdin);
printf("Enter path to the folder where the pictures are:\n");
scanf("%199[^\n]s", stCurPath); //achtung wegen bufferoverflow - nicht mehr als 199 zeichen + EOF (0) einlesen.
//[^\n] wegen Leerzeichen. scanf liest bis Enter.
system("cls");
DIR *dir;
struct dirent *ent;
dir = opendir (stCurPath);
if (dir != NULL) {
/* print all the files and directories within directory */
printf("Your chosen path:\n%s\n\nFiletree of this path:\n", stCurPath);
while ((ent = readdir (dir)) != NULL)
{
numFiles++; // number of files
printf ("%d)\t%s\n",numFiles, ent->d_name);
s = ent->d_name;
file_names.push_back(s);
}
closedir (dir);
fflush( stdout );
printf ("\nNumber of found files: %d\n", numFiles);
pathok = true;
}
else
{
/* could not open directory */
printf ("Could not open directory. Make sure path is ok!\n\n");
pathok = false;
/*perror ("");
return EXIT_FAILURE*/;
}
}//while(pathok == false)
/*****************************end ask user for path to load images and list them****************************************/
#pragma endregion
#pragma region ask user to exclude some files
/*****************************************ask user to exclude some files*************************/
printf ("\nEnter the number of the files you dont want to load (number only!).\nSeperate single files with comma.\nEnter 0 if you don't want to exclude files.\nEnter x to exclude all non bmp files.\n\nNumbers:\n");
scanf("%199s", stRemFileNum); //achtung wegen bufferoverflow - nicht mehr als 199 zeichen + EOF (0) einlesen
vector<string>::iterator it;
if(strcmp(stRemFileNum,"x") == 0)
{
curNum=1;
numFiles = 0;
string extstr;
const char * extc;
file_names.erase(remove_if(file_names.begin(),
file_names.end(),
isBmpExtension), file_names.end());
system("cls"); //bildschirm clearen
printf("New Filetree of this path:\n\n");
it = file_names.begin();
for(it; it != file_names.end(); ++it)
{
numFiles++;
printf ("%d)\t%s\n",numFiles, (*it).c_str());
}
printf ("\nNumber of found files: %d\n", numFiles);
}//if(strcmp(stRemFileNum,"x") == 0)
//end user entered x
//start user entered 0
if(strcmp(stRemFileNum,"0") != 0 && strcmp(stRemFileNum,"x") != 0)
{
// error
numFiles = 0;
vector<string> numbersVector;
string strNumbers = stRemFileNum;
Tokenize(strNumbers, numbersVector, ",");
sort(numbersVector.begin(), numbersVector.end(), strCompDesc);
for(it = numbersVector.begin(); it != numbersVector.end(); ++it)
{
curNum = atoi((*it).c_str());
file_names.erase(file_names.begin() + (curNum - 1));
}
system("cls"); //bildschirm clearen
printf("New Filetree of this path:\n\n");
// error end
for(vector<string>::iterator it = file_names.begin(); it != file_names.end(); ++it)
{
numFiles++;
printf ("%d)\t%s\n",numFiles, (*it).c_str());
}
printf ("\nNumber of found files: %d\n", numFiles);
}
//user entered 0
else if(strcmp(stRemFileNum,"0") == 0)
{
printf ("\nNo files excluded.\n");
}
/*****************end ask user to exclude some files *******************************************/
#pragma endregion
fflush(stdin);
printf("\nEnter number of corners along width: ");
scanf("%d", &numCornersHor);
fflush(stdin);
printf("Enter number of corners along height: ");
scanf("%d", &numCornersVer);
int numSquares = numCornersHor * numCornersVer;
Size board_sz = Size(numCornersHor, numCornersVer);
vector<vector<Point3f>> object_points;
vector<vector<Point2f>> image_points;
vector<Point2f> corners;
vector<Point3f> obj;
int pictures_done=0;
Mat image;
Mat gray_image;
//display images for manual review +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
for(vector<string>::iterator it = file_names.begin(); it != file_names.end(); ++it)
{
printf("\nLoading Picture..\n");
image = imread(string(stCurPath) + "\\" + (*it).c_str()); // read in colour; double backslash or normal one
//resize(image, image, Size(0,0), 0.5, 0.5, INTER_AREA);
cvtColor(image,gray_image,CV_RGB2GRAY);
for(int j=0;j<numSquares;j++)
{
obj.push_back(Point3f(j/numCornersHor, j%numCornersHor, 0.0f));
}//for(int j=0;j<numSquares;j++)
bool found = findChessboardCorners(image, board_sz, corners, CV_CALIB_CB_ADAPTIVE_THRESH | CV_CALIB_CB_FILTER_QUADS);
if(found)
{
cornerSubPix(gray_image, corners, Size(11, 11), Size(-1, -1), TermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 30, 0.1));
drawChessboardCorners(gray_image, board_sz, corners, found);
}
cvNamedWindow("win1", 1);
imshow("win1", gray_image);
waitKey(30);
cvMoveWindow("win1",0,0);
waitKey(30);
printf("\nPicture loaded.\nPress a to load original, s to store snap and show next picture,");
printf("\nd to drop snap and show next picture, and f to close the programm\n");
char key;
bool bOriginalDisplayed = false;
while(1)
{
if (cin.rdbuf()->in_avail())
{
key = _getch();
}
if('a' == key && found!=0)
{
if (!bOriginalDisplayed)
{
printf("\nLoading original..\n");
cvNamedWindow("win2", 1);
cvMoveWindow("win2",0,0);
imshow("win2", image); //oder imshow cvMoveWindow("Smile", 100, 100);
bOriginalDisplayed = true;
waitKey(50);
printf("\nOriginal loaded.\nPess a again to close original before you continue.\n");
}
else
{
cvDestroyWindow("win2");
printf("\nOriginal closed.\n");
bOriginalDisplayed = false;
}
}
if('s'==key)
{
image_points.push_back(corners);
object_points.push_back(obj);
printf("\nSnap stored!\n");
pictures_done++;
found = false;
break;
}
if('d' == key)
{
pictures_done++;
break;
found = false;
}
if('f' == key)
{
return 0;
}
Sleep(50);
} //while (1)
} //for(vector<string>::iterator it = file_names.begin(); it != file_names.end(); ++it)
cvDestroyWindow("win1");
waitKey(50);
Mat intrinsic = Mat(3, 3, CV_32FC1);
Mat distCoeffs;
vector<Mat> rvecs;
vector<Mat> tvecs;
intrinsic.ptr<float>(0)[0] = 1;
intrinsic.ptr<float>(1)[1] = 1;
calibrateCamera(object_points, image_points, image.size(), intrinsic, distCoeffs, rvecs, tvecs);
Mat imageUndistorted;
for(vector<string>::iterator it = file_names.begin(); it != file_names.end(); ++it)
{
printf("\nLoading undistorted Picture..\n");
image = imread(string(stCurPath) + "\\" + (*it).c_str()); // read in colour; double backslash or normal one
//resize(image, image, Size(0,0), 0.5, 0.5, INTER_AREA);
cvtColor(image,gray_image,CV_RGB2GRAY);
cvNamedWindow("win1", 1);
cvNamedWindow("win2", 1);
cvMoveWindow("win1",0,0);
cvMoveWindow("win2",0,0);
undistort(image, imageUndistorted, intrinsic, distCoeffs);
imshow("win1", image);
waitKey(0);
imshow("win2", imageUndistorted);
waitKey(30);
printf("\nPicture loaded. Press s for the next picture or f to exit.");
char key;
bool bOriginalDisplayed = false;
while(1)
{
if (cin.rdbuf()->in_avail())
{
key = _getch();
}
if('s'==key)
{
break;
}
if('f' == key)
{
return 0;
}
Sleep(50);
} //while (1)
} //for(vector<string>::iterator it = file_names.begin(); it != file_names.end(); ++it)
return 0;
}
I found the problem. And, being a good person, I came here to post the answer for people who run into the same issue in the future.
Solution: add an obj.clear() inside the for loop so obj always stays the same size when you push it back; calibrateCamera asserts because every entry of object_points must contain the same number of points as the corresponding entry of image_points. Like this:
//display images for manual review +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
for(vector<string>::iterator it = file_names.begin(); it != file_names.end(); ++it)
{
printf("\nLoading Picture..\n");
image = imread(string(stCurPath) + "\\" + (*it).c_str()); // read in colour; double backslash or normal one
//resize(image, image, Size(0,0), 0.5, 0.5, INTER_AREA);
cvtColor(image,gray_image,CV_RGB2GRAY);
obj.clear();
for(int j=0;j<numSquares;j++)
{
obj.push_back(Point3f(j/numCornersHor, j%numCornersHor, 0.0f));
}//for(int j=0;j<numSquares;j++)
bool found = findChessboardCorners(image, board_sz, corners, CV_CALIB_CB_ADAPTIVE_THRESH | CV_CALIB_CB_FILTER_QUADS);
if(found)
Have fun programming. Greets, Escore.
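P.S. An equivalent way to keep the sizes consistent is to build the object-point grid fresh for every view; here is a minimal sketch (the helper name is mine, not from the code above):
#include <opencv2/core/core.hpp>
#include <vector>
using namespace cv;
using namespace std;
// Build the chessboard object points for a single view. Calling this once per
// image keeps object_points[i].size() equal to image_points[i].size(), which is
// exactly the condition the failed assertion (ni > 0 && ni == ni1) checks.
vector<Point3f> makeObjectPoints(int numCornersHor, int numCornersVer)
{
    vector<Point3f> obj;
    for (int j = 0; j < numCornersHor * numCornersVer; j++)
        obj.push_back(Point3f(j / numCornersHor, j % numCornersHor, 0.0f));
    return obj;
}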

Resources