OpenCL: Strange buffer or image behaviour with NVidia but not AMD - buffer

I have a big problem (on Linux):
I create a buffer with defined data, then an OpenCL kernel takes this data and puts it into an image2d_t. When working on an AMD C50 (Fusion CPU/GPU) the program works as desired, but on my GeForce 9500 GT the given kernel computes the correct result very rarely. Sometimes the result is correct, but very often it is incorrect. Sometimes it depends on very strange changes like removing unused variable declarations or adding a newline. I noticed that disabling optimization increases the probability of failure. I have the latest display driver on both systems.
Here is my reduced code:
#include <CL/cl.h>
#include <string>
#include <iostream>
#include <sstream>
#include <cmath>
/**
 * Throws a std::string describing an OpenCL failure.
 *
 * @param err   Status code returned by an OpenCL call.
 * @param name  Label of the failing step; it is used as the prefix of the message.
 * @throws std::string  "<name><symbolic error> (<numeric code>)" whenever
 *                      err differs from CL_SUCCESS. Nothing happens on success.
 */
void checkOpenCLErr(cl_int err, std::string name){
    if (err == CL_SUCCESS) {
        return;
    }

    // Indexed by the negated error code; the empty entries are unassigned codes.
    static const char* const errorString[] = {
        "CL_SUCCESS",
        "CL_DEVICE_NOT_FOUND",
        "CL_DEVICE_NOT_AVAILABLE",
        "CL_COMPILER_NOT_AVAILABLE",
        "CL_MEM_OBJECT_ALLOCATION_FAILURE",
        "CL_OUT_OF_RESOURCES",
        "CL_OUT_OF_HOST_MEMORY",
        "CL_PROFILING_INFO_NOT_AVAILABLE",
        "CL_MEM_COPY_OVERLAP",
        "CL_IMAGE_FORMAT_MISMATCH",
        "CL_IMAGE_FORMAT_NOT_SUPPORTED",
        "CL_BUILD_PROGRAM_FAILURE",
        "CL_MAP_FAILURE",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "CL_INVALID_VALUE",
        "CL_INVALID_DEVICE_TYPE",
        "CL_INVALID_PLATFORM",
        "CL_INVALID_DEVICE",
        "CL_INVALID_CONTEXT",
        "CL_INVALID_QUEUE_PROPERTIES",
        "CL_INVALID_COMMAND_QUEUE",
        "CL_INVALID_HOST_PTR",
        "CL_INVALID_MEM_OBJECT",
        "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR",
        "CL_INVALID_IMAGE_SIZE",
        "CL_INVALID_SAMPLER",
        "CL_INVALID_BINARY",
        "CL_INVALID_BUILD_OPTIONS",
        "CL_INVALID_PROGRAM",
        "CL_INVALID_PROGRAM_EXECUTABLE",
        "CL_INVALID_KERNEL_NAME",
        "CL_INVALID_KERNEL_DEFINITION",
        "CL_INVALID_KERNEL",
        "CL_INVALID_ARG_INDEX",
        "CL_INVALID_ARG_VALUE",
        "CL_INVALID_ARG_SIZE",
        "CL_INVALID_KERNEL_ARGS",
        "CL_INVALID_WORK_DIMENSION",
        "CL_INVALID_WORK_GROUP_SIZE",
        "CL_INVALID_WORK_ITEM_SIZE",
        "CL_INVALID_GLOBAL_OFFSET",
        "CL_INVALID_EVENT_WAIT_LIST",
        "CL_INVALID_EVENT",
        "CL_INVALID_OPERATION",
        "CL_INVALID_GL_OBJECT",
        "CL_INVALID_BUFFER_SIZE",
        "CL_INVALID_MIP_LEVEL",
        "CL_INVALID_GLOBAL_WORK_SIZE",
    };
    const long tableSize = static_cast<long>(sizeof(errorString) / sizeof(errorString[0]));

    // Positive codes and vendor/extension codes (e.g. -1001) are outside the
    // table; indexing with them would read out of bounds.
    const long index = -static_cast<long>(err);
    const char* text = "UNKNOWN_OPENCL_ERROR";
    if (index >= 0 && index < tableSize && errorString[index][0] != '\0') {
        text = errorString[index];
    }

    std::stringstream str;
    str << text << " (" << err << ")";
    throw name + str.str();
}
int main(){
try{
cl_context m_context;
cl_platform_id* m_platforms;
unsigned int m_numPlatforms;
cl_command_queue m_queue;
cl_device_id m_device;
cl_int error = 0; // Used to handle error codes
clGetPlatformIDs(0,NULL,&m_numPlatforms);
m_platforms = new cl_platform_id[m_numPlatforms];
error = clGetPlatformIDs(m_numPlatforms,m_platforms,&m_numPlatforms);
checkOpenCLErr(error, "getPlatformIDs");
// Device
error = clGetDeviceIDs(m_platforms[0], CL_DEVICE_TYPE_GPU, 1, &m_device, NULL);
checkOpenCLErr(error, "getDeviceIDs");
// Context
cl_context_properties properties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(m_platforms[0]), 0};
m_context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
// m_private->m_context = clCreateContext(properties, 1, &m_private->m_device, NULL, NULL, &error);
checkOpenCLErr(error, "Create context");
// Command-queue
m_queue = clCreateCommandQueue(m_context, m_device, 0, &error);
checkOpenCLErr(error, "Create command queue");
//Build program and kernel
const char* source = "#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable\n"
"\n"
"__kernel void bufToImage(__global unsigned char* in, __write_only image2d_t out, const unsigned int offset_x, const unsigned int image_width , const unsigned int maxval ){\n"
"\tint i = get_global_id(0);\n"
"\tint j = get_global_id(1);\n"
"\tint width = get_global_size(0);\n"
"\tint height = get_global_size(1);\n"
"\n"
"\tint pos = j*image_width*3+(offset_x+i)*3;\n"
"\tif( maxval < 256 ){\n"
"\t\tfloat4 c = (float4)(in[pos],in[pos+1],in[pos+2],1.0f);\n"
"\t\tc.x /= maxval;\n"
"\t\tc.y /= maxval;\n"
"\t\tc.z /= maxval;\n"
"\t\twrite_imagef(out, (int2)(i,j), c);\n"
"\t}else{\n"
"\t\tfloat4 c = (float4)(255.0f*in[2*pos]+in[2*pos+1],255.0f*in[2*pos+2]+in[2*pos+3],255.0f*in[2*pos+4]+in[2*pos+5],1.0f);\n"
"\t\tc.x /= maxval;\n"
"\t\tc.y /= maxval;\n"
"\t\tc.z /= maxval;\n"
"\t\twrite_imagef(out, (int2)(i,j), c);\n"
"\t}\n"
"}\n"
"\n"
"__constant sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
"\n"
"__kernel void imageToBuf(__read_only image2d_t in, __global unsigned char* out, const unsigned int offset_x, const unsigned int image_width ){\n"
"\tint i = get_global_id(0);\n"
"\tint j = get_global_id(1);\n"
"\tint pos = j*image_width*3+(offset_x+i)*3;\n"
"\tfloat4 c = read_imagef(in, imageSampler, (int2)(i,j));\n"
"\tif( c.x <= 1.0f && c.y <= 1.0f && c.z <= 1.0f ){\n"
"\t\tout[pos] = c.x*255.0f;\n"
"\t\tout[pos+1] = c.y*255.0f;\n"
"\t\tout[pos+2] = c.z*255.0f;\n"
"\t}else{\n"
"\t\tout[pos] = 200.0f;\n"
"\t\tout[pos+1] = 0.0f;\n"
"\t\tout[pos+2] = 255.0f;\n"
"\t}\n"
"}\n";
cl_int err;
cl_program prog = clCreateProgramWithSource(m_context,1,&source,NULL,&err);
if( -err != CL_SUCCESS ) throw std::string("clCreateProgramWithSources");
err = clBuildProgram(prog,0,NULL,"-cl-opt-disable",NULL,NULL);
if( -err != CL_SUCCESS ) throw std::string("clBuildProgram(fromSources)");
cl_kernel kernel = clCreateKernel(prog,"bufToImage",&err);
checkOpenCLErr(err,"CreateKernel");
cl_uint imageWidth = 80;
cl_uint imageHeight = 90;
//Initialize datas
cl_uint maxVal = 255;
cl_uint offsetX = 0;
int size = imageWidth*imageHeight*3;
int resSize = imageWidth*imageHeight*4;
cl_uchar* data = new cl_uchar[size];
cl_float* expectedData = new cl_float[resSize];
for( int i = 0,j=0; i < size; i++,j++ ){
data[i] = (cl_uchar)i;
expectedData[j] = (cl_float)((unsigned char)i)/255.0f;
if ( i%3 == 2 ){
j++;
expectedData[j] = 1.0f;
}
}
cl_mem inBuffer = clCreateBuffer(m_context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,size*sizeof(cl_uchar),data,&err);
checkOpenCLErr(err, "clCreateBuffer()");
clFinish(m_queue);
cl_image_format imgFormat;
imgFormat.image_channel_order = CL_RGBA;
imgFormat.image_channel_data_type = CL_FLOAT;
cl_mem outImg = clCreateImage2D( m_context, CL_MEM_READ_WRITE, &imgFormat, imageWidth, imageHeight, 0, NULL, &err );
checkOpenCLErr(err,"get2DImage()");
clFinish(m_queue);
size_t kernelRegion[]={imageWidth,imageHeight};
size_t kernelWorkgroup[]={1,1};
//Fill kernel with data
clSetKernelArg(kernel,0,sizeof(cl_mem),&inBuffer);
clSetKernelArg(kernel,1,sizeof(cl_mem),&outImg);
clSetKernelArg(kernel,2,sizeof(cl_uint),&offsetX);
clSetKernelArg(kernel,3,sizeof(cl_uint),&imageWidth);
clSetKernelArg(kernel,4,sizeof(cl_uint),&maxVal);
//Run kernel
err = clEnqueueNDRangeKernel(m_queue,kernel,2,NULL,kernelRegion,kernelWorkgroup,0,NULL,NULL);
checkOpenCLErr(err,"RunKernel");
clFinish(m_queue);
//Check resulting data for validty
cl_float* computedData = new cl_float[resSize];;
size_t region[]={imageWidth,imageHeight,1};
const size_t offset[] = {0,0,0};
err = clEnqueueReadImage(m_queue,outImg,CL_TRUE,offset,region,0,0,computedData,0,NULL,NULL);
checkOpenCLErr(err, "readDataFromImage()");
clFinish(m_queue);
for( int i = 0; i < resSize; i++ ){
if( fabs(expectedData[i]-computedData[i])>0.1 ){
std::cout << "Expected: \n";
for( int j = 0; j < resSize; j++ ){
std::cout << expectedData[j] << " ";
}
std::cout << "\nComputed: \n";
std::cout << "\n";
for( int j = 0; j < resSize; j++ ){
std::cout << computedData[j] << " ";
}
std::cout << "\n";
throw std::string("Error, computed and expected data are not the same!\n");
}
}
}catch(std::string& e){
std::cout << "\nCaught an exception: " << e << "\n";
return 1;
}
std::cout << "Works fine\n";
return 0;
}
I also uploaded the source code for you to make it easier to test it:
http://www.file-upload.net/download-3524302/strangeOpenCLError.cpp.html
Please can you tell me if I've done wrong anything?
Is there any mistake in the code or is this a bug in my driver?
Best regards,
Alex
Edit: changed the program (both: here and the linked one) a little bit to make it more likely to get a mismatch.

I found the bug and this is an annoying one:
When working under linux and just linking the OpenCL program with the most actual "OpenCV" library (yes, the computation lib), the binary parts of the kernels, which get compiled and cached in ~/.nv are damaged.
Can you please install the actual OpenCV library and execute following commands:
Generating bad kernel maybe leading sometimes to bad behaviour:
rm -R ~/.nv && g++ strangeOpenCLError.cpp -lOpenCL -lopencv_gpu -o strangeOpenCLError && ./strangeOpenCLError && ls -la ~/.nv/ComputeCache/*/*
Generating good kernel which performs as desired:
rm -R ~/.nv && g++ strangeOpenCLError.cpp -lOpenCL -o strangeOpenCLError && ./strangeOpenCLError && ls -la ~/.nv/ComputeCache/*/*
On my system, when using -lopencv_gpu or -lopencv_core, I get a kernel object in ~/.nv with a slightly different size, due to slightly different binary parts. These smaller kernels are the ones that computed bad results on my systems.
The problem is that the bug does not always appear: sometimes it shows up only when working on buffers that are big enough. So the more reliable indicator is the differing kernel-cache size. I edited the program in my question; it is now more likely to produce the bad result.
Best regards,
Alex
PS: I also created a bug report at NVidia and it is in progress. They could reproduce the bug on their system.

To turn off the Nvidia compiler cache, set the environment variable CUDA_CACHE_DISABLE=1. That may help to avoid the problem in the future.

In line
m_context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, NULL);
you should use &error as last parameter to get a meaningful error. Without it I got some silly error messages. (I needed to change the platform to get my GPU board.)
I can not reproduce the error with my nVidia GeForce 8600 GTS. I get a 'Works fine'. I tried it >20 times without any issue.
I also cannot see any error, besides that your code is a little confusing. You should remove all commented-out code and introduce some blank lines to group the code a little bit.
Do you have the latest drivers? The behavior you describe sounds very familiar like an uninitialized buffer or variable, but I do not see anything like that.

Related

How to get stack trace for C/C++ program in CYGWIN environment?

How to get stack trace for C/C++ program in CYGWIN environment ?
** I was looking for a back trace mechanism, I've compiled some of the solutions found here and made it a small program for quick reference.
My Answers with a code snippet:
#if defined(__CYGWIN__)
#include <Windows.h>
#include <dbghelp.h>
#include <psdk_inc/_dbg_common.h>
#include <cxxabi.h>
#include <cstring>
class Error // Windows version
{
private:
void *stacktrace[MAX_STACKTRACE_SIZE];
size_t stacktrace_size;
public:
const char* message;
Error(const char* m)
: message(m)
, stacktrace_size(0)
{
// Capture the stack, when error is 'hit'
stacktrace_size = CaptureStackBackTrace(0, MAX_STACKTRACE_SIZE, stacktrace, nullptr);
}
void print_backtrace(ostream& out) const
{
SYMBOL_INFO * symbol;
HANDLE process;
size_t length;
process = GetCurrentProcess();
SymInitialize(process, nullptr, TRUE);
symbol = (SYMBOL_INFO *)calloc(sizeof(SYMBOL_INFO) + 256 * sizeof(char), 1);
symbol->MaxNameLen = 255;
symbol->SizeOfStruct = sizeof(SYMBOL_INFO);
length = strlen (symbol->Name);
std::string result;
char tempStr[255] = {0};
for (int i = 0; i < stacktrace_size; i++)
{
int status = 0;
// '_' is missing in symbol->Name , hence prefix it and concat with symbol->Name
char prefixed_symbol [256] = "_" ;
SymFromAddr(process, (DWORD64)(stacktrace[i]), 0, symbol);
auto backtrace_line = string(symbol->Name);
if (backtrace_line.size() == 0) continue;
// https://en.wikipedia.org/wiki/Name_mangling
// Prefix '_' with symbol name, so that __cxa_demangle does the job correctly
// $ c++filt -n _Z9test_ringI12SmallIntegerIhEEvRK4RingIT_E
strcat (prefixed_symbol, symbol->Name);
char * demangled_name = abi::__cxa_demangle(prefixed_symbol, nullptr, nullptr, &status);
if(status < 0)
{
sprintf(tempStr, "%i: %s - 0x%0X\n", stacktrace_size-i-1, symbol->Name, symbol->Address);
// out << symbol->Name << endl;
}
else
{
sprintf(tempStr, "%i: %s - 0x%0X\n", stacktrace_size - i - 1, demangled_name, symbol->Address);
// out << demangled_name << endl;
}
// Append the extracted info to the result
result += tempStr;
// Free the HEAP allocation made by __cxa_demangle
free((void*)demangled_name);
// Restore the prefix '_' string
prefixed_symbol [1] = '\0';
}
std::cout << result << std::endl;
free(symbol);
}
};
// Demonstrates throwing an Error and printing the captured backtrace.
// NOTE(review): do_something() and status are not defined in this snippet;
// they must be supplied by the surrounding program.
int main ()
{
    try {
        do_something ();
        if (false == status) throw Error("SystemError");
    }
    catch (const Error &error)
    {
        cout << "NotImplementedError(\"" << error.message << "\")" << endl;
        error.print_backtrace(cout);
        return 1;
    }
    return 0;
}
#endif
Command Line Option:
// Use -limagehlp to link the library
g++ -std=c++20 main.cpp -limagehlp

OpenCL "read_imageui " always returns zero 0

I have written a simple OpenCL program with an objective to make a copy of input image using OpenCL image2d struct. It seemed like a simple job to do but I have been stuck at it.
The kernel has "read_imageui" which always returns zero value. The input image is a all white jpeg image.
Image loading is done using OpenCV imread.
Here is the Kernel :
/* Unnormalised integer coordinates, clamp-to-edge addressing, nearest-neighbour lookup. */
const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

/* Each work-item copies the texel at its own 2-D global id from `in` to `out`. */
__kernel void copy(__read_only image2d_t in, __write_only image2d_t out)
{
    const int2 coord = (int2)((int)get_global_id(0), (int)get_global_id(1));
    const uint4 texel = read_imageui(in, smp, coord);
    write_imageui(out, coord, texel);
}
Here is the host code :
int main(){
//get all platforms (drivers)
std::vector<cl::Platform> all_platforms;
cl::Platform::get(&all_platforms);
if(all_platforms.size()==0){
std::cout<<" No platforms found. Check OpenCL installation!\n";
exit(1);
}
cl::Platform default_platform=all_platforms[0];
std::cout << "Using platform: "<<default_platform.getInfo<CL_PLATFORM_NAME>()<<"\n";
std::cout <<" Platform Version: "<<default_platform.getInfo<CL_PLATFORM_VERSION>() <<"\n";
//cout << "Image 2D support : " << default_platform.getInfo<CL_DEVICE_IMAGE_SUPPORT>()<<"\n";
//get default device of the default platform
std::vector<cl::Device> all_devices;
default_platform.getDevices(CL_DEVICE_TYPE_ALL, &all_devices);
if(all_devices.size()==0){
std::cout<<" No devices found. Check OpenCL installation!\n";
exit(1);
}
cl::Device default_device=all_devices[0];
std::cout<< "Using device: "<<default_device.getInfo<CL_DEVICE_NAME>()<<"\n";
//creating a context
cl::Context context(default_device);
//cl::Program::Sources sources;
//sources.push_back(LoadKernel('kenel2.cl'));
//load kernel coad
cl::Program program(context,LoadKernel("image_test.cl"));
//build kernel code
if(program.build(all_devices)!=CL_SUCCESS){
std::cout<<" Error building: "<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(default_device)<<"\n";
exit(1);
}
/* IMAGE FORMTS */
// Determine and show image format support
vector<cl::ImageFormat > supportedFormats;
context.getSupportedImageFormats(CL_MEM_READ_ONLY,CL_MEM_OBJECT_IMAGE2D,&supportedFormats);
cout <<"No. of supported formats " <<supportedFormats.size()<<endl;
Mat white = imread("white_small.jpg");
cvtColor(white, white, CV_BGR2RGBA);
//white.convertTo(white,CV_8UC4);
Mat out = Mat(white);
out.setTo(Scalar(0));
char * inbuffer = reinterpret_cast<char *>(white.data);
char * outbuffer = reinterpret_cast<char *>(out.data);
//cout <<"Type of input : " <<white.type<<endl;
int sizeOfImage = white.cols * white.rows * white.channels();
int outImageSize = white.cols * white.rows * white.channels();
int w = white.cols;
int h = white.rows;
cout <<"Creating Images ... "<<endl;
cout <<"Dimensions ..." <<w << " x "<<h<<endl;
const cl::ImageFormat format(CL_RGBA, CL_UNSIGNED_INT8);
cl::Image2D imageSrc(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, format, white.cols, white.rows,0,inbuffer);
cl::Image2D imageDst(context, CL_MEM_WRITE_ONLY, format , white.cols, white.rows,0,NULL);
cout <<"Creating Kernel Program ... "<<endl;
cl::Kernel kernelCopy(program, "copy");
kernelCopy.setArg(0, imageSrc);
kernelCopy.setArg(1, imageDst);
cout <<"Creating Command Queue ... "<<endl;
cl::CommandQueue queue(context, default_device);
cout <<"Executing Kernel ... "<<endl;
int64 e = getTickCount();
for(int i = 0 ; i < 100 ; i ++)
{
queue.enqueueNDRangeKernel(kernelCopy, cl::NullRange, cl::NDRange(w, h), cl::NullRange);
queue.finish();
}
cout <<((getTickCount() - e) / getTickFrequency())/100 <<endl;;
cl::size_t<3> origin;
cl::size_t<3> size;
origin[0] = 0;
origin[1] = 0;
origin[2] = 0;
size[0] = w;
size[1] = h;
size[2] = 1;
cout <<"Transfering Images ... "<<endl;
//unsigned char *tmp = new unsigned char (w * h * 4);
//CL_TRUE means that it waits for the entire image to be copied before continuing
queue.enqueueReadImage(imageDst, CL_TRUE, origin, size, 0, 0, outbuffer);
queue.finish();
imwrite("result.jpg",out);
/* OLD CODE ==================================================*/
return 0;
}
However if I change the kernel as
uint4 pix2 = (uint4)(255,255,255,1);
write_imageui(out,pos,pix2);
It outputs a white image. Which means there is something wrong with how I am using the read_image
it came out to be something related to "reference counting" on Mat copy constructor.
if instead of using
Mat white = imread("white_small.jpg");
cvtColor(white, white, CV_BGR2RGBA);
//white.convertTo(white,CV_8UC4);
Mat out = Mat(white);
Initialize the output matrix "out" as
Mat out = Mat(white.size(), CV_8UC4); // size() yields the cv::Size; `out` gets its own buffer
then it works fine.
I couldn't comprehend completely what exactly caused it but I know that it is due to "reference counting" of Mat copy constructor when used as first syntax.
When write:
Mat out = Mat(white);
This is a shallow copy of white into out. Both the white.data and out.data pointers will point to the same memory, and the reference count will be incremented. So, when you call out.setTo, the white Mat will see the same change. Declaring out as below might be a good idea:
Mat out = Mat(white.size(), CV_8UC(white.channels())); // separate buffer with the same geometry (assumes 8-bit data)

OpenCV 3.0 printing Mat

I am a newbie to OpenCV, so pls bear with me.. I am trying to dump the histogram Mat object for the given image.. It fails with the below error - Any help appreciated...
The first cout in the below program i.e of the loaded image prints successfully - While the second cout of the hist of the image fails with the below error
OpenCV Error: Assertion failed (m.dims <= 2) in FormattedImpl, file /mycode/ws/opencv/opencv-3.0.0-beta/modules/core/src/out.cpp, line 86
libc++abi.dylib: terminating with uncaught exception of type cv::Exception: /mycode/ws/opencv/opencv-3.0.0-beta/modules/core/src/out.cpp:86: error: (-215) m.dims <= 2 in function FormattedImpl
Here is the complete code
#include <stdio.h>
#include <string>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main(int argc, char** argv) {
if (argc != 2) {
printf("usage: opencv.out <Image_Path>\n");
return -1;
}
string imagePath = (argv[1]);
cout << "loading image..." << imagePath << endl;
Mat image = imread(imagePath, 1);
Mat hist;
int imgCount = 1;
int dims = 3;
const int histSizes[] = {4, 4, 4};
const int channels[] = {0, 1, 2};
float rRange[] = {0, 256};
float gRange[] = {0, 256};
float bRange[] = {0, 256};
const float *ranges[] = {rRange, gRange, bRange};
Mat mask = Mat();
calcHist(&image, imgCount, channels, mask, hist, dims, histSizes, ranges);
cout << image << "Loaded image..." << endl;
cout << "Hist of image..." << hist;
return 0;
}
Based on the OpenCV 2.4.9 source code:
static inline std::ostream& operator << (std::ostream& out, const Mat& mtx)
{
Formatter::get()->write(out, mtx);
return out;
}
Is the function you are calling when using << operator. Formatter::get() returns appropriate
formatter class based on the programming language you are using.
The write() function basically calls:
static void writeMat(std::ostream& out, const Mat& m, char rowsep, char elembrace, bool singleLine)
{
CV_Assert(m.dims <= 2);
int type = m.type();
char crowbrace = getCloseBrace(rowsep);
char orowbrace = crowbrace ? rowsep : '\0';
if( orowbrace || isspace(rowsep) )
rowsep = '\0';
for( int i = 0; i < m.rows; i++ )
{
if(orowbrace)
out << orowbrace;
if( m.data )
writeElems(out, m.ptr(i), m.cols, type, elembrace);
if(orowbrace)
out << crowbrace << (i+1 < m.rows ? ", " : "");
if(i+1 < m.rows)
{
if(rowsep)
out << rowsep << (singleLine ? " " : "");
if(!singleLine)
out << "\n ";
}
}
}
As you can see, if your Mat's dimensionality is greater than 2, the assertion (CV_Assert(m.dims <= 2)) fails, as it does in your code.
calcHist() with the parameters you gave produces a 3-dimensional Mat, and thus it cannot be displayed using the << operator.
By calling calcHist() that way you get a 3-dimensional histogram, and I don't see a simple way to visualize that in OpenCV (which doesn't mean it can't be done). If it's something you must do, I would suggest looking into OpenGL for 3D data visualization. If not, you could simply call this function for each channel separately - you will get three one-dimensional histograms, which you can print using the << operator.

Openni opencv kinect Bad Memory allocation

Basically I've got a loop which goes through all the kinects depth pixels. If they are greater than 3000mm it sets the pixel value to black.
For some reason this works only at a close range while pointed to a wall. If I pull the kinect back (giving it a larger area to scan) I get a Bad Memory allocation error. My code can be found below. I get the bad memory allocation error inside that try catch statement. Most of the code is from the opencv kinect sample here and here.
I figured out the problem: it's because the depth values are stored in a flat array instead of a matrix. I need a better way of finding which location in the array corresponds to the (x, y) of a pixel, instead of using (i = x + y*640).
#include <opencv.hpp>
#include <iostream>
#include <string>
#include <stdio.h>
#include <OpenNI.h>
using namespace std;
using namespace cv;
int main()
{
openni::Device device;
openni::VideoStream depth;
const char* device_uri = openni::ANY_DEVICE;
openni::Status ret = openni::OpenNI::initialize();
// Open
ret =device.open( device_uri );
ret = depth.create( device, openni::SENSOR_DEPTH );
if ( ret == openni::STATUS_OK )
{
// Start Depth
depth.start();
}
// Get Depth Stream Min-Max Value
int minDepthValue = depth.getMinPixelValue();
int maxDepthValue = depth.getMaxPixelValue();
//cout << "Depth min-Max Value : " << minDepthValue << "-" << maxDepthValue << endl;
// Frame Information Reference
openni::VideoFrameRef depthFrame;
// Get Sensor Resolution Information
int dImgWidth = depth.getVideoMode().getResolutionX();
int dImgHeight = depth.getVideoMode().getResolutionY();
// Depth Image Matrix
cv::Mat dImg = cv::Mat( dImgHeight, dImgWidth, CV_8UC3 );
Mat grey= cvCreateImage(cvSize(640, 480), 8, 1); ;
for(;;)
{
depth.readFrame( &depthFrame );
openni::DepthPixel* depthImgRaw = (openni::DepthPixel*)depthFrame.getData();
for ( int i = 0 ; i < ( depthFrame.getDataSize() / sizeof( openni::DepthPixel ) ) ; i++ )
{
int idx = i * 3; // Grayscale
unsigned char* data = &dImg.data[idx];
int gray_scale = ( ( depthImgRaw[i] * 255 ) / ( maxDepthValue - minDepthValue ) );
data[0] = (unsigned char)~gray_scale;
data[1] = (unsigned char)~gray_scale;
data[2] = (unsigned char)~gray_scale;
}
openni::DepthPixel* depthpixels = (openni::DepthPixel*)depthFrame.getData();
cvtColor(dImg, grey, CV_RGB2GRAY);
int i ;
try{
for( int y =0; y < 480 ; y++){
//getting in to each pixel in a row
for(int x = 0; x < 640; x++){
//getting out the corresponding pixel value from the array
i = x+y*640;
if (depthpixels[i] >3000)
{
grey.at<unsigned char>(x,y) = 0;
}
}
}
}catch(exception e)
{cout << e.what() <<endl ;
cout <<depthpixels[i] <<endl ;
cout << i <<endl ;
}
// cv:imshow( "depth", dImg );
imshow("dpeth2", grey);
int k = cvWaitKey( 30 ); // About 30fps
if ( k == 0x1b )
break;
}
// Destroy Streams
depth.destroy();
// Close Device
device.close();
// Shutdown OpenNI
openni::OpenNI::shutdown();
return 0;
}
solved the problem simply by swapping my x and y around
for( y =0; y < 480 ; y++)
{
//getting in to each pixel in a row
for( x = 0; x < 640; x++)
{
if (depthpixels[i]>1500)
{
grey.at<unsigned char >(y,x) = 0;
}
if (depthpixels[i] <500)
{
grey.at<unsigned char >(y,x) = 0;
}
i++;
}
}

Failed Assertion Using HOGDescriptor

Ok, so I've decided that using a histogram of oriented gradients is a better method for image fingerprinting vs. creating a histogram of sobel derivatives. I think I finally have it mostly figured out but when I test my code I get the following:
OpenCV Error: Assertion failed ((winSize.width - blockSize.width) % blockStride.width == 0 && (winSize.height - blockSize.height) % blockStride.height == 0).
As of now I'm just trying to figure out how to compute the HOG correctly and see the results; but not visually, I just want some very basic output to see if the HOG was created. Then I'll figure out how to use it in image comparison.
Here is my sample code:
using namespace cv;
using namespace std;
int main(int argc, const char * argv[])
{
// Initialize string variables.
string thePath, img, hogSaveFile;
thePath = "/Users/Mikie/Documents/Xcode/images/";
img = thePath + "HDimage.jpg";
hogSaveFile = thePath + "HDimage.yml";
// Create mats.
Mat src;
// Load image as grayscale.
src = imread(img, CV_LOAD_IMAGE_GRAYSCALE);
// Verify source loaded.
if(src.empty()){
cout << "No image data. \n ";
return -1;
}else{
cout << "Image loaded. \n" << "Size: " << src.cols << " X " << src.rows << "." << "\n";
}
// Initialize float variables.
float imgWidth, imgHeight, newWidth, newHeight;
imgWidth = src.cols;
imgHeight = src.rows;
newWidth = 320;
newHeight = (imgHeight/imgWidth)*newWidth;
Mat dst = Mat::zeros(newHeight, newWidth, CV_8UC3);
resize(src, dst, Size(newWidth, newHeight), CV_INTER_LINEAR);
// Was resize successful?
if (dst.rows < src.rows && dst.cols < src.cols) {
cout << "Resize successful. \n" << "New size: " << dst.cols << " X " << dst.rows << "." << "\n";
} else {
cout << "Resize failed. \n";
return -1;
}
vector<float>theHOG(Mat dst);{
if (dst.empty()) {
cout << "Image lost. \n";
} else {
cout << "Setting up HOG. \n";
}
imshow("Image", dst);
bool gammaC = true;
int nlevels = HOGDescriptor::DEFAULT_NLEVELS;
Size winS(newWidth, newHeight);
// int block_size = 16;
// int block_stride= 8;
// int cell_size = 8;
int gbins = 9;
vector<float> descriptorsValues;
vector<Point> locations;
HOGDescriptor hog(Size(320, 412), Size(16, 16), Size(8, 8), Size(8, 8), gbins, -1, HOGDescriptor::L2Hys, 0.2, gammaC, nlevels);
hog.compute(dst, descriptorsValues, Size(0,0), Size(0,0), locations);
printf("descriptorsValues.size() = %ld \n", descriptorsValues.size()); //prints 960
for (int i = 0; i <descriptorsValues.size(); i++) {
cout << descriptorsValues[i] << endl;
}
}
cvWaitKey(0);
return 0;
}
As you can see, I messed around with different variables to define the sizes but to no avail so, I commented them out and tried manually setting them. Still nothing. What am I doing wrong? Any help will be greatly appreciated.
Thank you!
You are initializing the HOGDescriptor incorrectly.
The assertion states that each of the first three input parameters must satisfy the constraint:
(winSize - blockSize) % blockStride == 0
in both height and width dimensions.
The problem is that winSize.height does not satisfy this constraint, considering the other parameters you initialize hog with:
(412 - 16) % 8 = 4 //Problem!!
Probably the simplest fix is to increase your window dimensions from cv::Size(320,412) to something divisible by 8, perhaps cv::Size(320,416), but the specific size will depend on your specific requirements. Just pay attention to what the assertion is saying!

Resources