I am working on a project to correct the orientation of an image. In this code I am detecting a sheet of paper.
Steps that I used:
1. Apply the Hough line transform.
2. Detect the corners.
3. Apply a perspective transform.
With all this I am able to detect the sheet of paper, but it only works for one or two images, not for all of them, and I do not understand why.
I think the problem in this code is that it does not detect the corners correctly, which is why I cannot correct the perspective of the image.
It works on this image, but when I use some other image instead, it does not.
#include <cv.h>
#include <highgui.h>
#include <iostream> // needed for std::cout
using namespace std;
using namespace cv;
Point2f center(0,0);
Point2f computeIntersect(Vec4i a, Vec4i b)
{
int x1 = a[0], y1 = a[1], x2 = a[2], y2 = a[3], x3 = b[0], y3 = b[1], x4 = b[2], y4 = b[3];
if (float d = ((float)(x1 - x2) * (y3 - y4)) - ((y1 - y2) * (x3 - x4)))
{
Point2f pt;
pt.x = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / d;
pt.y = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / d;
return pt;
}
else
return Point2f(-1, -1);
}
void sortCorners(vector<Point2f>& corners, Point2f center)
{
vector<Point2f> top, bot;
for (int i = 0; i < corners.size(); i++)
{
if (corners[i].y < center.y)
top.push_back(corners[i]);
else
bot.push_back(corners[i]);
}
corners.clear();
if (top.size() == 2 && bot.size() == 2){
Point2f tl = top[0].x > top[1].x ? top[1] : top[0];
Point2f tr = top[0].x > top[1].x ? top[0] : top[1];
Point2f bl = bot[0].x > bot[1].x ? bot[1] : bot[0];
Point2f br = bot[0].x > bot[1].x ? bot[0] : bot[1];
corners.push_back(tl);
corners.push_back(tr);
corners.push_back(br);
corners.push_back(bl);
}
}
int main()
{
Mat src,cann,hsv;
src = imread("C:\\im.jpg", CV_LOAD_IMAGE_COLOR); // WINDOW_AUTOSIZE is a namedWindow flag, not an imread flag
if (src.empty())
return -1;
imshow("original",src);
blur(src, src, Size(3, 3));
Canny(src, cann, 50, 200, 3);
cvtColor(cann, hsv, CV_GRAY2BGR);
vector<Vec4i> lines;
HoughLinesP(cann, lines, 1, CV_PI/180, 70, 30, 10);
for( size_t i = 0; i < lines.size(); i++ )
{
Vec4i l = lines[i];
line( hsv, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0,0,255), 2, CV_AA);
}
// Expand the lines
for (int i = 0; i < lines.size(); i++)
{
Vec4i v = lines[i];
lines[i][0] = 0;
lines[i][1] = ((float)v[1] - v[3]) / (v[0] - v[2]) * -v[0] + v[1];
lines[i][2] = src.cols;
lines[i][3] = ((float)v[1] - v[3]) / (v[0] - v[2]) * (src.cols - v[2]) + v[3];
}
vector<Point2f> corners;
for (int i = 0; i < lines.size(); i++)
{
for (int j = i+1; j < lines.size(); j++)
{
Point2f pt = computeIntersect(lines[i], lines[j]);
if (pt.x >= 0 && pt.y >= 0)
corners.push_back(pt);
}
}
vector<Point2f> approx;
approxPolyDP(Mat(corners), approx, arcLength(Mat(corners), true) * 0.02, true);
//if (approx.size() != 4)
// {
// cout << "The object is not quadrilateral!" << endl;
//return -1;
//}
// Get mass center
for (int i = 0; i < corners.size(); i++)
center += corners[i];
center *= (1. / corners.size());
sortCorners(corners, center);
if (corners.size() == 0)
{
cout << "The corners were not sorted correctly!" << endl;
return -1;
}
Mat dst = src.clone();
// Draw lines
for (int i = 0; i < lines.size(); i++)
{
Vec4i v = lines[i];
line(dst, Point(v[0], v[1]), Point(v[2], v[3]), CV_RGB(0,255,0));
}
// Draw corner points
circle(dst, corners[0], 3, CV_RGB(255,0,0), 2);
circle(dst, corners[1], 3, CV_RGB(0,255,0), 2);
circle(dst, corners[2], 3, CV_RGB(0,0,255), 2);
circle(dst, corners[3], 3, CV_RGB(255,255,255), 2);
// Draw mass center
circle(dst, center, 3, CV_RGB(255,255,0), 2);
Mat quad = Mat::zeros(300, 220, CV_8UC3);
vector<Point2f> quad_pts;
quad_pts.push_back(Point2f(0, 0));
quad_pts.push_back(Point2f(quad.cols, 0));
quad_pts.push_back(Point2f(quad.cols, quad.rows));
quad_pts.push_back(Point2f(0, quad.rows));
Mat transmtx = getPerspectiveTransform(corners, quad_pts);
warpPerspective(src, quad, transmtx, quad.size());
imshow("blurr",src);
imshow("canney",cann);
imshow("hough",hsv);
imshow("image", dst);
imshow("quadrilateral", quad);
waitKey(0);
return 0;
}
Please help me, I am really stuck with this.
Your algorithm assumes that the HoughLinesP function will always detect exactly 4 lines and that each one will lie on a different edge of the paper. However, this assumption is wrong. In your particular case, when you work with the second image, it returns 5 lines. Click to see the detected lines (marked by non-gray colours).
Quick fix
I changed the value of the 6th HoughLinesP argument (the minLineLength parameter) to 70. After that, only four lines were detected in the image, but another bug surfaced: 5 corners were detected instead of 4. The reason? Two of the opposite edges were not parallel and they intersected far outside the image area. This condition was causing the problem:
if (pt.x >= 0 && pt.y >= 0)
corners.push_back(pt);
It is not enough to check whether the corner coordinates are non-negative. Instead, you have to make sure that the corners lie within boundaries that make sense; in your case these could be the boundaries of the image.
if (pt.x >= 0 && pt.y >= 0 && pt.x < src.cols && pt.y < src.rows)
corners.push_back(pt);
After changing the threshold and fixing the condition, I obtained this result: (Click to see an image)
Warning
As you can see, yet another problem surfaced: the corners are not detected as accurately as they could be. You could use the information provided by the Canny edges to your advantage here, but I do not want to venture beyond the scope of your question, so I'll stop.
I called my solution a "quick fix" because it only solves this one particular case. If you want a more general solution and you want to keep using your algorithm, you will have to compute a reasonable threshold estimate every time before you call HoughLinesP, for example as sketched below.
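A minimal sketch of one such heuristic (this is only an assumption, not a tested rule: it ties minLineLength to the image size so that only segments comparable in length to a paper edge survive):
int minLineLength = cvRound(0.15 * MIN(src.cols, src.rows)); // hypothetical heuristic: ~15% of the shorter image side
HoughLinesP(cann, lines, 1, CV_PI/180, 70, minLineLength, 10);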
I have a problem in my iOS application where I attempt to obtain a view matrix using solvePnP and render a 3D cube using modern OpenGL. While my code attempts to render the 3D cube directly on top of the detected marker, it renders with a certain offset from the marker (see this video for an example):
https://www.youtube.com/watch?v=HhP5Qr3YyGI&feature=youtu.be
(On the bottom right of the image you can see an OpenCV render of the homography around the tracked marker. The rest of the screen is an OpenGL render of the camera input frame and a 3D cube at location (0,0,0).)
The cube rotates and translates correctly whenever I move the marker, though it is telling that there is some difference in the scale of the translations (i.e. if I move my marker 5 cm in the real world, it hardly moves by 1 cm on screen).
These are what I believe to be the relevant parts of the code where the error could come from.
Extracting the view matrix from the homography:
AVCaptureDevice *deviceInput = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
AVCaptureDeviceFormat *format = deviceInput.activeFormat;
CMFormatDescriptionRef fDesc = format.formatDescription;
CGSize dim = CMVideoFormatDescriptionGetPresentationDimensions(fDesc, true, true);
const float cx = float(dim.width) / 2.0;
const float cy = float(dim.height) / 2.0;
const float HFOV = format.videoFieldOfView;
const float VFOV = ((HFOV)/cx)*cy;
const float fx = abs(float(dim.width) / (2 * tan(HFOV / 180 * float(M_PI) / 2)));
const float fy = abs(float(dim.height) / (2 * tan(VFOV / 180 * float(M_PI) / 2)));
Mat camIntrinsic = Mat::zeros(3, 3, CV_64F);
camIntrinsic.at<double>(0, 0) = fx;
camIntrinsic.at<double>(0, 2) = cx;
camIntrinsic.at<double>(1, 1) = fy;
camIntrinsic.at<double>(1, 2) = cy;
camIntrinsic.at<double>(2, 2) = 1.0;
std::vector<cv::Point3f> object3dPoints;
object3dPoints.push_back(cv::Point3f(-0.5f,-0.5f,0));
object3dPoints.push_back(cv::Point3f(+0.5f,-0.5f,0));
object3dPoints.push_back(cv::Point3f(+0.5f,+0.5f,0));
object3dPoints.push_back(cv::Point3f(-0.5f,+0.5f,0));
cv::Mat raux,taux;
cv::Mat Rvec, Tvec;
cv::solvePnP(object3dPoints, mNewImageBounds, camIntrinsic, Mat(),raux,taux); //mNewImageBounds are the 4 corner of the homography detected by perspectiveTransform (the green outline seen in the image)
raux.convertTo(Rvec,CV_32F);
taux.convertTo(Tvec ,CV_64F);
Mat Rot(3,3,CV_32FC1);
Rodrigues(Rvec, Rot);
// [R | t] matrix
Mat_<double> para = Mat_<double>::eye(4,4);
Rot.convertTo(para(cv::Rect(0,0,3,3)),CV_64F);
Tvec.copyTo(para(cv::Rect(3,0,1,3)));
Mat cvToGl = Mat::zeros(4, 4, CV_64F);
cvToGl.at<double>(0, 0) = 1.0f;
cvToGl.at<double>(1, 1) = -1.0f; // Invert the y axis
cvToGl.at<double>(2, 2) = -1.0f; // invert the z axis
cvToGl.at<double>(3, 3) = 1.0f;
para = cvToGl * para;
Mat_<double> modelview_matrix;
Mat(para.t()).copyTo(modelview_matrix); // transpose to col-major for OpenGL
glm::mat4 openGLViewMatrix;
for(int col = 0; col < modelview_matrix.cols; col++)
{
for(int row = 0; row < modelview_matrix.rows; row++)
{
openGLViewMatrix[col][row] = modelview_matrix.at<double>(col,row);
}
}
I made sure the camera intrinsic matrix contains correct values, and I believe the portion which converts the OpenCV Mat to an OpenGL view matrix to be correct, as the cube translates and rotates in the right directions.
Once the view matrix is calculated, I use it to draw the cube as follows:
_projectionMatrix = glm::perspective<float>(radians(60.0f), fabs(view.bounds.size.width / view.bounds.size.height), 0.1f, 100.0f);
_cube_ModelMatrix = glm::translate(glm::vec3(0,0,0));
const mat4 MVP = _projectionMatrix * openGLViewMatrix * _cube_ModelMatrix;
glUniformMatrix4fv(glGetUniformLocation(_cube_program, "ModelMatrix"), 1, GL_FALSE, value_ptr(MVP));
glDrawElements(GL_TRIANGLES, 36, GL_UNSIGNED_INT, BUFFER_OFFSET(0));
Is anyone able to spot my error?
You should create the perspective projection matrix as explained here: http://ksimek.github.io/2013/06/03/calibrated_cameras_in_opengl
Here is some quick code:
const float fx = intrinsicParams(0, 0); // Focal length in x axis
const float fy = intrinsicParams(1, 1); // Focal length in y axis
const float cx = intrinsicParams(0, 2); // Primary point x
const float cy = intrinsicParams(1, 2); // Primary point y
projectionMatrix(0, 0) = 2.0f * fx;
projectionMatrix(0, 1) = 0.0f;
projectionMatrix(0, 2) = 0.0f;
projectionMatrix(0, 3) = 0.0f;
projectionMatrix(1, 0) = 0.0f;
projectionMatrix(1, 1) = 2.0f * fy;
projectionMatrix(1, 2) = 0.0f;
projectionMatrix(1, 3) = 0.0f;
projectionMatrix(2, 0) = 2.0f * cx - 1.0f;
projectionMatrix(2, 1) = 2.0f * cy - 1.0f;
projectionMatrix(2, 2) = -(far + near) / (far - near);
projectionMatrix(2, 3) = -1.0f;
projectionMatrix(3, 0) = 0.0f;
projectionMatrix(3, 1) = 0.0f;
projectionMatrix(3, 2) = -2.0f * far * near / (far - near);
projectionMatrix(3, 3) = 0.0f;
For more information about the intrinsic matrix, see: http://ksimek.github.io/2013/08/13/intrinsic
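As a rough usage sketch (my own reading, not part of the linked article: glm::mat4 is column-major and indexed as m[column][row], which appears to match the (column, row) indexing above; nearZ and farZ are the OpenGL clipping plane distances, and fx, fy, cx, cy are assumed here to be normalized by the image width and height):
glm::mat4 projectionFromIntrinsics(float fx, float fy, float cx, float cy, float nearZ, float farZ)
{
    glm::mat4 P(0.0f); // start from an all-zero matrix; glm indexing is P[col][row]
    P[0][0] = 2.0f * fx;
    P[1][1] = 2.0f * fy;
    P[2][0] = 2.0f * cx - 1.0f;
    P[2][1] = 2.0f * cy - 1.0f;
    P[2][2] = -(farZ + nearZ) / (farZ - nearZ);
    P[2][3] = -1.0f; // perspective term
    P[3][2] = -2.0f * farZ * nearZ / (farZ - nearZ);
    return P;
}
You would then use this matrix in place of the fixed-FOV glm::perspective call when building MVP.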
I'm trying to draw a triangle like this one in a view (one UIView, one NSView):
My first thought was CoreGraphics, but I couldn't find any information that would help me draw a gradient between three points of arbitrary color.
Any help?
Thanks!
Actually it's pretty simple with CoreGraphics. Below you can find code that renders the given triangle, but first let's think about how we can solve this problem.
Theory
Imagine an equilateral triangle with side length w. All three angles are equal to 60 degrees:
Each angle will represent one component of a pixel: red, green or blue.
Let's analyze the intensity of the green component in a pixel near the top angle:
The closer a pixel is to the angle, the more intense that component will be, and vice versa. Here we can decompose our main goal into smaller ones:
Draw the triangle pixel by pixel.
For each pixel, calculate the value of each component based on its distance from the corresponding angle.
To solve the first task we will use a CoreGraphics bitmap context. It will have four components per pixel, each 8 bits long. This means that a component value may vary from 0 to 255. The fourth component is the alpha channel and will always be equal to the maximum value, 255. Here is an example of how values will be interpolated for the top angle:
Now we need to think about how we can calculate the value of a component.
First, let's define the main color for each angle:
Now let's choose an arbitrary point A with coordinates (x, y) inside the triangle:
Next, we draw a line from the angle associated with the red component through A until it intersects the opposite side of the triangle:
If we can find d (the distance from the angle to A) and c (the full length of the line, from the angle to where it meets the opposite side), their ratio gives the normalized value of the component, so the value can be calculated easily:
normalized value = 1 - d / c, and the 8-bit component value is 255 * (1 - d / c)
The formula for finding the distance between two points is simple:
distance = sqrt((x2 - x1)^2 + (y2 - y1)^2)
We can easily find the distance d, but not c, because we do not have the coordinates of the intersection. Actually it's not that hard: we just need to build the equation of the line that passes through A and the equation of the line describing the opposite side of the triangle, and find their intersection:
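For two lines y = a1*x + b1 and y = a2*x + b2, the intersection is x = (b2 - b1) / (a1 - a2), y = (a1*b2 - a2*b1) / (a1 - a2); this is exactly what the a, b, c, d variables in the code below evaluate.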
Having the intersection point, we can apply the distance formula to find c and finally calculate the component value for the current point.
The same approach applies to the other two components.
Code
Here is the code that implements the concepts above:
+ (UIImage *)triangleWithSideLength:(CGFloat)sideLength {
return [self triangleWithSideLength:sideLength scale:[UIScreen mainScreen].scale];
}
+ (UIImage *)triangleWithSideLength:(CGFloat)sideLength
scale:(CGFloat)scale {
UIImage *image = nil;
CGSize size = CGSizeApplyAffineTransform((CGSize){sideLength, sideLength * sin(M_PI / 3)}, CGAffineTransformMakeScale(scale, scale));
size_t const numberOfComponents = 4;
size_t width = ceilf(size.width);
size_t height = ceilf(size.height);
size_t realBytesPerRow = width * numberOfComponents;
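// (realBytesPerRow + 0xFF) & ~0xFF rounds the row stride up to the next multiple of 256 bytes; pixels are then addressed with alignedPixelsPerRow below.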
size_t alignedBytesPerRow = (realBytesPerRow + 0xFF) & ~0xFF;
size_t alignedPixelsPerRow = alignedBytesPerRow / numberOfComponents;
CGContextRef ctx = CGBitmapContextCreate(NULL,
width,
height,
8,
alignedBytesPerRow,
CGColorSpaceCreateDeviceRGB(),
(CGBitmapInfo)kCGImageAlphaPremultipliedLast);
char *data = CGBitmapContextGetData(ctx);
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
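// Skip pixels that fall outside the triangle (left or right of its slanted sides) on this row.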
int edge = ceilf((height - i) / sqrt(3));
if (j < edge || j > width - edge) {
continue;
}
CGFloat redNormalized = 0;
CGFloat greenNormalized = 0;
CGFloat blueNormalized = 0;
CGPoint currentTrianglePoint = (CGPoint){j / scale, (height - i) / scale};
[self calculateCurrentValuesAtGivenPoint:currentTrianglePoint
sideLength:sideLength
sideOne:&redNormalized
sideTwo:&greenNormalized
sideThree:&blueNormalized];
int32_t red = redNormalized * 0xFF;
int32_t green = greenNormalized * 0xFF;
int32_t blue = blueNormalized * 0xFF;
char *pixel = data + (j + i * alignedPixelsPerRow) * numberOfComponents;
*pixel = red;
*(pixel + 1) = green;
*(pixel + 2) = blue;
*(pixel + 3) = 0xFF;
}
}
CGImageRef cgImage = CGBitmapContextCreateImage(ctx);
image = [[UIImage alloc] initWithCGImage:cgImage];
CGContextRelease(ctx);
CGImageRelease(cgImage);
return image;
}
+ (void)calculateCurrentValuesAtGivenPoint:(CGPoint)point
sideLength:(CGFloat)length
sideOne:(out CGFloat *)sideOne
sideTwo:(out CGFloat *)sideTwo
sideThree:(out CGFloat *)sideThree {
CGFloat height = sin(M_PI / 3) * length;
if (sideOne != NULL) {
// Side one is at 0, 0
CGFloat currentDistance = sqrt(point.x * point.x + point.y * point.y);
if (currentDistance != 0) {
CGFloat a = point.y / point.x;
CGFloat b = 0;
CGFloat c = -height / (length / 2);
CGFloat d = 2 * height;
CGPoint intersection = (CGPoint){(d - b) / (a - c), (a * d - c * b) / (a - c)};
CGFloat currentH = sqrt(intersection.x * intersection.x + intersection.y * intersection.y);
*sideOne = 1 - currentDistance / currentH;
} else {
*sideOne = 1;
}
}
if (sideTwo != NULL) {
// Side two is at w, 0
CGFloat currentDistance = sqrt(pow((point.x - length), 2) + point.y * point.y);
if (currentDistance != 0) {
CGFloat a = point.y / (point.x - length);
CGFloat b = height / (length / 2);
CGFloat c = a * -point.x + point.y;
CGFloat d = b * -length / 2 + height;
CGPoint intersection = (CGPoint){(d - c) / (a - b), (a * d - b * c) / (a - b)};
CGFloat currentH = sqrt(pow(length - intersection.x, 2) + intersection.y * intersection.y);
*sideTwo = 1 - currentDistance / currentH;
} else {
*sideTwo = 1;
}
}
if (sideThree != NULL) {
// Side three is at w / 2, w * sin60 degrees
CGFloat currentDistance = sqrt(pow((point.x - length / 2), 2) + pow(point.y - height, 2));
if (currentDistance != 0) {
float dy = point.y - height;
float dx = (point.x - length / 2);
if (fabs(dx) > FLT_EPSILON) {
CGFloat a = dy / dx;
CGFloat b = 0;
CGFloat c = a * -point.x + point.y;
CGFloat d = 0;
CGPoint intersection = (CGPoint){(d - c) / (a - b), (a * d - b * c) / (a - b)};
CGFloat currentH = sqrt(pow(length / 2 - intersection.x, 2) + pow(height - intersection.y, 2));
*sideThree = 1 - currentDistance / currentH;
} else {
*sideThree = 1 - currentDistance / height;
}
} else {
*sideThree = 1;
}
}
}
Here is a triangle image produced by this code:
How do I draw optical flow images from ocl::PyrLKOpticalFlow::dense(), which calculates both the horizontal and vertical components of the optical flow? I don't know how to draw them. I'm new to OpenCV. Can anyone help me?
Syntax:
ocl::PyrLKOpticalFlow::dense(oclMat &prevImg, oclMat &nextImg, oclMat &u, oclMat &v, oclMat &err)
A well established method in the optical flow community is to display a motion vector field as a color coded image, as you can see in the various data sets, e.g. the MPI dataset or the Middlebury dataset.
To do this you estimate the length and the angle of each motion vector, and use an HSV to RGB colorspace transformation (see the OpenCV cvtColor function) to create your color coded image. Use the angle of the motion vector as the H (Hue) channel, the normalized length as the S (Saturation) channel, and set V (Value) to 1. The color of your image will then show the direction of the motion and the saturation its length (speed).
The code should look like this (note: if use_value == true, the Saturation is set to 1 and the Value channel is related to the motion vector length):
void FlowToRGB(const cv::Mat & inpFlow,
cv::Mat & rgbFlow,
const float & max_size ,
bool use_value)
{
if(inpFlow.empty()) return;
if( inpFlow.depth() != CV_32F)
throw std::runtime_error("FlowToRGB: error, inpFlow has the wrong data type (must be CV_32FC2)"); // needs <stdexcept>
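// 90/3.141 converts an angle in radians (presumably in [0, 2*pi)) to OpenCV's 8-bit hue range [0, 180).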
const float grad2deg = (float)(90/3.141);
cv::Mat pol(inpFlow.size(), CV_32FC2);
float mean_val = 0, min_val = 1000, max_val = 0;
float _dx, _dy;
for(int r = 0; r < inpFlow.rows; r++)
{
for(int c = 0; c < inpFlow.cols; c++)
{
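// cvmath::toPolar is the author's own helper; from its use below it appears to return (magnitude, angle in radians) as (.x, .y).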
cv::Point2f polar = cvmath::toPolar(inpFlow.at<cv::Point2f>(r,c));
polar.y *= grad2deg;
mean_val +=polar.x;
max_val = MAX(max_val, polar.x);
min_val = MIN(min_val, polar.x);
pol.at<cv::Point2f>(r,c) = cv::Point2f(polar.y,polar.x);
}
}
mean_val /= inpFlow.size().area();
float scale = max_val - min_val;
float shift = -min_val;//-mean_val + scale;
scale = 255.f/scale;
if( max_size > 0)
{
scale = 255.f/max_size;
shift = 0;
}
//calculate the angle, motion value
cv::Mat hsv(inpFlow.size(), CV_8UC3);
uchar * ptrHSV = hsv.ptr<uchar>();
int idx_val = (use_value) ? 2:1;
int idx_sat = (use_value) ? 1:2;
for(int r = 0; r < inpFlow.rows; r++, ptrHSV += hsv.step1())
{
uchar * _ptrHSV = ptrHSV;
for(int c = 0; c < inpFlow.cols; c++, _ptrHSV+=3)
{
cv::Point2f vpol = pol.at<cv::Point2f>(r,c);
_ptrHSV[0] = cv::saturate_cast<uchar>(vpol.x);
_ptrHSV[idx_val] = cv::saturate_cast<uchar>( (vpol.y + shift) * scale);
_ptrHSV[idx_sat] = 255;
}
}
cv::Mat rgbFlow32F;
cv::cvtColor(hsv, rgbFlow32F, CV_HSV2BGR);
rgbFlow32F.convertTo(rgbFlow, CV_8UC3);
}
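To feed this function from the dense() call in the question, a rough sketch might look like this (assumptions: u and v are the single-channel CV_32F outputs named in the syntax line above, and they are downloaded to the host before merging into a two-channel flow image):
cv::Mat u_host, v_host;
u.download(u_host); // copy the GPU results back into cv::Mat
v.download(v_host);
std::vector<cv::Mat> uv;
uv.push_back(u_host);
uv.push_back(v_host);
cv::Mat flow, flowRGB;
cv::merge(uv, flow); // CV_32FC2: (dx, dy) per pixel
FlowToRGB(flow, flowRGB, -1.f, true); // max_size <= 0 lets the function compute its own scaling
cv::imshow("optical flow", flowRGB);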
Python
Please refer to opt_flow.py#draw_flow
import cv2
import numpy as np

def draw_flow(img, flow, step=16):
    h, w = img.shape[:2]
    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2, -1).astype(int)
    fx, fy = flow[y, x].T
    lines = np.vstack([x, y, x + fx, y + fy]).T.reshape(-1, 2, 2)
    lines = np.int32(lines + 0.5)
    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.polylines(vis, lines, 0, (0, 255, 0))
    for (x1, y1), (x2, y2) in lines:
        cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1)
    return vis
C++
Please refer to tvl1_optical_flow.cpp#drawOpticalFlow
static void drawOpticalFlow(const Mat_<Point2f>& flow, Mat& dst, float maxmotion = -1)
{
dst.create(flow.size(), CV_8UC3);
dst.setTo(Scalar::all(0));
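// isFlowCorrect() and computeColor() are helper functions defined in the linked tvl1_optical_flow.cpp sample.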
// determine motion range:
float maxrad = maxmotion;
if (maxmotion <= 0)
{
maxrad = 1;
for (int y = 0; y < flow.rows; ++y)
{
for (int x = 0; x < flow.cols; ++x)
{
Point2f u = flow(y, x);
if (!isFlowCorrect(u))
continue;
maxrad = max(maxrad, sqrt(u.x * u.x + u.y * u.y));
}
}
}
for (int y = 0; y < flow.rows; ++y)
{
for (int x = 0; x < flow.cols; ++x)
{
Point2f u = flow(y, x);
if (isFlowCorrect(u))
dst.at<Vec3b>(y, x) = computeColor(u.x / maxrad, u.y / maxrad);
}
}
}
I did something like this in my code, a while ago:
calcOpticalFlowPyrLK(frame_prec,frame_cur,v_corners_prec[i],corners_cur,status, err);
for(int j=0; j<corners_cur.size(); j++){
if(status[j]){
line(frame_cur,v_corners_prec[i][j],corners_cur[j],colors[i]);
}
}
Basically I draw a line between the points tracked by the optical flow in this iteration and the previous one; this draws the optical flow lines, which represent the flow on the image.
Hope this helps.
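For reference, a more self-contained sketch of the same idea (my own illustration, not the poster's code; it assumes two consecutive grayscale frames prevGray and currGray, and the feature-detection parameters are arbitrary):
std::vector<cv::Point2f> prevPts, nextPts;
std::vector<uchar> status;
std::vector<float> err;
cv::goodFeaturesToTrack(prevGray, prevPts, 500, 0.01, 10); // pick corners to track in the previous frame
cv::calcOpticalFlowPyrLK(prevGray, currGray, prevPts, nextPts, status, err);
cv::Mat vis;
cv::cvtColor(currGray, vis, CV_GRAY2BGR);
for (size_t j = 0; j < nextPts.size(); j++)
    if (status[j])
        cv::line(vis, prevPts[j], nextPts[j], cv::Scalar(0, 255, 0)); // motion of each successfully tracked point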
I am attempting to determine the image gradient direction using the results from openCV's Sobel method.
I understand this should be a very simple task. I have copied the methods from a number of resources and answers here, but whatever I do the resultant directions are always between 0 and 57 degrees (I would expect the range to be 0-360).
I believe all the depths are correct. I have tried calculating the direction using the 16S data as well as the 8U data.
I just can't see where I'm going wrong. Can anyone spot my mistake?
void getGradients(IplImage* original, cv::Mat* gradArray)
{
cv::Mat original_Mat(original, true);
// Convert it to gray
cv::cvtColor( original_Mat, original_Mat, CV_RGB2GRAY );
//cv::blur(original_Mat, original_Mat, cv::Size(7,7));
/// Generate grad_x and grad_y
cv::Mat grad_x = cv::Mat::zeros(original->height, original->width, CV_16S);
cv::Mat grad_y = cv::Mat::zeros(original->height, original->width, CV_16S);
cv::Mat abs_grad_x = cv::Mat::zeros(original->height, original->width, CV_8U);
cv::Mat abs_grad_y = cv::Mat::zeros(original->height, original->width, CV_8U);
/// Gradient X
cv::Sobel(original_Mat, grad_x, CV_16S, 1, 0, 3);
cv::convertScaleAbs( grad_x, abs_grad_x );
/// Gradient Y
cv::Sobel(original_Mat, grad_y, CV_16S, 0, 1, 3);
cv::convertScaleAbs( grad_y, abs_grad_y );
uchar* pixelX = abs_grad_x.data;
uchar* pixelY = abs_grad_y.data;
uchar* grad1 = gradArray[0].data;
uchar* grad2 = gradArray[1].data;
uchar* grad3 = gradArray[2].data;
uchar* grad4 = gradArray[3].data;
uchar* grad5 = gradArray[4].data;
uchar* grad6 = gradArray[5].data;
uchar* grad7 = gradArray[6].data;
uchar* grad8 = gradArray[7].data;
int count = 0;
int min = 999999;
int max = 0;
for(int i = 0; i < grad_x.rows * grad_x.cols; i++)
{
int directionRAD = atan2(pixelY[i], pixelX[i]);
int directionDEG = directionRAD / PI * 180;
if(directionDEG < min){min = directionDEG;}
if(directionDEG > max){max = directionDEG;}
if(directionDEG >= 0 && directionDEG <= 45) { grad1[i] = 255; count++;}
if(directionDEG >= 45 && directionDEG <= 90) { grad2[i] = 255; count++;}
if(directionDEG >= 90 && directionDEG <= 135) { grad3[i] = 255; count++;}
if(directionDEG >= 135 && directionDEG <= 190) { grad4[i] = 255; count++;}
if(directionDEG >= 190 && directionDEG <= 225) { grad5[i] = 255; count++;}
if(directionDEG >= 225 && directionDEG <= 270) { grad6[i] = 255; count++;}
if(directionDEG >= 270 && directionDEG <= 315) { grad7[i] = 255; count++;}
if(directionDEG >= 315 && directionDEG <= 360) { grad8[i] = 255; count++;}
if(directionDEG < 0 || directionDEG > 360)
{
cout<<"Weird gradient direction given in method: getGradients.";
}
}
}
You're using integer arithmetic so your calculations for radians and degrees are suffering badly from truncation.
Also atan2 gives a result in the range -PI to +PI, so if you want a value in degrees in the range 0..360 you'll need to add a 180 degree correction:
double directionRAD = atan2(pixelY[i], pixelX[i]);
int directionDEG = (int)(180.0 + directionRAD / M_PI * 180.0);
Note the use of double rather than int for directionRAD.
Pro tip: learn to use a debugger to step through your code, inspecting variables as you go - that will make fixing simple bugs like this a lot easier than waiting for responses on StackOverflow.
You can get the x-derivative dx and y-derivative dy using the Sobel operator. Then you can use the formulas G = sqrt(dx^2 + dy^2) and theta = arctan(dy / dx) to calculate the magnitude and direction of the gradient. You may notice this is just converting Cartesian coordinates (x, y) to polar coordinates (rho, theta).
There is something wrong in your code: you take the absolute value of dx and dy, which forces the direction into the first quadrant of the Cartesian coordinate system. Also, the function you used, convertScaleAbs, converts the result to 8-bit, which introduces truncation error.
Here is a demo, partly based on your code, that calculates the magnitude and direction.
const string imgname = "F:/OpenCV/square.jpg";
Mat img = imread(imgname, CV_LOAD_IMAGE_COLOR);
// 1. convert it to gray value
Mat gray;
cvtColor(img, gray, CV_BGR2GRAY);
// 2. blur the image
blur(gray, gray, Size(7, 7));
// 3. sobel
Mat grad_x, grad_y;
Scharr(gray, grad_x, CV_32FC1, 1, 0);
Scharr(gray, grad_y, CV_32FC1, 0, 1);
// 4. calculate gradient magnitude and direction
Mat magnitude, direction;
bool useDegree = true; // use degree or rad
// the range of the direction is [0,2pi) or [0, 360)
cartToPolar(grad_x, grad_y, magnitude, direction, useDegree);
// test, the histogram of the directions
vector<int> cnt(8, 0); // 0-45, 45-90, ..., 315-360
for(auto iter = direction.begin<float>(); iter != direction.end<float>(); ++iter)
{
int idx = static_cast<int>(*iter) / 45;
++cnt[idx];
}
Mat scaled;
convertScaleAbs(magnitude, scaled);
imshow("magnitude", scaled);
for(auto v : cnt)
cout << v << " ";
You take the absolute value of the gradients, which maps all angles from [-180, 180] to [0, 90]. You also use integer arithmetic, which truncates the result.
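A minimal sketch of the fix (my own illustration, reusing the signed CV_16S grad_x and grad_y from the question's Sobel calls instead of the convertScaleAbs outputs):
for (int r = 0; r < grad_x.rows; r++)
{
    const short* gx = grad_x.ptr<short>(r); // signed Sobel responses
    const short* gy = grad_y.ptr<short>(r);
    for (int c = 0; c < grad_x.cols; c++)
    {
        double directionDEG = atan2((double)gy[c], (double)gx[c]) * 180.0 / CV_PI; // [-180, 180]
        if (directionDEG < 0) directionDEG += 360.0; // [0, 360)
        // ... bin directionDEG into the eight direction images as before
    }
}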