For my project I am using parts of the following code: link.
To track objects of a specific color I implemented this method:
My question is: How can I calculate the distance to the tracked colored objects?
Thank you in advance!
*The application calls the method for the left and right frame. This is not efficient...
**I need to calculate detectedObject.Zcor
DetectedObject Detect(IplImage *frame)
{
    // Track object (left frame and right frame)
    // Calculate average position
    // Show X,Y,Z coordinate and detected color
    color_image = frame;
    imgThreshold = cvCreateImage(cvSize(color_image->width, color_image->height), IPL_DEPTH_8U, 1);
    cvInitFont(&font, CV_FONT_HERSHEY_PLAIN, 1, 1, 0, 1.4f, CV_AA);
    imgdraw = cvCreateImage(cvGetSize(color_image), 8, 3);
    cvSetZero(imgdraw);
    cvFlip(color_image, color_image, 1);
    cvSmooth(color_image, color_image, CV_GAUSSIAN, 3, 0);
    threshold = getThreshold(color_image);
    cvErode(threshold, threshold, NULL, 3);
    cvDilate(threshold, threshold, NULL, 10);
    imgThreshold = cvCloneImage(threshold);
    storage = cvCreateMemStorage(0);
    contours = cvCreateSeq(0, sizeof(CvSeq), sizeof(CvPoint), storage);
    cvFindContours(threshold, storage, &contours, sizeof(CvContour), CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE, cvPoint(0,0));
    final = cvCreateImage(cvGetSize(color_image), 8, 3);
    for(; contours != 0; contours = contours->h_next)
    {
        CvRect rect = cvBoundingRect(contours, 0);
        cvRectangle(color_image,
                    cvPoint(rect.x, rect.y),
                    cvPoint(rect.x + rect.width, rect.y + rect.height),
                    cvScalar(0,0,255,0),
                    2, 8, 0);
        string s = to_string(rect.x) + "," + to_string(rect.y);
        char const* pchar = s.c_str();
        cvPutText(frame, pchar, cvPoint(rect.x, rect.y), &font, cvScalar(0,0,255,0));
        detectedObject.Xcor = rect.x;
        detectedObject.Ycor = rect.y;
    }
    cvShowImage("Threshold", imgThreshold);
    cvAdd(final, imgdraw, final);
    detectedObject.Zcor = 0;
    return detectedObject;
}
For depth estimation you will need a calibrated stereo pair (known camera matrices for both the left and the right cameras). Then, using the camera matrices and corresponding points/contours in the stereo pair, you can compute depth.
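For a rectified pair, the depth of a matched object follows from its horizontal disparity (Z = f * B / d). Below is a minimal sketch, not the poster's code: focalLengthPx and baselineMeters are assumed to come from your calibration, and the two x-coordinates from running Detect() on the left and right frames.

// Minimal sketch: depth from disparity for a rectified, calibrated stereo pair.
// focalLengthPx  : focal length in pixels (from the calibration)
// baselineMeters : distance between the two camera centers
// xLeft, xRight  : x-coordinate of the same object in the left and right frame
double computeDepth(double focalLengthPx, double baselineMeters,
                    double xLeft, double xRight)
{
    double disparity = xLeft - xRight;   // in pixels
    if (disparity <= 0.0)
        return 0.0;                      // no valid match
    return focalLengthPx * baselineMeters / disparity;   // Z = f * B / d
}

With the structure from the question this would end up as something like detectedObject.Zcor = computeDepth(f, B, leftObject.Xcor, rightObject.Xcor), where leftObject and rightObject (hypothetical names) are the detections returned by Detect() for the two frames.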
For the last couple of weeks I've been working on a simple proof-of-concept application in which a 3D model is projected over a specific Augmented Reality marker (in my case I am using Aruco markers) on iOS (with Swift and Objective-C).
I calibrated an iPad camera with a specific fixed lens position and used that to estimate the pose of the AR marker (which, from my debug analysis, seems pretty accurate). The problem appears (surprise, surprise) when I try to use a SceneKit scene to project a model over the marker.
I am aware that the axes in OpenCV and SceneKit are different (Y and Z) and I have already applied this correction, as well as handled the row-order/column-order difference between the two libraries.
After constructing the projection matrix, I apply that same transform to the 3D model and, from my debug analysis, the object seems to be translated to the desired position and with the desired rotation. The problem is that it never overlaps the specific image pixel position of the marker. I am using an AVCaptureVideoPreviewLayer to put the video in the background, which has the same bounds as my SceneKit view.
Does anyone have a clue why this happens? I tried playing with the cameras' FOVs, but with no real impact on the results.
Thank you all for your time.
EDIT 1: I will post some of the code here to show what I am currently doing.
I have two subviews inside the main view: one is a background AVCaptureVideoPreviewLayer and the other is a SceneKit view. Both have the same bounds as the main view.
At each frame I use an OpenCV wrapper which outputs the pose of each marker:
std::vector<int> ids;
std::vector<std::vector<cv::Point2f>> corners, rejected;
cv::aruco::detectMarkers(frame, _dictionary, corners, ids, _detectorParams, rejected);
if (ids.size() > 0){
    cv::aruco::drawDetectedMarkers(frame, corners, ids);
    cv::Mat rvecs, tvecs;
    cv::aruco::estimatePoseSingleMarkers(corners, 2.6, _intrinsicMatrix, _distCoeffs, rvecs, tvecs);
    // Let's protect ourselves against multiple markers
    if (rvecs.total() > 1)
        return;
    _markerFound = true;
    cv::Rodrigues(rvecs, _currentR);
    _currentT = tvecs;
    for (int row = 0; row < _currentR.rows; row++){
        for (int col = 0; col < _currentR.cols; col++){
            _currentExtrinsics.at<double>(row, col) = _currentR.at<double>(row, col);
        }
        _currentExtrinsics.at<double>(row, 3) = _currentT.at<double>(row);
    }
    _currentExtrinsics.at<double>(3,3) = 1;
    std::cout << tvecs << std::endl;
    // Convert the coordinate system of OpenCV to OpenGL (SceneKit).
    // Note that in OpenCV z goes away from the camera (in OpenGL it goes into the camera)
    // and y points down, while in OpenGL it points up.
    // Another note: OpenCV has a column-order matrix representation, while SceneKit
    // has a row-order matrix, but we'll take care of that later.
    cv::Mat cvToGl = cv::Mat::zeros(4, 4, CV_64F);
    cvToGl.at<double>(0,0) = 1.0f;
    cvToGl.at<double>(1,1) = -1.0f; // invert the y axis
    cvToGl.at<double>(2,2) = -1.0f; // invert the z axis
    cvToGl.at<double>(3,3) = 1.0f;
    _currentExtrinsics = cvToGl * _currentExtrinsics;
    cv::aruco::drawAxis(frame, _intrinsicMatrix, _distCoeffs, rvecs, tvecs, 5);
Then, in each frame, I convert the OpenCV matrix to an SCNMatrix4:
- (SCNMatrix4) transformToSceneKit:(cv::Mat&) openCVTransformation{
    SCNMatrix4 mat = SCNMatrix4Identity;
    // Transpose
    openCVTransformation = openCVTransformation.t();
    // copy the rotation rows
    mat.m11 = (float) openCVTransformation.at<double>(0, 0);
    mat.m12 = (float) openCVTransformation.at<double>(0, 1);
    mat.m13 = (float) openCVTransformation.at<double>(0, 2);
    mat.m14 = (float) openCVTransformation.at<double>(0, 3);
    mat.m21 = (float) openCVTransformation.at<double>(1, 0);
    mat.m22 = (float) openCVTransformation.at<double>(1, 1);
    mat.m23 = (float) openCVTransformation.at<double>(1, 2);
    mat.m24 = (float) openCVTransformation.at<double>(1, 3);
    mat.m31 = (float) openCVTransformation.at<double>(2, 0);
    mat.m32 = (float) openCVTransformation.at<double>(2, 1);
    mat.m33 = (float) openCVTransformation.at<double>(2, 2);
    mat.m34 = (float) openCVTransformation.at<double>(2, 3);
    // copy the translation row
    mat.m41 = (float) openCVTransformation.at<double>(3, 0);
    mat.m42 = (float) openCVTransformation.at<double>(3, 1) + 2.5;
    mat.m43 = (float) openCVTransformation.at<double>(3, 2);
    mat.m44 = (float) openCVTransformation.at<double>(3, 3);
    return mat;
}
At each frame in which the AR marker is found I add a box to the scene and apply the transformation to the object node:
SCNBox *box = [SCNBox boxWithWidth:5.0 height:5.0 length:5.0 chamferRadius:0.0];
_boxNode = [SCNNode nodeWithGeometry:box];
if (found){
    [self.delegate returnExtrinsicsMat:extrinsicMatrixOfTheMarker];
    Mat R, T;
    [self.delegate returnRotationMat:R];
    [self.delegate returnTranslationMat:T];
    SCNMatrix4 Transformation;
    Transformation = [self transformToSceneKit:extrinsicMatrixOfTheMarker];
    //_cameraNode.transform = SCNMatrix4Invert(Transformation);
    [_sceneKitScene.rootNode addChildNode:_cameraNode];
    //_cameraNode.camera.projectionTransform = SCNMatrix4Identity;
    //_cameraNode.camera.zNear = 0.0;
    _sceneKitView.pointOfView = _cameraNode;
    _boxNode.transform = Transformation;
    [_sceneKitScene.rootNode addChildNode:_boxNode];
    //_boxNode.position = SCNVector3Make(Transformation.m41, Transformation.m42, Transformation.m43);
    std::cout << (_boxNode.position.x) << " " << (_boxNode.position.y) << " " << (_boxNode.position.z) << std::endl << std::endl;
}
For example, if the translation vector is (-1, 5, 20), the object appears in the scene at position (-1, -5, -20), and the rotation is also correct. The problem is that it never appears in the correct position in the background image. I will add some images to show the result.
Does anyone know why this is happening?
I found the solution. Instead of applying the transform to the object's node, I applied the inverted transformation matrix to the camera node. Then, for the camera's perspective transform matrix, I applied the following matrix:
projection = SCNMatrix4Identity
projection.m11 = (2 * Float(cameraMatrix[0])) / -(ImageWidth * 0.5)
projection.m12 = (-2 * Float(cameraMatrix[1])) / (ImageWidth * 0.5)
projection.m13 = (width - (2 * Float(cameraMatrix[2]))) / (ImageWidth * 0.5)
projection.m22 = (2 * Float(cameraMatrix[4])) / (ImageHeight * 0.5)
projection.m23 = (-height + (2 * Float(cameraMatrix[5]))) / (ImageHeight * 0.5)
projection.m33 = (-far - near) / (far - near)
projection.m34 = (-2 * far * near) / (far - near)
projection.m43 = -1
projection.m44 = 0
where far and near are the z clipping planes.
I also had to correct the box's initial position to center it on the marker.
I need some help with OpenCV and gearwheel detection.
My task: count gearwheel teeth from images like this:
I'm trying to use the HoughCircles method but got bad results like this:
Otsu threshold:
Code (using the OpenCV Java wrapper):
Mat des = new Mat(sourceImg.rows(), sourceImg.cols(), sourceImg.type());
Imgproc.cvtColor(sourceImg, sourceImg, Imgproc.COLOR_BGR2GRAY, 4);
Imgproc.GaussianBlur(sourceImg,des, new Size(3,3),0,0);
Imgproc.threshold(des, des, 0, 255, Imgproc.THRESH_BINARY | Imgproc.THRESH_OTSU);
Imgproc.Canny(des, des, 0 , 1);
displayImage(Mat2BufferedImage(des));
Mat circles = new Mat();
Imgproc.HoughCircles(des, circles, Imgproc.CV_HOUGH_GRADIENT, 1.0, 50, 70.0, 30.0, 100, 0);
/// Draw the circles detected
for(int i = 0; i < circles.cols(); i++)
{
    double vCircle[] = circles.get(0, i);
    if (vCircle == null)
        break;
    Point pt = new Point(Math.round(vCircle[0]), Math.round(vCircle[1]));
    int radius = (int) Math.round(vCircle[2]);
    // draw the found circle
    Core.circle(des, pt, radius, new Scalar(255, 255, 255), 3);
    Core.circle(des, pt, 3, new Scalar(255, 0, 0), 3);
}
What is the right way to approach my task? How can I count the teeth? Thanks for your answers.
Here's what I tried. The code is in C++ but you can easily adapt it to Java.
load the image and resize it to half the size
erode the image, use Canny to detect edges, then dilate to connect the edges
find contours and choose the largest contour
find the convex hull of this largest contour. The number of points in the convex hull will give you a rough value for the number of teeth
Here's the largest contour and the convex hull points:
I get a value of 77 with the following code.
Mat gray = imread("16atchc.jpg", 0);
Mat small, bw, er, kernel;
resize(gray, small, Size(), .5, .5);
kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
erode(small, er, kernel);
Canny(er, bw, 50, 150);
dilate(bw, bw, kernel);
vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
int imax = 0, areamax = 0;
findContours(bw, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
{
    Rect rect = boundingRect(contours[idx]);
    int area = rect.width * rect.height;
    if (area > areamax)
    {
        areamax = area;
        imax = idx;
    }
}
vector<Point> hull;
convexHull(contours[imax], hull);
cout << contours[imax].size() << ", " << hull.size() << endl;
I am struggling to find an appropriate contour algorithm for a low-quality image. The example image shows a rock scene:
What I am trying to achieve is to find contours around features such as:
light areas
dark areas
grey1 areas
grey2 areas
etc. until grey-n areas
(The number of areas shall be a parameter of choice)
I do not want to use a simple binary threshold but rather some sort of contour finding (for example watershed or similar). The major feature lines shall be kept; noise within a feature area can be flattened.
The result of my code can be seen on the images to the right.
Unfortunately, as you can easily tell, the colors do not really represent the original large-scale image features! For example, check out the two areas that I circled in red - these features are almost completely flooded with another color. What I would expect is that at least the very light and the very dark areas are covered by their own colors.
cv::Mat cv_src = cv::imread(argv[1]);
cv::Mat output;
cv::Mat cv_src_gray;
cv::cvtColor(cv_src, cv_src_gray, cv::COLOR_RGB2GRAY);
double clipLimit = 0.1;
cv::Size titleGridSize = cv::Size(8,8);
cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit, titleGridSize);
clahe->apply(cv_src_gray, output);
cv::equalizeHist(output, output);
cv::cvtColor(output, cv_src, cv::COLOR_GRAY2RGB);
// Create binary image from source image
cv::Mat bw;
cv::cvtColor(cv_src, bw, cv::COLOR_BGR2GRAY);
cv::threshold(bw, bw, 180, 255, cv::THRESH_BINARY);
// Perform the distance transform algorithm
cv::Mat dist;
cv::distanceTransform(bw, dist, cv::DIST_L2, CV_32F);
// Normalize the distance image for range = {0.0, 1.0}
cv::normalize(dist, dist, 0, 1., cv::NORM_MINMAX);
// Threshold to obtain the peaks
cv::threshold(dist, dist, .2, 1., cv::THRESH_BINARY);
// Create the CV_8U version of the distance image
cv::Mat dist_8u;
dist.convertTo(dist_8u, CV_8U);
// Find total markers
std::vector<std::vector<cv::Point> > contours;
cv::findContours(dist_8u, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
int ncomp = contours.size();
// Create the marker image for the watershed algorithm
cv::Mat markers = cv::Mat::zeros(dist.size(), CV_32S);
// Draw the foreground markers
for (int i = 0; i < ncomp; i++)
    cv::drawContours(markers, contours, i, cv::Scalar::all(i+1), -1);
// Draw the background marker
cv::circle(markers, cv::Point(5,5), 3, CV_RGB(255,255,255), -1);
// Perform the watershed algorithm
cv::watershed(cv_src, markers);
// Generate random colors
std::vector<cv::Vec3b> colors;
for (int i = 0; i < ncomp; i++)
{
    int b = cv::theRNG().uniform(0, 255);
    int g = cv::theRNG().uniform(0, 255);
    int r = cv::theRNG().uniform(0, 255);
    colors.push_back(cv::Vec3b((uchar)b, (uchar)g, (uchar)r));
}
// Create the result image
cv::Mat dst = cv::Mat::zeros(markers.size(), CV_8UC3);
// Fill labeled objects with random colors
for (int i = 0; i < markers.rows; i++)
{
    for (int j = 0; j < markers.cols; j++)
    {
        int index = markers.at<int>(i,j);
        if (index > 0 && index <= ncomp)
            dst.at<cv::Vec3b>(i,j) = colors[index-1];
        else
            dst.at<cv::Vec3b>(i,j) = cv::Vec3b(0,0,0);
    }
}
// Show me what you got
imshow("final_result", dst);
I think you can use simple clustering such as k-means for this, then examine the cluster centers (or the mean and standard deviation of each cluster). I quickly tried it in MATLAB.
im = imread('tvBqt.jpg');
gr = rgb2gray(im);
x = double(gr(:));
idx = kmeans(x, 4);
cl = reshape(idx, 600, 472);
figure,
subplot(1, 2, 1), imshow(gr, []), title('original')
subplot(1, 2, 2), imshow(label2rgb(cl), []), title('clustered')
The result:
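If you want to stay in OpenCV/C++ (the language of the question), a rough equivalent of the MATLAB snippet above could look like the sketch below; the cluster count of 4 and the file name are taken from the example, everything else is an assumption.

#include <opencv2/opencv.hpp>

int main()
{
    cv::Mat gray = cv::imread("tvBqt.jpg", cv::IMREAD_GRAYSCALE);

    // one row per pixel, one float feature (the gray value)
    cv::Mat samples;
    gray.reshape(1, (int)gray.total()).convertTo(samples, CV_32F);

    int K = 4;              // number of gray "areas", as in the MATLAB example
    cv::Mat labels, centers;
    cv::kmeans(samples, K, labels,
               cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 10, 1.0),
               3, cv::KMEANS_PP_CENTERS, centers);

    // paint every pixel with its cluster center to visualize the segmentation
    cv::Mat clustered(gray.size(), CV_8U);
    for (int i = 0; i < (int)gray.total(); i++)
        clustered.at<uchar>(i / gray.cols, i % gray.cols) =
            (uchar)centers.at<float>(labels.at<int>(i), 0);

    cv::imshow("original", gray);
    cv::imshow("clustered", clustered);
    cv::waitKey();
    return 0;
}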
You could try using SLIC Superpixels. I tried it and it showed some good results. You can vary the parameters to get better clustering; a minimal C++ sketch follows the links below.
SLIC Superpixels
SLIC Superpixels with OpenCV C++
SLIC Superpixels with OpenCV Python
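For reference, here is a minimal OpenCV C++ sketch of the SLIC approach. It needs the ximgproc module from opencv_contrib; the region size and ruler values are just guesses to start from.

#include <opencv2/opencv.hpp>
#include <opencv2/ximgproc.hpp>

int main()
{
    cv::Mat img = cv::imread("tvBqt.jpg");

    // SLIC often behaves better in Lab space
    cv::Mat lab;
    cv::cvtColor(img, lab, cv::COLOR_BGR2Lab);

    // region_size is roughly the superpixel diameter in pixels
    cv::Ptr<cv::ximgproc::SuperpixelSLIC> slic =
        cv::ximgproc::createSuperpixelSLIC(lab, cv::ximgproc::SLICO, 30, 10.0f);
    slic->iterate(10);

    // overlay the superpixel boundaries on the original image
    cv::Mat mask;
    slic->getLabelContourMask(mask, true);
    img.setTo(cv::Scalar(0, 0, 255), mask);

    cv::imshow("SLIC superpixels", img);
    cv::waitKey();
    return 0;
}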
I tried to set an ROI at an angle in an image. At first, I thought that using a mask would give me the same result as setting the ROI on my IplImage structure. Then I would just use cvResize the same way I did when I used an ROI, but this time on a mask.
However, this is obviously not that easy, because of the angle.
Is there any way to copy the inside of any rectangle, at any angle, into a new IplImage that has the size of that rectangle?
CvSeq* approximatedContour = cvApproxPoly(currentContour,
                                          sizeof(CvContour),
                                          0,
                                          CV_POLY_APPROX_DP,
                                          8);
// Enclose the detected polygon in a rectangle
etiquetteBox = cvMinAreaRect2(approximatedContour);
CvPoint2D32f boxPoints[4];
CvPoint2D32f* c1 = (&cvPoint2D32f(0,0),
&cvPoint2D32f(200,0),
&cvPoint2D32f(0,200),
&cvPoint2D32f(200,200));
CvMat* mmat = cvCreateMat(3,3,CV_32FC1);
cvBoxPoints(etiquetteBox, boxPoints);
IplImage* mask = cvCreateImage(cvSize(in->width,in->height), IPL_DEPTH_8U, 1);
IplImage* ROIimg = cvCreateImage(cvSize(in->width,in->height), IPL_DEPTH_8U, 1);
drawBox(mask,etiquetteBox,target_color[3]);
cvAnd(thresImg,mask,ROIimg,mask);
if (voirSeuillage)
    cvCvtColor(ROIimg, in, CV_GRAY2BGR); // ROIimg is OK here!
mmat = cvGetPerspectiveTransform(boxPoints,c1,mmat);
cvWarpPerspective(ROIimg,thresImgResized,mmat); // here I get a full black image!
Doing this, as kindly suggested by Banthar, I get a fully black image instead of what is delimited by boxPoints in ROIimg. What's wrong with this code?
After applying the answer:
Here is what I do now:
double angle = 0.;
// TODO adaptive angle compensation
if (abs(etiquetteBox.angle) > 30)
    angle = etiquetteBox.angle + 270.;
else
    angle = etiquetteBox.angle - 270.;
CvPoint2D32f boxPoints[4];
CvPoint2D32f c1[] = {cvPoint2D32f(0,0),
                     cvPoint2D32f(20,0),
                     cvPoint2D32f(20,20),
                     cvPoint2D32f(0,20)};
CvMat* mmat = cvCreateMat(3,3,CV_32FC1);
cvBoxPoints(etiquetteBox, boxPoints);
Point center = Point(10,10);
//warp the image to fit the polygon into the 20x20 image
mmat = cvGetPerspectiveTransform(boxPoints,c1,mmat);
cvWarpPerspective(thresImg,thresImgResized,mmat);
//rotate the image because the inconsistent angle of etiquetteBox
// from a frame to the next ...
//it would be very cool to find a way to fix this...
CvMat rot_mat = getRotationMatrix2D( center, angle,1.0);
cvWarpAffine(thresImgResized,rotatedIm,&rot_mat);
It is still not quite what I want, because the object rotates inside the 20x20 rotatedIm; in thresImgResized, after cvWarpPerspective, the object is well segmented, BUT it is flipped because of the inconsistent angle of etiquetteBox (-0 degrees in one frame, -90 in the next, depending on how I hold the object to be detected), which I get this way:
cvFindContours(dilImage,
               contoursStorage,
               &contours,
               sizeof(CvContour),
               CV_RETR_LIST,
               CV_CHAIN_APPROX_TC89_KCOS);
// Find polygons
CvSeq* currentContour = contours;
while (currentContour != 0 && !etiquette)
{
    CvSeq* approximatedContour = cvApproxPoly(currentContour,
                                              sizeof(CvContour),
                                              0,
                                              CV_POLY_APPROX_DP,
                                              9);
    // Enclose the detected polygon in a rectangle
    etiquetteBox = cvMinAreaRect2(approximatedContour);
I don't know how to fix this but, at least, it is better than setting my IplImage ROI, because I compensate for etiquetteBox's angle switching from -0 to -90 degrees in consecutive frames.
You are using the wrong brackets in the definition of c1: with parentheses, the commas act as the comma operator, so c1 ends up pointing to a single temporary point instead of an array of four points. Try this:
CvPoint2D32f c1[] = {
cvPoint2D32f(0,200),
cvPoint2D32f(0,0),
cvPoint2D32f(200,0),
cvPoint2D32f(200,200),
};
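As a side note (not part of the original answer): with the C++ API the same extraction is a little less error-prone, because cv::RotatedRect carries its size and corner points directly. A minimal sketch, assuming src is the input image and rrect comes from cv::minAreaRect on the contour:

// Minimal sketch (C++ API): warp the contents of a rotated rectangle into an
// upright image of the same size. 'src' and 'rrect' are assumed to exist.
cv::Point2f srcPts[4];
rrect.points(srcPts);   // documented order: bottomLeft, topLeft, topRight, bottomRight

cv::Point2f dstPts[4] = {
    cv::Point2f(0.f, rrect.size.height),               // bottom-left
    cv::Point2f(0.f, 0.f),                             // top-left
    cv::Point2f(rrect.size.width, 0.f),                // top-right
    cv::Point2f(rrect.size.width, rrect.size.height)   // bottom-right
};

cv::Mat M = cv::getPerspectiveTransform(srcPts, dstPts);
cv::Mat cropped;
cv::warpPerspective(src, cropped, M, cv::Size(cvRound(rrect.size.width),
                                              cvRound(rrect.size.height)));

This also makes it easier to deal with the -0/-90 degree flipping mentioned in the question: one common workaround is to normalize the rectangle so that, for example, width >= height, swapping the two sizes and adjusting the angle by 90 degrees whenever you swap.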
I am trying to create a histogram of the depth videos (converted to grayscale first) in order to apply a threshold that keeps only the highest values, and then do some dilation in order to extract contours. I am stuck, and besides that I don't know whether what I have in mind is the right way to extract contours from depth videos.
In the following code I got stuck at the point of applying the threshold. I think I am applying it in the wrong way. What is the correct way to apply a threshold in this situation in order to obtain a black-and-white image?
Any suggestions or links to tutorials would be awesome!
Thank you very much!
int bins = 256;
int hsize[] = {bins};
//max and min value of the histogram
float max_value = 0, min_value = 0;
float value;
int normalized;
//ranges - grayscale 0 to 256
float xranges[] = { 0, 256 };
float* ranges[] = { xranges };
//image is the actual source from input depth video
gray = cvCreateImage( cvGetSize(image), 8, 1 );
cvCvtColor( image, gray, CV_BGR2GRAY );
cvNamedWindow("original",1);
cvNamedWindow("gray",1);
cvNamedWindow("histogram",1);
cvNamedWindow("black & white",1);
IplImage* planes[] = { gray };
//get the histogram and some info about it
hist = cvCreateHist( 1, hsize, CV_HIST_ARRAY, ranges,1);
cvCalcHist( planes, hist, 0, NULL);
cvGetMinMaxHistValue( hist, &min_value, &max_value);
printf("min: %f, max: %f\n", min_value, max_value);
imgHistogram = cvCreateImage(cvSize(bins, image->height),8,1);
cvRectangle(imgHistogram, cvPoint(0,0), cvPoint(256,image->height), CV_RGB(255,255,255),-1);
//I think that here I have messed up things :( Any suggestions???
bw_img = cvCreateImage(cvGetSize(imgHistogram), IPL_DEPTH_8U, 1);
cvThreshold(imgHistogram, bw_img, 150, 255, CV_THRESH_BINARY);
//draw the histogram
for(int i = 0; i < bins; i++){
    value = cvQueryHistValue_1D(hist, i);
    normalized = cvRound(value * image->height / max_value);
    cvLine(imgHistogram, cvPoint(i, image->height), cvPoint(i, image->height - normalized), CV_RGB(0,0,0));
}
//show the image results
cvShowImage( "original", image );
cvShowImage( "gray", gray );
cvShowImage( "histogram", imgHistogram );
cvShowImage( "balck & white", bw_img);