Instead of any additional blob detection library, how do I use the cv::SimpleBlobDetector class and its function detectblobs()?
Python: Reads image blob.jpg and performs blob detection with different parameters.
#!/usr/bin/python
# Reads blob.jpg, detects blobs with cv2.SimpleBlobDetector using custom
# thresholds and shape filters, and displays the detections.
# Standard imports
import cv2
import numpy as np

# Read image
im = cv2.imread("blob.jpg")
# cv2.imread does not raise on failure; it returns None, which would only
# surface later as a confusing error inside detect().
if im is None:
    raise SystemExit("Could not read blob.jpg")

# Setup SimpleBlobDetector parameters.
params = cv2.SimpleBlobDetector_Params()

# Change thresholds
params.minThreshold = 10
params.maxThreshold = 200

# Filter by Area.
params.filterByArea = True
params.minArea = 1500

# Filter by Circularity
params.filterByCircularity = True
params.minCircularity = 0.1

# Filter by Convexity
params.filterByConvexity = True
params.minConvexity = 0.87

# Filter by Inertia
params.filterByInertia = True
params.minInertiaRatio = 0.01

# Create a detector with the parameters.
# OpenCV 2 exposes the constructor directly; newer versions use the
# SimpleBlobDetector_create factory.
if int(cv2.__version__.split(".")[0]) < 3:
    detector = cv2.SimpleBlobDetector(params)
else:
    detector = cv2.SimpleBlobDetector_create(params)

# Detect blobs.
keypoints = detector.detect(im)

# Draw detected blobs as red circles.
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures
# the size of the circle corresponds to the size of blob
im_with_keypoints = cv2.drawKeypoints(
    im,
    keypoints,
    np.array([]),
    (0, 0, 255),
    cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
)

# Show blobs
cv2.imshow("Keypoints", im_with_keypoints)
cv2.waitKey(0)
C++: Reads image blob.jpg and performs blob detection with different parameters.
#include <iostream>
#include <vector>

#include "opencv2/opencv.hpp"
using namespace cv;
using namespace std;
/// Reads blob.jpg as a grayscale image, detects blobs with SimpleBlobDetector
/// (custom thresholds plus area, circularity, convexity and inertia filters)
/// and shows the detections as red circles.
///
/// @return 0 on success, 1 when blob.jpg cannot be read.
int main(int argc, char** argv)
{
    // Read image
#if CV_MAJOR_VERSION < 3 // If you are using OpenCV 2
    Mat im = imread("blob.jpg", CV_LOAD_IMAGE_GRAYSCALE);
#else
    Mat im = imread("blob.jpg", IMREAD_GRAYSCALE);
#endif
    // imread reports failure by returning an empty Mat rather than throwing,
    // so bail out here instead of failing later inside drawKeypoints/imshow.
    if (im.empty())
    {
        std::cerr << "Could not read blob.jpg" << std::endl;
        return 1;
    }

    // Setup SimpleBlobDetector parameters.
    SimpleBlobDetector::Params params;

    // Change thresholds
    params.minThreshold = 10;
    params.maxThreshold = 200;

    // Filter by Area.
    params.filterByArea = true;
    params.minArea = 1500;

    // Filter by Circularity
    params.filterByCircularity = true;
    params.minCircularity = 0.1;

    // Filter by Convexity
    params.filterByConvexity = true;
    params.minConvexity = 0.87;

    // Filter by Inertia
    params.filterByInertia = true;
    params.minInertiaRatio = 0.01;

    // Storage for blobs
    std::vector<KeyPoint> keypoints;

#if CV_MAJOR_VERSION < 3 // If you are using OpenCV 2
    // Set up detector with params
    SimpleBlobDetector detector(params);
    // Detect blobs
    detector.detect(im, keypoints);
#else
    // Set up detector with params
    Ptr<SimpleBlobDetector> detector = SimpleBlobDetector::create(params);
    // Detect blobs
    detector->detect(im, keypoints);
#endif

    // Draw detected blobs as red circles.
    // DrawMatchesFlags::DRAW_RICH_KEYPOINTS flag ensures
    // the size of the circle corresponds to the size of blob
    Mat im_with_keypoints;
    drawKeypoints(im, keypoints, im_with_keypoints, Scalar(0, 0, 255), DrawMatchesFlags::DRAW_RICH_KEYPOINTS);

    // Show blobs
    imshow("keypoints", im_with_keypoints);
    waitKey(0);
    return 0;
}
The answer has been copied from this tutorial I wrote at LearnOpenCV.com explaining various parameters of SimpleBlobDetector. You can find additional details about the parameters in the tutorial.
You may store the parameters for the blob detector in a file, but this is not necessary. Example:
// set up the parameters (check the defaults in opencv's code in blobdetector.cpp)
cv::SimpleBlobDetector::Params params;
params.minDistBetweenBlobs = 50.0f;
params.filterByInertia = false;
params.filterByConvexity = false;
params.filterByColor = false;
params.filterByCircularity = false;
params.filterByArea = true;
params.minArea = 20.0f;
params.maxArea = 500.0f;
// ... any other params you don't want default value
// set up and create the detector using the parameters
cv::SimpleBlobDetector blob_detector(params);
// or cv::Ptr<cv::SimpleBlobDetector> detector = cv::SimpleBlobDetector::create(params)
// detect!
vector<cv::KeyPoint> keypoints;
blob_detector.detect(image, keypoints);
// extract the x y coordinates of the keypoints:
for (int i=0; i<keypoints.size(); i++){
float X = keypoints[i].pt.x;
float Y = keypoints[i].pt.y;
}
Note: all the examples here are using the OpenCV 2.X API.
In OpenCV 3.X, you need to use:
Ptr<SimpleBlobDetector> d = SimpleBlobDetector::create(params);
See also: the transition guide: http://docs.opencv.org/master/db/dfa/tutorial_transition_guide.html#tutorial_transition_hints_headers
// Creates a SimpleBlobDetector on the heap, edits its params member directly
// (which requires the header change described after this snippet), opens a
// FileStorage, and runs detection on img_text.
// creation
cv::SimpleBlobDetector * blob_detector;
// NOTE(review): allocated with new and never deleted within this snippet.
blob_detector = new SimpleBlobDetector();
// NOTE(review): the value returned by create() is discarded here - confirm this call is needed.
blob_detector->create("SimpleBlobDetector");
// change params - first move it to public!!
blob_detector->params.filterByArea = true;
blob_detector->params.minArea = 1;
blob_detector->params.maxArea = 32000;
// or read / write them with file
// NOTE(review): the storage is opened with FileStorage::WRITE but then indexed
// as if it were being read - confirm the intended mode.
FileStorage fs("test_fs.yml", FileStorage::WRITE);
FileNode fn = fs["features"];
//blob_detector->read(fn);
// detect
vector<KeyPoint> keypoints;
blob_detector->detect(img_text, keypoints);
fs.release();
I do not know why, but params is protected. So I changed features2d.hpp to make it public:
virtual void read( const FileNode& fn );
virtual void write( FileStorage& fs ) const;
public:
Params params;
protected:
struct CV_EXPORTS Center
{
Point2d loc
If you do not do this, the only way to change params is to create a file (FileStorage fs("test_fs.yml", FileStorage::WRITE);), then open it in Notepad and edit it. Or maybe there is another way, but I'm not aware of it.
Related
Given an MTLTexture, defined as follows.
// Create device.
id<MTLDevice> dev = MTLCreateDefaultSystemDevice();
// Size of texture.
const unsigned int W = 640;
const unsigned int H = 480;
// Define texture.
MTLTextureDescriptor *desc = [[MTLTextureDescriptor alloc] init];
desc.pixelFormat = MTLPixelFormatBGRA8Unorm;
desc.width = W;
desc.height = H;
// Create texture on the device obtained above (the variable is named dev).
id<MTLTexture> tex = [dev newTextureWithDescriptor:desc];
It is my understanding that at this point I should have a texture as defined in desc allocated on device dev and accessible through tex.
Now, given another texture tex2 (known to be allocated and accessible) and a Metal compute kernel defined as follows.
// Compute kernel: `in` is bound to texture slot 0 with read access, `out` to
// texture slot 1 with write access, and `pix` is this thread's position in the
// grid. Each in-bounds thread writes one float4 to `out` at `pix`.
kernel void foo(texture2d<float, access::read> in [[texture(0)]],
texture2d<float, access::write> out [[texture(1)]],
uint2 pix [[thread_position_in_grid]]) {
// Out of bounds check.
// Threads whose position falls outside the output texture do nothing.
if (pix.x >= out.get_width() || pix.y >= out.get_height()) {
return;
}
// Do some processing on the input texture.
// ... All fine up to here.
// Write out a pixel to the output buffer.
// `abc` stands for the result of the processing elided above.
const float4 p = abc; // abc is computed above.
out.write(p, pix);
}
It is my understanding that when the pixel p is written out to out, the values of p will be converted to conform to the pixel format of tex, in this case MTLPixelFormatBGRA8Unorm.
However, when launching the kernel as follows, the line in which p is written to out (above defined as tex) triggers a critical error (SIGABRT).
// Create a Metal library.
id<MTLLibrary> lib = [dev newDefaultLibrary];
// Load the kernel (NSString literals use the @"..." form).
id<MTLFunction> kernel = [lib newFunctionWithName:@"foo"];
// Create a pipeline state.
id<MTLComputePipelineState> pipelineState = [dev newComputePipelineStateWithFunction:kernel error:NULL];
// Create a command queue.
id<MTLCommandQueue> cmdQueue = [dev newCommandQueue];
// Create command buffer.
id<MTLCommandBuffer> cmdBuff = [cmdQueue commandBuffer];
// Create compute encoder.
id<MTLComputeCommandEncoder> enc = [cmdBuff computeCommandEncoder];
// Set the pipeline state.
[enc setComputePipelineState:pipelineState];
// Set the input textures (tex2 is read only in the kernel, as above).
[enc setTexture:tex2 atIndex:0];
[enc setTexture:tex atIndex:1];
// 2D launch configuration: enough 16x16 groups to cover W x H, rounding up.
const MTLSize groupDim = MTLSizeMake(16, 16, 1);
const MTLSize gridDim = MTLSizeMake((int)ceil((float)(W / (float)groupDim.width)),
                                    (int)ceil((float)(H / (float)groupDim.height)),
                                    1);
// Launch kernel.
[enc dispatchThreadgroups:gridDim threadsPerThreadgroup:groupDim];
[enc endEncoding];
// Work is submitted by committing the command buffer; the encoder has no commit.
[cmdBuff commit];
[cmdBuff waitUntilCompleted];
My question is that under the scenario outlined above, is my understanding of how one allocates a MTLTexture correct? Or, is the example above merely defining a wrapper around some texture that I need to separately allocate?
The above texture allocation and compute kernel launch are correct. Upon further digging in the documentation, the part that was missing was the usage property of MTLTextureDescriptor. In the documentation, the following is stated.
The default value for this property is MTLTextureUsageShaderRead.
As such, in the example given in the question, the following additional property assignment on MTLTextureDescriptor is required.
desc.usage = MTLTextureUsageShaderWrite;
I'm trying to transform a point from one map to another. I've tried to use some OpenCV sample code for getAffineTransform(), getPerspectiveTransform(), warpAffine() and findHomography(), but there are always gaps of some kind in my transformation mesh. The feature points are usually detected at very different positions, so I think I need a good interpolation method.
About the maps:
Both maps are images that contain human body parts and human skin. I'm using the OpenCV feature detection/matching algorithms to get a couple of corresponding points in both maps. The tricky thing is that they contain arms and feet, too. Feature points on arms/feet can have much bigger offsets than the points on the torso.
The goal:
I want to transform any point on map A as good as possible to the equivalent position on map B.
My current approach is to find the three feature points closest to my original point on map A and construct a triangle. Afterwards I transform this triangle to the same three feature points on map B. That works nicely if I have a lot of close feature points surrounding my original point. But in larger areas without feature points I get some problems with the interpolation.
Is this a good way to do so? Or is there a much better solution?
My favorite one would be the construction of a complete transformation map for both images, but I'm not sure how to do this. Is it possible at all?
Thanks a lot for any advice!
Simple sketch of the transformation (I´m trying to find the points X1 to X3 from the left image in the right image):
Sketch of a sample transformation
Sample for homography (OpenCVSharp):
// Load both maps; verbatim string literals (@"...") keep the backslashes in the paths.
Mat imgA = new Mat(@"d:\Mesh\Left2.jpg", ImreadModes.Color);
Mat imgB = new Mat(@"d:\Mesh\Right2.jpg", ImreadModes.Color);
Cv2.Resize(imgA, imgA, new Size(512, 341));
Cv2.Resize(imgB, imgB, new Size(512, 341));

// Detect keypoints with SURF, describe them with SIFT.
SURF detector = SURF.Create(500.0);
KeyPoint[] keypointsA = detector.Detect(imgA);
KeyPoint[] keypointsB = detector.Detect(imgB);
SIFT extractor = SIFT.Create();
Mat descriptorsA = new Mat();
Mat descriptorsB = new Mat();
extractor.Compute(imgA, ref keypointsA, descriptorsA);
extractor.Compute(imgB, ref keypointsB, descriptorsB);

// Brute-force matching of the descriptors (second argument enables cross-check).
BFMatcher matcher = new BFMatcher(NormTypes.L2, true);
DMatch[] matches = matcher.Match(descriptorsA, descriptorsB);

// Find the smallest and largest descriptor distance over all matches.
double minDistance = 10000.0;
double maxDistance = 0.0;
for (int i = 0; i < matches.Length; ++i)
{
    double distance = matches[i].Distance;
    if (distance < minDistance)
    {
        minDistance = distance;
    }
    if (distance > maxDistance)
    {
        maxDistance = distance;
    }
}

// Keep matches that are close to the best distance and whose keypoints lie
// on roughly the same image row (less than 30 pixels apart vertically).
List<DMatch> goodMatches = new List<DMatch>();
for (int i = 0; i < matches.Length; ++i)
{
    if (matches[i].Distance <= 3.0 * minDistance &&
        Math.Abs(keypointsA[matches[i].QueryIdx].Pt.Y - keypointsB[matches[i].TrainIdx].Pt.Y) < 30)
    {
        goodMatches.Add(matches[i]);
    }
}

Mat output = new Mat();
Cv2.DrawMatches(imgA, keypointsA, imgB, keypointsB, goodMatches.ToArray(), output);

// Collect the matched point pairs for the homography estimation.
List<Point2f> goodA = new List<Point2f>();
List<Point2f> goodB = new List<Point2f>();
for (int i = 0; i < goodMatches.Count; i++)
{
    goodA.Add(keypointsA[goodMatches[i].QueryIdx].Pt);
    goodB.Add(keypointsB[goodMatches[i].TrainIdx].Pt);
}
InputArray goodInputA = InputArray.Create<Point2f>(goodA);
InputArray goodInputB = InputArray.Create<Point2f>(goodB);
Mat h = Cv2.FindHomography(goodInputA, goodInputB);

// Map the centre of image A into image B and mark both positions.
// The second marker is shifted by imgA.Cols on the x axis.
Point2f centerA = new Point2f(imgA.Cols / 2.0f, imgA.Rows / 2.0f);
output.DrawMarker((int)centerA.X, (int)centerA.Y, Scalar.Red, MarkerStyle.Cross, 50, LineTypes.Link8, 5);
Point2f[] transformedPoints = Cv2.PerspectiveTransform(new Point2f[] { centerA }, h);
output.DrawMarker((int)transformedPoints[0].X + imgA.Cols, (int)transformedPoints[0].Y, Scalar.Red, MarkerStyle.Cross, 50, LineTypes.Link8, 5);
Code snippet for perspective transform (different approach, OpenCVSharp):
// Corner points of the i-th triangle on map A and of its counterpart on map B.
pointsA[0] = new Point(trisA[i].Item0, trisA[i].Item1);
pointsA[1] = new Point(trisA[i].Item2, trisA[i].Item3);
pointsA[2] = new Point(trisA[i].Item4, trisA[i].Item5);
pointsB[0] = new Point(trisB[i].Item0, trisB[i].Item1);
pointsB[1] = new Point(trisB[i].Item2, trisB[i].Item3);
pointsB[2] = new Point(trisB[i].Item4, trisB[i].Item5);
// Three point pairs define an affine map; GetAffineTransform returns it as a 2x3 matrix.
Mat transformation = Cv2.GetAffineTransform(pointsA, pointsB);
InputArray inputSource = InputArray.Create<Point2f>(new Point2f[] { new Point2f(10f, 50f) });
Mat outputMat = new Mat();
// A 2x3 affine matrix is applied with Transform. PerspectiveTransform expects
// a 3x3 homography and would treat the second row as the projective divisor,
// so the result would not be a 2-channel point.
Cv2.Transform(inputSource, outputMat, transformation);
Mat.Indexer<Point2f> indexer = outputMat.GetGenericIndexer<Point2f>();
var target = indexer[0, 0];
i have a code like this:
// Runs template matching of templ against img and walks every cell of the
// result matrix; the comparison threshold is the open question ("?").
Mat img = Highgui.imread(inFile);
Mat templ = Highgui.imread(templateFile);
// One result cell per template position inside the image.
int result_cols = img.cols() - templ.cols() + 1;
int result_rows = img.rows() - templ.rows() + 1;
Mat result = new Mat(result_rows, result_cols, CvType.CV_32FC1);
Imgproc.matchTemplate(img, templ, result, Imgproc.TM_CCOEFF);
// Min-max normalisation of the scores is disabled on the next line.
/////Core.normalize(result, result, 0, 1, Core.NORM_MINMAX, -1, new Mat());
for (int i = 0; i < result_rows; i++)
for (int j = 0; j < result_cols; j++)
if(result.get(i, j)[0]>?)
//match!
I need to parse the input image to find multiple occurrences of the template image. I want to have a result like this:
result[0][0]= 15%
result[0][1]= 17%
result[x][y]= 47%
If I use TM_CCOEFF all results are [-xxxxxxxx.xxx,+xxxxxxxx.xxx]
If i use TM_SQDIFF all results are xxxxxxxx.xxx
If i use TM_CCORR all results are xxxxxxxx.xxx
How can i detect a match or a mismatch? What is the right condition into the if?
If I normalize the matrix, some value is always set to 1, so I can't detect the case where the template does not occur in the image at all (all mismatches).
Thanks in advance
You can append "_NORMED" to the method names (for instance: CV_TM_CCOEFF_NORMED in C++; it could be slightly different in Java) to get a sensible value for your purpose.
By 'sensible', I mean that you will get values in the range of 0 to 1 which can be multiplied by 100 for your purpose.
Note: For CV_TM_SQDIFF_NORMED, the result is also in the range 0 to 1, but 0 means a perfect match, so you will have to subtract the value from 1 in order to make sense of it, because the lowest value is the best match in this method.
Tip: you can use the java equivalent of minMaxLoc() in order to get the minimum and maximum values. It's very useful when used in conjunction with matchtemplate.
I believe 'minMaxLoc' is located inside the class Core.
Here's a C++ implementation:
// Match the template, then turn the best score into a 0..1 "percentage".
matchTemplate( input_mat, template_mat, result_mat, method_NORMED );
double minVal, maxVal;
double percentage;
Point minLoc; Point maxLoc;
// Search the matrix that matchTemplate just filled (result_mat).
minMaxLoc( result_mat, &minVal, &maxVal, &minLoc, &maxLoc, Mat() );
if( method_NORMED == CV_TM_SQDIFF_NORMED )
{
    // For squared difference the smallest value is the best match.
    percentage=1-minVal;
}
else
{
    // For the other methods the largest value is the best match.
    percentage=maxVal;
}
Useful C++ docs:
Match template description along with available methods: http://docs.opencv.org/modules/imgproc/doc/object_detection.html
MinMaxLoc documentation:
http://docs.opencv.org/modules/core/doc/operations_on_arrays.html?highlight=minmaxloc#minmaxloc
Another approach will be background differencing. You can observe the distortion.
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.highgui.Highgui;
import org.opencv.imgproc.Imgproc;
/**
 * Compares a template image with a scene image by absolute difference and
 * reports how many pixels differ by more than a fixed threshold.
 */
public class BackgroundDifference {
    /**
     * Loads both images as grayscale, thresholds their absolute difference,
     * prints the number of non-zero pixels and writes the difference mask to disk.
     */
    public static void main(String[] arg) {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

        final int grayscale = Highgui.CV_LOAD_IMAGE_GRAYSCALE;
        final Mat reference = Highgui.imread("e:\\answers\\template.jpg", grayscale);
        final Mat current = Highgui.imread("e:\\answers\\front7.jpg", grayscale);

        // The mask is thresholded in place: differences above 15 become 255.
        final Mat mask = new Mat();
        Core.absdiff(reference, current, mask);
        Imgproc.threshold(mask, mask, 15, 255, Imgproc.THRESH_BINARY);

        final int changedPixels = Core.countNonZero(mask);
        System.out.println("distortion:" + changedPixels);

        Highgui.imwrite("e:\\answers\\diff.jpg", mask);
    }
}
I am new to OpenCV and I want to select a particular region in the video/image for detection. In my case I want to detect cars that are only in the road not in the parking lot.
Well, selecting cars requires use of training data. But to select an ROI (region of interest) is fairly simple:
Consider img = cv2.imread(image)
In that case, somewhere in your code, you can specify a region this way:
sub_image = img[y:y+h, x:x+w]
That will get the ROI once you substitute actual values for x and y (the top-left corner of the region), where h is the height and w is the width. Remember that images are just 2D matrices.
Use CascadeClassifier() to select the car(s) from the image(s). Documentation is found here. OpenCV comes packed with training data you can use to make classifications in the form of XML files.
If you want to manually select a region of interest (ROI) to do some processing on it, then you may try using a mouse click event to select the start and stop points of your ROI.
Once you have start and stop point you can use it to retrieve image from selected region.
This can be done on an image or on a captured video frame.
// Set to true once the user has released the left button, i.e. once both
// corners of the region of interest are known. The callback and main()
// both refer to this flag as got_roi.
bool got_roi = false;
// Corners of the selected region: button-down and button-up positions.
Point pt1, pt2;
// Frame most recently retrieved by main(); the ROI is cut from a copy of it.
Mat cap_img;

// Callback for mouse click events: the x-y coordinates of left button-down
// and button-up are stored in the two points pt1 and pt2.
// Once a region has been captured, further clicks are only reported.
void mouse_click(int event, int x, int y, int flags, void *param)
{
    switch(event)
    {
        case CV_EVENT_LBUTTONDOWN:
        {
            std::cout<<"Mouse Pressed"<<std::endl;
            if(!got_roi)
            {
                pt1.x = x;
                pt1.y = y;
            }
            else
            {
                std::cout<<"ROI Already Acquired"<<std::endl;
            }
            break;
        }
        case CV_EVENT_LBUTTONUP:
        {
            if(!got_roi)
            {
                Mat cl;
                std::cout<<"Mouse LBUTTON Released"<<std::endl;
                pt2.x = x;
                pt2.y = y;
                cl = cap_img.clone();
                Mat roi(cl, Rect(pt1, pt2));
                Mat prev_imgT = roi.clone();
                std::cout<<"PT1"<<pt1.x<<", "<<pt1.y<<std::endl;
                std::cout<<"PT2"<<pt2.x<<","<<pt2.y<<std::endl;
                imshow("Clone",cl);
                got_roi = true;
            }
            else
            {
                std::cout<<"ROI Already Acquired"<<std::endl;
            }
            break;
        }
    }
}
//In main open video and wait for roi event to complete by the use.
// You capture roi in pt1 and pt2 you can use the same coordinates for processing // //subsequent frame
int main(int argc, char *argv[])
{
int frame_num = 0;
int non_decode_frame =0;
int count = 1, idx =0;
int frame_pos =0;
std::cout<<"Video File "<<argv[1]<<std::endl;
cv::VideoCapture input_video(argv[1]);
namedWindow("My_Win",1);
cvSetMouseCallback("My_Win", mouse_click, 0);
sleep(1);
while(input_video.grab())
{
cap_img.release();
if(input_video.retrieve(cap_img))
{
imshow("My_Win", cap_img);
if(!got_roi)
{
//Wait here till user select the desire ROI
waitKey(0);
}
else
{
std::cout<<"Got ROI disp prev and curr image"<<std::endl;
std::cout<<"PT1"<<pt1.x<<" "<<pt1.y<<std::endl;
std::cout<<"PT2"<<pt2.x<<" "<<pt2.y<<std::endl;
Mat curr_img_t1;
Mat roi2(cap_img,Rect(pt1, pt2));
Mat curr_imgT = roi2.clone();
cvtColor(curr_imgT, curr_img_t1, CV_RGB2GRAY);
imshow("curr_img", curr_img);
// Do remaining processing here on capture roi for every frame
waitKey(1);
}
}
}
}
You didn't tag in what programming language you are writing with. Anyway, I answer you in python. (You can easily convert it to C++ if you want)
def mouse_drawing(event, x, y, flags, params):
    """Mouse callback: on left button down, save the region at the click.

    The region starts at (x, y) and spans carwidth x carheight pixels of the
    module-level image ``img``.
    """
    if event == cv2.EVENT_LBUTTONDOWN:
        car = img[y: y + carheight, x: x + carwidth]
        # imwrite chooses the encoder from the file extension, so the name
        # must carry one.
        cv2.imwrite("car.png", car)


cv2.namedWindow("my_img")
cv2.setMouseCallback("my_img", mouse_drawing)
# Keep showing the image until Esc (key code 27) is pressed.
while True:
    cv2.imshow("my_img", img)
    key = cv2.waitKey(1)
    if key == 27:
        break
As other answers have said, finding cars automatically is a different problem that involves training data and other things.
I am in need of using the standard Hough Transformation (instead of the using the HoughLinesBinary method which implements Probabilistic Hough Transform) and have attempted doing so by creating a custom version of the HoughLinesBinary method:
// Calls cvHoughLines2 with CV_HOUGH_STANDARD on the canny image and wraps the
// returned sequence as Seq<MCvMat> so each line can be visited.
// thetaResolution is converted from degrees to radians in the call.
using (MemStorage stor = new MemStorage())
{
IntPtr lines = CvInvoke.cvHoughLines2(canny.Ptr, stor.Ptr, Emgu.CV.CvEnum.HOUGH_TYPE.CV_HOUGH_STANDARD, rhoResolution, (thetaResolution*Math.PI)/180, threshold, 0, 0);
// NOTE(review): MCvMat as the element type is the asker's guess - confirm against the element layout the call produces.
Seq<MCvMat> segments = new Seq<MCvMat>(lines, stor);
List<MCvMat> lineslist = segments.ToList();
foreach(MCvMat line in lineslist)
{
//Process lines: (rho, theta)
}
}
My problem is that I am unsure of what type is the sequence returned. I believe it should be MCvMat, due to reading the documentation that CvMat* is used in OpenCV, which also states that for STANDARD "the matrix must be (the created sequence will be) of CV_32FC2 type"
I am unclear as to what I would need to do to return and process that correct output data from the STANDARD hough lines (i.e. the 2x1 vector for each line giving the rho and theta information).
Any help would be greatly appreciated. Thank you
-Sal
I had the same problem myself a couple of days ago. This is how I solved it using marshalling. Please let me know if you find a simpler solution.
// Runs the standard Hough transform and reads each resulting element by
// marshalling it into a PolarCoordinates value. At most 100 elements are
// visited; iteration stops earlier at the first null element pointer.
using (MemStorage storage = new MemStorage())
{
    IntPtr houghSeq = CvInvoke.cvHoughLines2(
        canny.Ptr,
        storage.Ptr,
        Emgu.CV.CvEnum.HOUGH_TYPE.CV_HOUGH_STANDARD,
        rhoResolution,
        (thetaResolution*Math.PI)/180,
        threshold,
        0,
        0);

    int lineLimit = 100;
    int index = 0;
    while (index < lineLimit)
    {
        IntPtr element = CvInvoke.cvGetSeqElem(houghSeq, index);
        if (element == IntPtr.Zero)
        {
            // No more lines
            break;
        }

        PolarCoordinates polar = (PolarCoordinates)System.Runtime.InteropServices.Marshal.PtrToStructure(element, typeof(PolarCoordinates));
        // Do something with your Hough lines
        index++;
    }
}
with a struct defined as follows:
/// <summary>
/// One Hough line as a pair of floats, filled by Marshal.PtrToStructure from a
/// sequence element. Field order matters: Rho is read first, then Theta.
/// </summary>
public struct PolarCoordinates
{
/// <summary>First float of the element (rho).</summary>
public float Rho;
/// <summary>Second float of the element (theta).</summary>
public float Theta;
}