The OpenCV docs give the following SVM kernel type example:
A comparison of different kernels on the following 2D test case with four classes. Four SVM::C_SVC SVMs have been trained (one against rest) with auto_train. Evaluation on three different kernels (SVM::CHI2, SVM::INTER, SVM::RBF). The color depicts the class with max score. Bright means max-score > 0, dark means max-score < 0.
Where can I find the sample code that generates this example?
Specifically, the SVM predict() method presumably returns a label value and not a max-score. How can it return a max-score?
Note that the quote states that it uses SVM::C_SVC which is a classification, not a regression, type.
You can get the score only with a 2-class SVM, and only if you pass the RAW_OUTPUT flag to predict():
// svm.cpp, SVMImpl::predict(...) , line 1917
bool returnDFVal = (flags & RAW_OUTPUT) != 0;
// svm.cpp, PredictBody::operator(), line 1896,
float result = returnDFVal && class_count == 2 ?
(float)sum : (float)(svm->class_labels.at<int>(k));
You then need to train four different 2-class SVMs, one against the rest.
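For example, the scoring step then boils down to something like this (a minimal sketch, assuming svm1..svm4 are the four trained one-vs-rest SVM::C_SVC models and sample is a 1x2 CV_32F row; the full program below shows the training):
// Sketch: with RAW_OUTPUT, predict() returns the signed decision-function value
// instead of the label, so the class with the largest value wins.
std::vector<float> responses{
    svm1->predict(sample, cv::noArray(), cv::ml::StatModel::RAW_OUTPUT),
    svm2->predict(sample, cv::noArray(), cv::ml::StatModel::RAW_OUTPUT),
    svm3->predict(sample, cv::noArray(), cv::ml::StatModel::RAW_OUTPUT),
    svm4->predict(sample, cv::noArray(), cv::ml::StatModel::RAW_OUTPUT)
};
int best_class = (int)std::distance(responses.begin(),
                                    std::max_element(responses.begin(), responses.end()));
float max_score = responses[best_class];   // > 0 -> bright color, < 0 -> dark (shaded) color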
These are the results I get on these samples:
INTER with trainAuto
CHI2 with trainAuto
RBF with train (C = 0.1, gamma = 0.001) (trainAuto overfits in this case)
Here is the code. You can enable trainAuto with the AUTO_TRAIN_ENABLED boolean variable, and you can set the KERNEL as well as the image dimensions, etc.
#include <opencv2/opencv.hpp>
#include <vector>
#include <algorithm>
using namespace std;
using namespace cv;
using namespace cv::ml;
int main()
{
const int WIDTH = 512;
const int HEIGHT = 512;
const int N_SAMPLES_PER_CLASS = 10;
const float NON_LINEAR_SAMPLES_RATIO = 0.1;
const int KERNEL = SVM::CHI2;
const bool AUTO_TRAIN_ENABLED = false;
int N_NON_LINEAR_SAMPLES = N_SAMPLES_PER_CLASS * NON_LINEAR_SAMPLES_RATIO;
int N_LINEAR_SAMPLES = N_SAMPLES_PER_CLASS - N_NON_LINEAR_SAMPLES;
vector<Scalar> colors{Scalar(255,0,0), Scalar(0,255,0), Scalar(0,0,255), Scalar(0,255,255)};
vector<Vec3b> colorsv{ Vec3b(255, 0, 0), Vec3b(0, 255, 0), Vec3b(0, 0, 255), Vec3b(0, 255, 255) };
vector<Vec3b> colorsv_shaded{ Vec3b(200, 0, 0), Vec3b(0, 200, 0), Vec3b(0, 0, 200), Vec3b(0, 200, 200) };
Mat1f data(4 * N_SAMPLES_PER_CLASS, 2);
Mat1i labels(4 * N_SAMPLES_PER_CLASS, 1);
RNG rng(0);
////////////////////////
// Set training data
////////////////////////
// Class 1
Mat1f class1 = data.rowRange(0, 0.5 * N_LINEAR_SAMPLES);
Mat1f x1 = class1.colRange(0, 1);
Mat1f y1 = class1.colRange(1, 2);
rng.fill(x1, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y1, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT / 8));
class1 = data.rowRange(0.5 * N_LINEAR_SAMPLES, 1 * N_LINEAR_SAMPLES);
x1 = class1.colRange(0, 1);
y1 = class1.colRange(1, 2);
rng.fill(x1, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y1, RNG::UNIFORM, Scalar(7*HEIGHT / 8), Scalar(HEIGHT));
class1 = data.rowRange(N_LINEAR_SAMPLES, 1 * N_SAMPLES_PER_CLASS);
x1 = class1.colRange(0, 1);
y1 = class1.colRange(1, 2);
rng.fill(x1, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y1, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Class 2
Mat1f class2 = data.rowRange(N_SAMPLES_PER_CLASS, N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES);
Mat1f x2 = class2.colRange(0, 1);
Mat1f y2 = class2.colRange(1, 2);
rng.fill(x2, RNG::NORMAL, Scalar(3 * WIDTH / 4), Scalar(WIDTH/16));
rng.fill(y2, RNG::NORMAL, Scalar(HEIGHT / 2), Scalar(HEIGHT/4));
class2 = data.rowRange(N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES, 2 * N_SAMPLES_PER_CLASS);
x2 = class2.colRange(0, 1);
y2 = class2.colRange(1, 2);
rng.fill(x2, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y2, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Class 3
Mat1f class3 = data.rowRange(2 * N_SAMPLES_PER_CLASS, 2 * N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES);
Mat1f x3 = class3.colRange(0, 1);
Mat1f y3 = class3.colRange(1, 2);
rng.fill(x3, RNG::NORMAL, Scalar(WIDTH / 4), Scalar(WIDTH/8));
rng.fill(y3, RNG::NORMAL, Scalar(HEIGHT / 2), Scalar(HEIGHT/8));
class3 = data.rowRange(2*N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES, 3 * N_SAMPLES_PER_CLASS);
x3 = class3.colRange(0, 1);
y3 = class3.colRange(1, 2);
rng.fill(x3, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y3, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Class 4
Mat1f class4 = data.rowRange(3 * N_SAMPLES_PER_CLASS, 3 * N_SAMPLES_PER_CLASS + 0.5 * N_LINEAR_SAMPLES);
Mat1f x4 = class4.colRange(0, 1);
Mat1f y4 = class4.colRange(1, 2);
rng.fill(x4, RNG::NORMAL, Scalar(WIDTH / 2), Scalar(WIDTH / 16));
rng.fill(y4, RNG::NORMAL, Scalar(HEIGHT / 4), Scalar(HEIGHT / 16));
class4 = data.rowRange(3 * N_SAMPLES_PER_CLASS + 0.5 * N_LINEAR_SAMPLES, 3 * N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES);
x4 = class4.colRange(0, 1);
y4 = class4.colRange(1, 2);
rng.fill(x4, RNG::NORMAL, Scalar(WIDTH / 2), Scalar(WIDTH / 16));
rng.fill(y4, RNG::NORMAL, Scalar(3 * HEIGHT / 4), Scalar(HEIGHT / 16));
class4 = data.rowRange(3 * N_SAMPLES_PER_CLASS + N_LINEAR_SAMPLES, 4 * N_SAMPLES_PER_CLASS);
x4 = class4.colRange(0, 1);
y4 = class4.colRange(1, 2);
rng.fill(x4, RNG::UNIFORM, Scalar(1), Scalar(WIDTH));
rng.fill(y4, RNG::UNIFORM, Scalar(1), Scalar(HEIGHT));
// Labels
labels.rowRange(0*N_SAMPLES_PER_CLASS, 1*N_SAMPLES_PER_CLASS).setTo(1);
labels.rowRange(1*N_SAMPLES_PER_CLASS, 2*N_SAMPLES_PER_CLASS).setTo(2);
labels.rowRange(2*N_SAMPLES_PER_CLASS, 3*N_SAMPLES_PER_CLASS).setTo(3);
labels.rowRange(3*N_SAMPLES_PER_CLASS, 4*N_SAMPLES_PER_CLASS).setTo(4);
// Draw training data
Mat3b samples(HEIGHT, WIDTH, Vec3b(0,0,0));
for (int i = 0; i < labels.rows; ++i)
{
circle(samples, Point(data(i, 0), data(i, 1)), 3, colors[labels(i,0) - 1], CV_FILLED);
}
//////////////////////////
// SVM
//////////////////////////
// SVM label 1
Ptr<SVM> svm1 = SVM::create();
svm1->setType(SVM::C_SVC);
svm1->setKernel(KERNEL);
Mat1i labels1 = (labels != 1) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td1 = TrainData::create(data, ROW_SAMPLE, labels1);
svm1->trainAuto(td1);
}
else
{
svm1->setC(0.1);
svm1->setGamma(0.001);
svm1->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm1->train(data, ROW_SAMPLE, labels1);
}
// SVM label 2
Ptr<SVM> svm2 = SVM::create();
svm2->setType(SVM::C_SVC);
svm2->setKernel(KERNEL);
Mat1i labels2 = (labels != 2) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td2 = TrainData::create(data, ROW_SAMPLE, labels2);
svm2->trainAuto(td2);
}
else
{
svm2->setC(0.1);
svm2->setGamma(0.001);
svm2->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm2->train(data, ROW_SAMPLE, labels2);
}
// SVM label 3
Ptr<SVM> svm3 = SVM::create();
svm3->setType(SVM::C_SVC);
svm3->setKernel(KERNEL);
Mat1i labels3 = (labels != 3) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td3 = TrainData::create(data, ROW_SAMPLE, labels3);
svm3->trainAuto(td3);
}
else
{
svm3->setC(0.1);
svm3->setGamma(0.001);
svm3->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm3->train(data, ROW_SAMPLE, labels3);
}
// SVM label 4
Ptr<SVM> svm4 = SVM::create();
svm4->setType(SVM::C_SVC);
svm4->setKernel(KERNEL);
Mat1i labels4 = (labels != 4) / 255;
if (AUTO_TRAIN_ENABLED)
{
Ptr<TrainData> td4 = TrainData::create(data, ROW_SAMPLE, labels4);
svm4->trainAuto(td4);
}
else
{
svm4->setC(0.1);
svm4->setGamma(0.001);
svm4->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER, (int)1e7, 1e-6));
svm4->train(data, ROW_SAMPLE, labels4);
}
//////////////////////////
// Show regions
//////////////////////////
Mat3b regions(HEIGHT, WIDTH);
Mat1f R(HEIGHT, WIDTH);
Mat1f R1(HEIGHT, WIDTH);
Mat1f R2(HEIGHT, WIDTH);
Mat1f R3(HEIGHT, WIDTH);
Mat1f R4(HEIGHT, WIDTH);
for (int r = 0; r < HEIGHT; ++r)
{
for (int c = 0; c < WIDTH; ++c)
{
Mat1f sample = (Mat1f(1,2) << c, r);
vector<float> responses(4);
responses[0] = svm1->predict(sample, noArray(), StatModel::RAW_OUTPUT);
responses[1] = svm2->predict(sample, noArray(), StatModel::RAW_OUTPUT);
responses[2] = svm3->predict(sample, noArray(), StatModel::RAW_OUTPUT);
responses[3] = svm4->predict(sample, noArray(), StatModel::RAW_OUTPUT);
int best_class = distance(responses.begin(), max_element(responses.begin(), responses.end()));
float best_response = responses[best_class];
// View responses for each SVM, and the best responses
R(r,c) = best_response;
R1(r, c) = responses[0];
R2(r, c) = responses[1];
R3(r, c) = responses[2];
R4(r, c) = responses[3];
if (best_response >= 0) {
regions(r, c) = colorsv[best_class];
}
else {
regions(r, c) = colorsv_shaded[best_class];
}
}
}
imwrite("svm_samples.png", samples);
imwrite("svm_x.png", regions);
imshow("Samples", samples);
imshow("Regions", regions);
waitKey();
return 0;
}
In this code I am detecting a sheet of paper. These are the steps I used:
1. Apply the Hough line transform
2. Detect corners
3. Apply a perspective transform
Can anybody please tell me why the code is not working on all the images? It only works for one or two images.
It works on this image, but when I use some other image instead I get an error.
#include <cv.h>
#include <highgui.h>
#include <iostream>
using namespace std;
using namespace cv;
Point2f center(0,0);
Point2f computeIntersect(Vec4i a, Vec4i b)
{
int x1 = a[0], y1 = a[1], x2 = a[2], y2 = a[3], x3 = b[0], y3 = b[1], x4 = b[2], y4 = b[3];
float denom;
if (float d = ((float)(x1 - x2) * (y3 - y4)) - ((y1 - y2) * (x3 - x4)))
{
Point2f pt;
pt.x = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / d;
pt.y = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / d;
return pt;
}
else
return Point2f(-1, -1);
}
void sortCorners(vector<Point2f>& corners, Point2f center)
{
vector<Point2f> top, bot;
for (int i = 0; i < corners.size(); i++)
{
if (corners[i].y < center.y)
top.push_back(corners[i]);
else
bot.push_back(corners[i]);
}
corners.clear();
if (top.size() == 2 && bot.size() == 2){
Point2f tl = top[0].x > top[1].x ? top[1] : top[0];
Point2f tr = top[0].x > top[1].x ? top[0] : top[1];
Point2f bl = bot[0].x > bot[1].x ? bot[1] : bot[0];
Point2f br = bot[0].x > bot[1].x ? bot[0] : bot[1];
corners.push_back(tl);
corners.push_back(tr);
corners.push_back(br);
corners.push_back(bl);
}
}
int main()
{
Mat src,cann,hsv;
src = imread("C:\\im.jpg", CV_LOAD_IMAGE_COLOR);
if (src.empty())
return -1;
imshow("original",src);
blur(src, src, Size(3, 3));
Canny(src, cann, 50, 200, 3);
cvtColor(cann, hsv, CV_GRAY2BGR);
vector<Vec4i> lines;
HoughLinesP(cann, lines, 1, CV_PI/180, 70, 30, 10);
for( size_t i = 0; i < lines.size(); i++ )
{
Vec4i l = lines[i];
line( hsv, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0,0,255), 2, CV_AA);
}
// Expand the lines
for (int i = 0; i < lines.size(); i++)
{
Vec4i v = lines[i];
lines[i][0] = 0;
lines[i][1] = ((float)v[1] - v[3]) / (v[0] - v[2]) * -v[0] + v[1];
lines[i][2] = src.cols;
lines[i][3] = ((float)v[1] - v[3]) / (v[0] - v[2]) * (src.cols - v[2]) + v[3];
}
vector<Point2f> corners;
for (int i = 0; i < lines.size(); i++)
{
for (int j = i+1; j < lines.size(); j++)
{
Point2f pt = computeIntersect(lines[i], lines[j]);
if (pt.x >= 0 && pt.y >= 0)
corners.push_back(pt);
}
}
vector<Point2f> approx;
approxPolyDP(Mat(corners), approx, arcLength(Mat(corners), true) * 0.02, true);
//if (approx.size() != 4)
// {
// cout << "The object is not quadrilateral!" << endl;
//return -1;
//}
// Get mass center
for (int i = 0; i < corners.size(); i++)
center += corners[i];
center *= (1. / corners.size());
sortCorners(corners, center);
if (corners.size() == 0)
{
cout << "The corners were not sorted correctly!" << endl;
return -1;
}
Mat dst = src.clone();
// Draw lines
for (int i = 0; i < lines.size(); i++)
{
Vec4i v = lines[i];
line(dst, Point(v[0], v[1]), Point(v[2], v[3]), CV_RGB(0,255,0));
}
// Draw corner points
circle(dst, corners[0], 3, CV_RGB(255,0,0), 2);
circle(dst, corners[1], 3, CV_RGB(0,255,0), 2);
circle(dst, corners[2], 3, CV_RGB(0,0,255), 2);
circle(dst, corners[3], 3, CV_RGB(255,255,255), 2);
// Draw mass center
circle(dst, center, 3, CV_RGB(255,255,0), 2);
Mat quad = Mat::zeros(300, 220, CV_8UC3);
vector<Point2f> quad_pts;
quad_pts.push_back(Point2f(0, 0));
quad_pts.push_back(Point2f(quad.cols, 0));
quad_pts.push_back(Point2f(quad.cols, quad.rows));
quad_pts.push_back(Point2f(0, quad.rows));
Mat transmtx = getPerspectiveTransform(corners, quad_pts);
warpPerspective(src, quad, transmtx, quad.size());
imshow("blurr",src);
imshow("canney",cann);
imshow("hough",hsv);
imshow("image", dst);
imshow("quadrilateral", quad);
waitKey(0);
return 0;
}
A few days ago I started learning RenderScript. I managed to create some simple image processing filters, e.g. grayscale and color change.
Now I'm working on a Canny edge filter, with no success.
Question: Why does the ImageView display a black image, and how do I solve it?
I'm using the implementation of the Canny edge filter made by arekolek on GitHub.
Optional: Can I compute it faster?
I ended up with all the code written in one method, runEdgeFilter(...), which runs when I click the image on my device, to make sure I'm not messing with the imageView anywhere else. The code that I use so far:
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.support.v8.renderscript.*;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.view.View;
import android.widget.ImageView;
public class MainActivity extends AppCompatActivity {
private static final float THRESHOLD_MULT_LOW = 0.66f * 0.00390625f;
private static final float THRESHOLD_MULT_HIGH = 1.33f * 0.00390625f;
private ImageView imageView;
private Bitmap img;
private boolean setThresholds = true;
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
imageView = (ImageView) findViewById(R.id.imageView);
img = BitmapFactory.decodeResource(getResources(), R.drawable.test_img_no_dpi2);
imageView.setImageBitmap(img);
}
public void imageClicked(View view) {
runEdgeFilter(img, this);
}
private void runEdgeFilter(Bitmap image, Context context) {
int width = image.getWidth();
int height = image.getHeight();
RenderScript rs = RenderScript.create(context);
Allocation allocationIn = Allocation.createFromBitmap(rs, image);
Type.Builder tb;
tb = new Type.Builder(rs, Element.F32(rs)).setX(width).setY(height);
Allocation allocationBlurred = Allocation.createTyped(rs, tb.create());
Allocation allocationMagnitude = Allocation.createTyped(rs, tb.create());
tb = new Type.Builder(rs, Element.I32(rs)).setX(width).setY(height);
Allocation allocationDirection = Allocation.createTyped(rs, tb.create());
Allocation allocationEdge = Allocation.createTyped(rs, tb.create());
tb = new Type.Builder(rs, Element.I32(rs)).setX(256);
Allocation allocationHistogram = Allocation.createTyped(rs, tb.create());
tb = new Type.Builder(rs, Element.RGBA_8888(rs)).setX(width).setY(height);
Allocation allocationOut = Allocation.createTyped(rs, tb.create());
ScriptC_edge edgeFilter = new ScriptC_edge(rs);
ScriptIntrinsicHistogram histogram = ScriptIntrinsicHistogram.create(rs, Element.U8(rs));
histogram.setOutput(allocationHistogram);
edgeFilter.invoke_set_histogram(allocationHistogram);
edgeFilter.invoke_set_blur_input(allocationIn);
edgeFilter.invoke_set_compute_gradient_input(allocationBlurred);
edgeFilter.invoke_set_suppress_input(allocationMagnitude, allocationDirection);
edgeFilter.invoke_set_hysteresis_input(allocationEdge);
edgeFilter.invoke_set_thresholds(0.2f, 0.6f);
histogram.forEach_Dot(allocationIn);
int[] histogramOutput = new int[256];
allocationHistogram.copyTo(histogramOutput);
if(setThresholds) {
int median = width * height / 2;
for (int i = 0; i < 256; ++i) {
median -= histogramOutput[i];
if (median < 1) {
edgeFilter.invoke_set_thresholds(i * THRESHOLD_MULT_LOW, i * THRESHOLD_MULT_HIGH);
break;
}
}
}
edgeFilter.forEach_blur(allocationBlurred);
edgeFilter.forEach_compute_gradient(allocationMagnitude);
edgeFilter.forEach_suppress(allocationEdge);
edgeFilter.forEach_hysteresis(allocationOut);
allocationOut.copyTo(image);
allocationIn.destroy();
allocationMagnitude.destroy();
allocationBlurred.destroy();
allocationDirection.destroy();
allocationEdge.destroy();
allocationHistogram.destroy();
allocationOut.destroy();
histogram.destroy();
edgeFilter.destroy();
rs.destroy();
imageView.setImageBitmap(image);
}
}
renderscript edge.rs:
#pragma version(1)
#pragma rs java_package_name(com.lukasz.edgeexamplers)
#pragma rs_fp_relaxed
#include "rs_debug.rsh"
static rs_allocation raw, magnitude, blurred, direction, candidates;
static float low, high;
static const uint32_t zero = 0;
void set_blur_input(rs_allocation u8_buf) {
raw = u8_buf;
}
void set_compute_gradient_input(rs_allocation f_buf) {
blurred = f_buf;
}
void set_suppress_input(rs_allocation f_buf, rs_allocation i_buf) {
magnitude = f_buf;
direction = i_buf;
}
void set_hysteresis_input(rs_allocation i_buf) {
candidates = i_buf;
}
void set_thresholds(float l, float h) {
low = l;
high = h;
}
inline static float getElementAt_uchar_to_float(rs_allocation a, uint32_t x,
uint32_t y) {
return rsGetElementAt_uchar(a, x, y) / 255.0f;
}
static rs_allocation histogram;
void set_histogram(rs_allocation h) {
histogram = h;
}
uchar4 __attribute__((kernel)) addhisto(uchar in, uint32_t x, uint32_t y) {
int px = (x - 100) / 2;
if (px > -1 && px < 256) {
int v = log((float) rsGetElementAt_int(histogram, (uint32_t) px)) * 30;
int py = (400 - y);
if (py > -1 && v > py) {
in = 255;
}
if (py == -1) {
in = 255;
}
}
uchar4 out = { in, in, in, 255 };
return out;
}
uchar4 __attribute__((kernel)) copy(uchar in) {
uchar4 out = { in, in, in, 255 };
return out;
}
uchar4 __attribute__((kernel)) blend(uchar4 in, uint32_t x, uint32_t y) {
uchar r = rsGetElementAt_uchar(raw, x, y);
uchar4 out = { r, r, r, 255 };
return max(out, in);
}
float __attribute__((kernel)) blur(uint32_t x, uint32_t y) {
float pixel = 0;
pixel += 2 * getElementAt_uchar_to_float(raw, x - 2, y - 2);
pixel += 4 * getElementAt_uchar_to_float(raw, x - 1, y - 2);
pixel += 5 * getElementAt_uchar_to_float(raw, x, y - 2);
pixel += 4 * getElementAt_uchar_to_float(raw, x + 1, y - 2);
pixel += 2 * getElementAt_uchar_to_float(raw, x + 2, y - 2);
pixel += 4 * getElementAt_uchar_to_float(raw, x - 2, y - 1);
pixel += 9 * getElementAt_uchar_to_float(raw, x - 1, y - 1);
pixel += 12 * getElementAt_uchar_to_float(raw, x, y - 1);
pixel += 9 * getElementAt_uchar_to_float(raw, x + 1, y - 1);
pixel += 4 * getElementAt_uchar_to_float(raw, x + 2, y - 1);
pixel += 5 * getElementAt_uchar_to_float(raw, x - 2, y);
pixel += 12 * getElementAt_uchar_to_float(raw, x - 1, y);
pixel += 15 * getElementAt_uchar_to_float(raw, x, y);
pixel += 12 * getElementAt_uchar_to_float(raw, x + 1, y);
pixel += 5 * getElementAt_uchar_to_float(raw, x + 2, y);
pixel += 4 * getElementAt_uchar_to_float(raw, x - 2, y + 1);
pixel += 9 * getElementAt_uchar_to_float(raw, x - 1, y + 1);
pixel += 12 * getElementAt_uchar_to_float(raw, x, y + 1);
pixel += 9 * getElementAt_uchar_to_float(raw, x + 1, y + 1);
pixel += 4 * getElementAt_uchar_to_float(raw, x + 2, y + 1);
pixel += 2 * getElementAt_uchar_to_float(raw, x - 2, y + 2);
pixel += 4 * getElementAt_uchar_to_float(raw, x - 1, y + 2);
pixel += 5 * getElementAt_uchar_to_float(raw, x, y + 2);
pixel += 4 * getElementAt_uchar_to_float(raw, x + 1, y + 2);
pixel += 2 * getElementAt_uchar_to_float(raw, x + 2, y + 2);
pixel /= 159;
return pixel;
}
float __attribute__((kernel)) compute_gradient(uint32_t x, uint32_t y) {
float gx = 0;
gx -= rsGetElementAt_float(blurred, x - 1, y - 1);
gx -= rsGetElementAt_float(blurred, x - 1, y) * 2;
gx -= rsGetElementAt_float(blurred, x - 1, y + 1);
gx += rsGetElementAt_float(blurred, x + 1, y - 1);
gx += rsGetElementAt_float(blurred, x + 1, y) * 2;
gx += rsGetElementAt_float(blurred, x + 1, y + 1);
float gy = 0;
gy += rsGetElementAt_float(blurred, x - 1, y - 1);
gy += rsGetElementAt_float(blurred, x, y - 1) * 2;
gy += rsGetElementAt_float(blurred, x + 1, y - 1);
gy -= rsGetElementAt_float(blurred, x - 1, y + 1);
gy -= rsGetElementAt_float(blurred, x, y + 1) * 2;
gy -= rsGetElementAt_float(blurred, x + 1, y + 1);
int d = ((int) round(atan2pi(gy, gx) * 4.0f) + 4) % 4;
rsSetElementAt_int(direction, d, x, y);
return hypot(gx, gy);
}
int __attribute__((kernel)) suppress(uint32_t x, uint32_t y) {
int d = rsGetElementAt_int(direction, x, y);
float g = rsGetElementAt_float(magnitude, x, y);
if (d == 0) {
// horizontal, check left and right
float a = rsGetElementAt_float(magnitude, x - 1, y);
float b = rsGetElementAt_float(magnitude, x + 1, y);
return a < g && b < g ? 1 : 0;
} else if (d == 2) {
// vertical, check above and below
float a = rsGetElementAt_float(magnitude, x, y - 1);
float b = rsGetElementAt_float(magnitude, x, y + 1);
return a < g && b < g ? 1 : 0;
} else if (d == 1) {
// NW-SE
float a = rsGetElementAt_float(magnitude, x - 1, y - 1);
float b = rsGetElementAt_float(magnitude, x + 1, y + 1);
return a < g && b < g ? 1 : 0;
} else {
// NE-SW
float a = rsGetElementAt_float(magnitude, x + 1, y - 1);
float b = rsGetElementAt_float(magnitude, x - 1, y + 1);
return a < g && b < g ? 1 : 0;
}
}
static const int NON_EDGE = 0b000;
static const int LOW_EDGE = 0b001;
static const int MED_EDGE = 0b010;
static const int HIG_EDGE = 0b100;
inline static int getEdgeType(uint32_t x, uint32_t y) {
int e = rsGetElementAt_int(candidates, x, y);
float g = rsGetElementAt_float(magnitude, x, y);
if (e == 1) {
if (g < low)
return LOW_EDGE;
if (g > high)
return HIG_EDGE;
return MED_EDGE;
}
return NON_EDGE;
}
uchar4 __attribute__((kernel)) hysteresis(uint32_t x, uint32_t y) {
uchar4 white = { 255, 255, 255, 255 };
uchar4 red = { 255, 0, 0, 255 };
uchar4 black = { 0, 0, 0, 255 };
int type = getEdgeType(x, y);
if (type) {
if (type & LOW_EDGE) {
return black;
}
if (type & HIG_EDGE) {
//rsDebug("wh : x=", x);
//rsDebug("wh : y=", y);
return white;
}
// it's medium, check nearest neighbours
type = getEdgeType(x - 1, y - 1);
type |= getEdgeType(x, y - 1);
type |= getEdgeType(x + 1, y - 1);
type |= getEdgeType(x - 1, y);
type |= getEdgeType(x + 1, y);
type |= getEdgeType(x - 1, y + 1);
type |= getEdgeType(x, y + 1);
type |= getEdgeType(x + 1, y + 1);
if (type & HIG_EDGE) {
//rsDebug("wh : x=", x);
//rsDebug("wh : y=", y);
return white;
}
if (type & MED_EDGE) {
// check further
type = getEdgeType(x - 2, y - 2);
type |= getEdgeType(x - 1, y - 2);
type |= getEdgeType(x, y - 2);
type |= getEdgeType(x + 1, y - 2);
type |= getEdgeType(x + 2, y - 2);
type |= getEdgeType(x - 2, y - 1);
type |= getEdgeType(x + 2, y - 1);
type |= getEdgeType(x - 2, y);
type |= getEdgeType(x + 2, y);
type |= getEdgeType(x - 2, y + 1);
type |= getEdgeType(x + 2, y + 1);
type |= getEdgeType(x - 2, y + 2);
type |= getEdgeType(x - 1, y + 2);
type |= getEdgeType(x, y + 2);
type |= getEdgeType(x + 1, y + 2);
type |= getEdgeType(x + 2, y + 2);
if (type & HIG_EDGE) {
//rsDebug("wh : x=", x);
//rsDebug("wh : y=", y);
return white;
}
}
}
return black;
}
After some debugging I found that:
uchar4 __attribute__((kernel)) hysteresis(uint32_t x, uint32_t y) {...}
returns white and black pixels, so I think the RenderScript works properly.
The output is the same type as in my previous RenderScript filters (uchar4), which I assign to a Bitmap with success.
I have no idea what I've done wrong.
Also my logcat prints:
V/RenderScript_jni: RS compat mode
V/RenderScript_jni: Unable to load libRSSupportIO.so, USAGE_IO not supported
V/RenderScript_jni: Unable to load BLAS lib, ONLY BNNM will be supported: java.lang.UnsatisfiedLinkError: Couldn't load blasV8 from loader dalvik.system.PathClassLoader[dexPath=/data/app/com.lukasz.edgeexamplers-20.apk,libraryPath=/data/app-lib/com.lukasz.edgeexamplers-20]: findLibrary returned null
E/RenderScript: Couldn't load libRSSupportIO.so
in every program which uses RenderScript, but my other programs work even with these warnings.
Update #1
As @Stephen Hines mentioned, there was an issue with reading out of bounds. I think I fixed it for now (without touching the RenderScript) by changing these lines:
edgeFilter.forEach_blur(allocationBlurred);
edgeFilter.forEach_compute_gradient(allocationMagnitude);
edgeFilter.forEach_suppress(allocationEdge);
edgeFilter.forEach_hysteresis(allocationOut);
into:
Script.LaunchOptions sLaunchOpt = new Script.LaunchOptions();
sLaunchOpt.setX(2, width - 3);
sLaunchOpt.setY(2, height - 3);
edgeFilter.forEach_blur(allocationBlurred, sLaunchOpt);
edgeFilter.forEach_compute_gradient(allocationMagnitude, sLaunchOpt);
edgeFilter.forEach_suppress(allocationEdge, sLaunchOpt);
edgeFilter.forEach_hysteresis(allocationOut, sLaunchOpt);
But my problem is still not solved. The output is black, as before.
The function rotates the template image from 0 to 180 (or up to 360) degrees to search for all related matches (at all angles) in the source image, even at a different scale.
The function was written with the OpenCV C interface. When I tried to port it to the OpenCV C++ interface, I got a lot of errors. Can someone please help me port it to the OpenCV C++ interface?
void TemplateMatch()
{
int i, j, x, y, key;
double minVal;
char windowNameSource[] = "Original Image";
char windowNameDestination[] = "Result Image";
char windowNameCoefficientOfCorrelation[] = "Coefficient of Correlation Image";
CvPoint minLoc;
CvPoint tempLoc;
IplImage *sourceImage = cvLoadImage("template_source.jpg", CV_LOAD_IMAGE_ANYDEPTH | CV_LOAD_IMAGE_ANYCOLOR);
IplImage *templateImage = cvLoadImage("template.jpg", CV_LOAD_IMAGE_ANYDEPTH | CV_LOAD_IMAGE_ANYCOLOR);
IplImage *graySourceImage = cvCreateImage(cvGetSize(sourceImage), IPL_DEPTH_8U, 1);
IplImage *grayTemplateImage =cvCreateImage(cvGetSize(templateImage),IPL_DEPTH_8U,1);
IplImage *binarySourceImage = cvCreateImage(cvGetSize(sourceImage), IPL_DEPTH_8U, 1);
IplImage *binaryTemplateImage = cvCreateImage(cvGetSize(templateImage), IPL_DEPTH_8U, 1);
IplImage *destinationImage = cvCreateImage(cvGetSize(sourceImage), IPL_DEPTH_8U, 3);
cvCopy(sourceImage, destinationImage);
cvCvtColor(sourceImage, graySourceImage, CV_RGB2GRAY);
cvCvtColor(templateImage, grayTemplateImage, CV_RGB2GRAY);
cvThreshold(graySourceImage, binarySourceImage, 200, 255, CV_THRESH_OTSU );
cvThreshold(grayTemplateImage, binaryTemplateImage, 200, 255, CV_THRESH_OTSU);
int templateHeight = templateImage->height;
int templateWidth = templateImage->width;
float templateScale = 0.5f;
for(i = 2; i <= 3; i++)
{
int tempTemplateHeight = (int)(templateWidth * (i * templateScale));
int tempTemplateWidth = (int)(templateHeight * (i * templateScale));
IplImage *tempBinaryTemplateImage = cvCreateImage(cvSize(tempTemplateWidth, tempTemplateHeight), IPL_DEPTH_8U, 1);
// W - w + 1, H - h + 1
IplImage *result = cvCreateImage(cvSize(sourceImage->width - tempTemplateWidth + 1, sourceImage->height - tempTemplateHeight + 1), IPL_DEPTH_32F, 1);
cvResize(binaryTemplateImage, tempBinaryTemplateImage, CV_INTER_LINEAR);
float degree = 20.0f;
for(j = 0; j <= 9; j++)
{
IplImage *rotateBinaryTemplateImage = cvCreateImage(cvSize(tempBinaryTemplateImage->width, tempBinaryTemplateImage->height), IPL_DEPTH_8U, 1);
//cvShowImage(windowNameSource, tempBinaryTemplateImage);
//cvWaitKey(0);
for(y = 0; y < tempTemplateHeight; y++)
{
for(x = 0; x < tempTemplateWidth; x++)
{
rotateBinaryTemplateImage->imageData[y * tempTemplateWidth + x] = 255;
}
}
for(y = 0; y < tempTemplateHeight; y++)
{
for(x = 0; x < tempTemplateWidth; x++)
{
float radian = (float)j * degree * CV_PI / 180.0f;
int scale = y * tempTemplateWidth + x;
int rotateY = - sin(radian) * ((float)x - (float)tempTemplateWidth / 2.0f) + cos(radian) * ((float)y - (float)tempTemplateHeight / 2.0f) + tempTemplateHeight / 2;
int rotateX = cos(radian) * ((float)x - (float)tempTemplateWidth / 2.0f) + sin(radian) * ((float)y - (float)tempTemplateHeight / 2.0f) + tempTemplateWidth / 2;
if(rotateY < tempTemplateHeight && rotateX < tempTemplateWidth && rotateY >= 0 && rotateX >= 0)
rotateBinaryTemplateImage->imageData[scale] = tempBinaryTemplateImage->imageData[rotateY * tempTemplateWidth + rotateX];
}
}
//cvShowImage(windowNameSource, rotateBinaryTemplateImage);
//cvWaitKey(0);
cvMatchTemplate(binarySourceImage, rotateBinaryTemplateImage, result, CV_TM_SQDIFF_NORMED);
//cvMatchTemplate(binarySourceImage, rotateBinaryTemplateImage, result, CV_TM_SQDIFF);
cvMinMaxLoc(result, &minVal, NULL, &minLoc, NULL, NULL);
printf("%d , %d : %f%%\n", (int)(i * 0.5 * 100), j * 20, (1 - minVal) * 100);
if(minVal < 0.065) // 1 - 0.065 = 0.935 : 93.5%
{
tempLoc.x = minLoc.x + tempTemplateWidth;
tempLoc.y = minLoc.y + tempTemplateHeight;
cvRectangle(destinationImage, minLoc, tempLoc, CV_RGB(0, 255, 0), 1, 8, 0);
}
}
//cvShowImage(windowNameSource, result);
//cvWaitKey(0);
cvReleaseImage(&tempBinaryTemplateImage);
cvReleaseImage(&result);
}
// cvShowImage(windowNameSource, sourceImage);
// cvShowImage(windowNameCoefficientOfCorrelation, result);
cvShowImage(windowNameDestination, destinationImage);
key = cvWaitKey(0);
cvReleaseImage(&sourceImage);
cvReleaseImage(&templateImage);
cvReleaseImage(&graySourceImage);
cvReleaseImage(&grayTemplateImage);
cvReleaseImage(&binarySourceImage);
cvReleaseImage(&binaryTemplateImage);
cvReleaseImage(&destinationImage);
cvDestroyWindow(windowNameSource);
cvDestroyWindow(windowNameDestination);
cvDestroyWindow(windowNameCoefficientOfCorrelation);
}
RESULT:
Template image:
Result image:
The function above puts rectangles around the perfect matches (angle and scale invariant) in the result image.
Now I have been trying to port the code to the C++ interface. If anyone needs more details, please let me know.
C++ Port of above code:
Mat TemplateMatch(Mat sourceImage, Mat templateImage){
double minVal;
Point minLoc;
Point tempLoc;
Mat graySourceImage = Mat(sourceImage.size(),CV_8UC1);
Mat grayTemplateImage = Mat(templateImage.size(),CV_8UC1);
Mat binarySourceImage = Mat(sourceImage.size(),CV_8UC1);
Mat binaryTemplateImage = Mat(templateImage.size(),CV_8UC1);
Mat destinationImage = Mat(sourceImage.size(),CV_8UC3);
sourceImage.copyTo(destinationImage);
cvtColor(sourceImage, graySourceImage, CV_BGR2GRAY);
cvtColor(templateImage, grayTemplateImage, CV_BGR2GRAY);
threshold(graySourceImage, binarySourceImage, 200, 255, CV_THRESH_OTSU );
threshold(grayTemplateImage, binaryTemplateImage, 200, 255, CV_THRESH_OTSU);
int templateHeight = templateImage.rows;
int templateWidth = templateImage.cols;
float templateScale = 0.5f;
for(int i = 2; i <= 3; i++){
int tempTemplateHeight = (int)(templateWidth * (i * templateScale));
int tempTemplateWidth = (int)(templateHeight * (i * templateScale));
Mat tempBinaryTemplateImage = Mat(Size(tempTemplateWidth,tempTemplateHeight),CV_8UC1);
Mat result = Mat(Size(sourceImage.cols - tempTemplateWidth + 1,sourceImage.rows - tempTemplateHeight + 1),CV_32FC1);
resize(binaryTemplateImage,tempBinaryTemplateImage,Size(tempBinaryTemplateImage.cols,tempBinaryTemplateImage.rows),0,0,INTER_LINEAR);
float degree = 20.0f;
for(int j = 0; j <= 9; j++){
Mat rotateBinaryTemplateImage = Mat(Size(tempBinaryTemplateImage.cols, tempBinaryTemplateImage.rows), CV_8UC1);
for(int y = 0; y < tempTemplateHeight; y++){
for(int x = 0; x < tempTemplateWidth; x++){
rotateBinaryTemplateImage.data[y * tempTemplateWidth + x] = 255;
}
}
for(int y = 0; y < tempTemplateHeight; y++){
for(int x = 0; x < tempTemplateWidth; x++){
float radian = (float)j * degree * CV_PI / 180.0f;
int scale = y * tempTemplateWidth + x;
int rotateY = - sin(radian) * ((float)x - (float)tempTemplateWidth / 2.0f) + cos(radian) * ((float)y - (float)tempTemplateHeight / 2.0f) + tempTemplateHeight / 2;
int rotateX = cos(radian) * ((float)x - (float)tempTemplateWidth / 2.0f) + sin(radian) * ((float)y - (float)tempTemplateHeight / 2.0f) + tempTemplateWidth / 2;
if(rotateY < tempTemplateHeight && rotateX < tempTemplateWidth && rotateY >= 0 && rotateX >= 0)
rotateBinaryTemplateImage.data[scale] = tempBinaryTemplateImage.data[rotateY * tempTemplateWidth + rotateX];
}
}
matchTemplate(binarySourceImage, rotateBinaryTemplateImage, result, CV_TM_SQDIFF_NORMED);
minMaxLoc(result, &minVal, 0, &minLoc, 0, Mat());
cout<<(int)(i * 0.5 * 100)<<" , "<< j * 20<<" , "<< (1 - minVal) * 100<<endl;
if(minVal < 0.065){ // 1 - 0.065 = 0.935 : 93.5%
tempLoc.x = minLoc.x + tempTemplateWidth;
tempLoc.y = minLoc.y + tempTemplateHeight;
rectangle(destinationImage, minLoc, tempLoc, CV_RGB(0, 255, 0), 1, 8, 0);
}
}
}
return destinationImage;
}
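As a side note on the port: in the C++ interface the per-pixel rotation loops can be replaced by getRotationMatrix2D and warpAffine. This is only a hedged sketch, not part of the original code; rotateTemplate is a hypothetical helper and it assumes the binary template is CV_8UC1:
// Sketch: rotate the binary template by 'angle' degrees around its center,
// padding with white (255) as the manual loop did, then feed it to matchTemplate.
Mat rotateTemplate(const Mat& binaryTemplate, double angle)
{
    Point2f center(binaryTemplate.cols / 2.0f, binaryTemplate.rows / 2.0f);
    Mat R = getRotationMatrix2D(center, angle, 1.0);   // 2x3 affine rotation matrix
    Mat rotated;
    warpAffine(binaryTemplate, rotated, R, binaryTemplate.size(),
               INTER_NEAREST, BORDER_CONSTANT, Scalar(255));
    return rotated;
}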
I am trying to specify a different origin for the warpPerspective() function than the basic (0,0), in order to apply the transform independently of the support image size. I added a CvPoint parameter to the original code, but I can't find where to use these coordinates. I tried to use them in the computation of X0, Y0 and W0, but that didn't work; it only shifts the transformed image in the resulting image. Any idea?
Here is the code:
void warpPerspective( const Mat& src, Mat& dst, const Mat& M0, Size dsize,
int flags, int borderType, const Scalar& borderValue, CvPoint origin )
{
dst.create( dsize, src.type() );
const int BLOCK_SZ = 32;
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
double M[9];
Mat _M(3, 3, CV_64F, M);
int interpolation = flags & INTER_MAX;
if( interpolation == INTER_AREA )
interpolation = INTER_LINEAR;
CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
M0.convertTo(_M, _M.type());
if( !(flags & WARP_INVERSE_MAP) )
invert(_M, _M);
int x, y, x1, y1, width = dst.cols, height = dst.rows;
int bh0 = std::min(BLOCK_SZ/2, height);
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
for( y = 0; y < height; y += bh0 )
{
for( x = 0; x < width; x += bw0 )
{
int bw = std::min( bw0, width - x);
int bh = std::min( bh0, height - y);
Mat _XY(bh, bw, CV_16SC2, XY), _A;
Mat dpart(dst, Rect(x, y, bw, bh));
for( y1 = 0; y1 < bh; y1++ )
{
short* xy = XY + y1*bw*2;
double X0 = M[0]*x + M[1]*(y + y1) + M[2];
double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
if( interpolation == INTER_NEAREST )
for( x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? 1./W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
xy[x1*2] = (short)X;
xy[x1*2+1] = (short)Y;
}
else
{
short* alpha = A + y1*bw;
for( x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? INTER_TAB_SIZE/W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
xy[x1*2] = (short)(X >> INTER_BITS);
xy[x1*2+1] = (short)(Y >> INTER_BITS);
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
}
}
if( interpolation == INTER_NEAREST )
remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
else
{
Mat _A(bh, bw, CV_16U, A);
remap( src, dpart, _XY, _A, interpolation, borderType, borderValue );
}
}
}
}
OK, I found it myself! You have two things to do:
compute the destination dimensions in the source referential, and do the remap using these dimensions;
increment the computed point coordinates.
Here is the code thus transformed:
void warpPerspective( const Mat& src, Mat& dst, const Mat& M0, Size dsize,
int flags, int borderType, const Scalar& borderValue, CvPoint origin )
{
dst.create( dsize, src.type() );
const int BLOCK_SZ = 32;
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
double M[9];
Mat _M(3, 3, CV_64F, M);
int interpolation = flags & INTER_MAX;
if( interpolation == INTER_AREA )
interpolation = INTER_LINEAR;
CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
M0.convertTo(_M, _M.type());
if( !(flags & WARP_INVERSE_MAP) )
invert(_M, _M);
int x, xDest, y, yDest, x1, y1, width = dst.cols, height = dst.rows;
int bh0 = std::min(BLOCK_SZ/2, height);
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
for( y = -origin.y, yDest = 0; y < height; y += bh0, yDest += bh0 )
{
for( x = -origin.x, xDest = 0; x < width; x += bw0, xDest += bw0 )
{
int bw = std::min( bw0, width - x);
int bh = std::min( bh0, height - y);
// to avoid dimensions errors
if (bw <= 0 || bh <= 0)
break;
Mat _XY(bh, bw, CV_16SC2, XY), _A;
Mat dpart(dst, Rect(xDest, yDest, bw, bh));
for( y1 = 0; y1 < bh; y1++ )
{
short* xy = XY + y1*bw*2;
double X0 = M[0]*x + M[1]*(y + y1) + M[2];
double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
if( interpolation == INTER_NEAREST )
for( x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? 1./W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
xy[x1*2] = (short)X;
xy[x1*2+1] = (short)Y;
}
else
{
short* alpha = A + y1*bw;
for( x1 = 0; x1 < bw; x1++ )
{
double W = W0 + M[6]*x1;
W = W ? INTER_TAB_SIZE/W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
xy[x1*2] = (short)(X >> INTER_BITS) + origin.x;
xy[x1*2+1] = (short)(Y >> INTER_BITS) + origin.y;
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
}
}
if( interpolation == INTER_NEAREST )
remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
else
{
Mat _A(bh, bw, CV_16U, A);
remap( src, dpart, _XY, _A, interpolation, borderType, borderValue );
}
}
}
}
with this function:
CvPoint transformPoint(const CvPoint pointToTransform, const CvMat* matrix) {
double coordinates[3] = {pointToTransform.x, pointToTransform.y, 1};
CvMat originVector = cvMat(3, 1, CV_64F, coordinates);
CvMat transformedVector = cvMat(3, 1, CV_64F, coordinates);
cvMatMul(matrix, &originVector, &transformedVector);
CvPoint outputPoint = cvPoint((int)(cvmGet(&transformedVector, 0, 0) / cvmGet(&transformedVector, 2, 0)), (int)(cvmGet(&transformedVector, 1, 0) / cvmGet(&transformedVector, 2, 0)));
return outputPoint;
}
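For completeness, the same point transform with the C++ API might look like this (a sketch; transformPointCpp is a hypothetical helper and assumes a 3x3 CV_64F homography):
// Sketch: cv::perspectiveTransform applies the 3x3 matrix and the homogeneous divide.
cv::Point2f transformPointCpp(const cv::Point2f& pt, const cv::Mat& M)
{
    std::vector<cv::Point2f> in(1, pt), out(1);
    cv::perspectiveTransform(in, out, M);
    return out[0];
}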
A much simpler and cleaner solution is to modify the perspective transformation. You can do a translation which moves the origin to the desired position, then do the perspective transformation and finally do the inverse translation.
Here is a small example program in python, which rotates an image by 45 degrees around the point(100, 100):
import cv2
import numpy as np
def translation_mat(dx, dy):
return np.array([1, 0, dx, 0, 1, dy, 0, 0, 1]).reshape((3,3))
def main():
img = cv2.imread(r"pigeon.png", cv2.IMREAD_GRAYSCALE)
# a simple rotation by 45 degrees
rot = np.array([np.sin(np.pi/4), -np.cos(np.pi/4), 0, np.cos(np.pi/4), np.sin(np.pi/4), 0, 0, 0, 1]).reshape((3,3))
t1 = translation_mat(-100, -100)
t2 = translation_mat(100, 100)
rot_shifted = t2.dot(rot.dot(t1))
size = (img.shape[1], img.shape[0])
img1 = cv2.warpPerspective(img, rot, size)
img2 = cv2.warpPerspective(img, rot_shifted, size)
cv2.imshow("Original image", img)
cv2.imshow("Rotated around (0,0)", img1)
cv2.imshow("Rotated around(100, 100)", img2)
cv2.waitKey(0)
if __name__ == '__main__':
main()
Note that you read the order of transformations from right to left.
rot_shifted = t2.dot(rot.dot(t1))
will apply t1 first, then rot, and then t2.
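The same translate-rotate-translate composition can be written with the C++ warpPerspective as well (a sketch, assuming img is a loaded cv::Mat and the angle is given in radians; rotateAround is a hypothetical helper):
// Sketch: build M = T2 * R * T1 so the rotation happens around 'origin'
// instead of (0,0), then let warpPerspective do the work.
Mat rotateAround(const Mat& img, Point2f origin, double angleRad)
{
    Mat t1 = (Mat_<double>(3, 3) << 1, 0, -origin.x,  0, 1, -origin.y,  0, 0, 1);
    Mat t2 = (Mat_<double>(3, 3) << 1, 0,  origin.x,  0, 1,  origin.y,  0, 0, 1);
    Mat rot = (Mat_<double>(3, 3) << std::cos(angleRad), -std::sin(angleRad), 0,
                                     std::sin(angleRad),  std::cos(angleRad), 0,
                                     0,                   0,                  1);
    Mat M = t2 * rot * t1;              // applied right to left: t1, then rot, then t2
    Mat dst;
    warpPerspective(img, dst, M, img.size());
    return dst;
}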
For those of you looking for this piece in Python, here's a start. I'm not 100% sure it works, as I've stripped some optimizations from it. Also, there is an issue with linear interpolation; I simply didn't use it, but you might want to take a closer look if you do.
import cv2
import numpy as np
def warp_perspective(src, M, (width, height), (origin_x, origin_y),
flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT,
borderValue=0, dst=None):
"""
Implementation in Python using base code from
http://stackoverflow.com/questions/4279008/specify-an-origin-to-warpperspective-function-in-opencv-2-x
Note there is an issue with linear interpolation.
"""
B_SIZE = 32
if dst == None:
dst = np.zeros((height, width, 3), dtype=src.dtype)
# Set interpolation mode.
interpolation = flags & cv2.INTER_MAX
if interpolation == cv2.INTER_AREA:
raise Exception('Area interpolation is not supported!')
# Prepare matrix.
M = M.astype(np.float64)
if not(flags & cv2.WARP_INVERSE_MAP):
M = cv2.invert(M)[1]
M = M.flatten()
x_dst = y_dst = 0
for y in xrange(-origin_y, height, B_SIZE):
for x in xrange(-origin_x, width, B_SIZE):
print (x, y)
# Block dimensions.
bw = min(B_SIZE, width - x_dst)
bh = min(B_SIZE, height - y_dst)
# To avoid dimension errors.
if bw <= 0 or bh <= 0:
break
# View of the destination array.
dpart = dst[y_dst:y_dst+bh, x_dst:x_dst+bw]
# Original code used view of array here, but we're using numpy array's.
XY = np.zeros((bh, bw, 2), dtype=np.int16)
A = np.zeros((bh, bw), dtype=np.uint16)
for y1 in xrange(bh):
X0 = M[0]*x + M[1]*(y + y1) + M[2]
Y0 = M[3]*x + M[4]*(y + y1) + M[5]
W0 = M[6]*x + M[7]*(y + y1) + M[8]
if interpolation == cv2.INTER_NEAREST:
for x1 in xrange(bw):
W = np.float64(W0 + M[6]*x1);
if W != 0:
W = np.float64(1.0)/W
X = np.int32((X0 + M[0]*x1)*W)
Y = np.int32((Y0 + M[3]*x1)*W)
XY[y1, x1][0] = np.int16(X)
XY[y1, x1][1] = np.int16(Y)
else:
for x1 in xrange(bw):
W = np.float64(W0 + M[6]*x1);
if W != 0:
W = cv2.INTER_TAB_SIZE/W
X = np.int32((X0 + M[0]*x1)*W)
Y = np.int32((Y0 + M[3]*x1)*W)
XY[y1, x1][0] = np.int16((X >> cv2.INTER_BITS) + origin_x)
XY[y1, x1][1] = np.int16((Y >> cv2.INTER_BITS) + origin_y)
A[y1, x1] = np.int16(((Y & (cv2.INTER_TAB_SIZE-1))*cv2.INTER_TAB_SIZE + (X & (cv2.INTER_TAB_SIZE-1))))
if interpolation == cv2.INTER_NEAREST:
cv2.remap(src, XY, None, interpolation, dst=dpart,
borderMode=borderMode, borderValue=borderValue)
else:
cv2.remap(src, XY, A, interpolation, dst=dpart,
borderMode=borderMode, borderValue=borderValue)
x_dst += B_SIZE
x_dst = 0
y_dst += B_SIZE
return dst