Tensorflow.js: WebGL context lost when concatenating tensors - webgl

I'm playing around with tensorflow.js Transfer learning tutorial/guide source code and getting following error when concatenating two tensors:
Couldn't parse line number in error: tfjs#0.13.0:2
precision highp float; tfjs#0.13.0:2
precision highp int;
varying vec2 resultUV;
const vec2 halfCR = vec2(0.5, 0.5);
struct ivec5
{
int x;
int y;
int z;
int w;
int u;
};
struct ivec6
{
int x;
int y;
int z;
int w;
int u;
int v;
};
bool isNaN(float val) {
return (val < 0.0 || 0.0 < val || val == 0.0) ? false : true;
}
bool hasNaN(vec4 values) {
vec4 v1 = values * values;
vec4 v2 = values * values;
return any(notEqual(v1, v2));
}
float getNaN(vec4 values) {
return dot(vec4(1), values);
}
int round(float value) {
return int(floor(value + 0.5));
}
int imod(int x, int y) {
return x - y * (x / y);
}
//Based on the work of Dave Hoskins
//https://www.shadertoy.com/view/4djSRW
#define HASHSCALE1 443.8975
float random(float seed){
vec2 p = resultUV * seed;
vec3 p3 = fract(vec3(p.xyx) * HASHSCALE1);
p3 += dot(p3, p3.yzx + 19.19);
return fract((p3.x + p3.y) * p3.z);
}
vec2 UVfrom1D(int texNumR, int texNumC, int index) {
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 UVfrom2D(int texNumR, int texNumC, int numC, int row, int col) {
int index = row * numC + col;
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 UVfrom3D(int texNumR, int texNumC, int stride0,
int stride1, int row, int col, int depth) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * stride0 + col * stride1 + depth;
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 UVfrom4D(int texNumR, int texNumC, int stride0,
int stride1, int stride2, int row, int col, int depth,
int depth2) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * stride0 + col * stride1 + depth * stride2 + depth2;
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 UVfrom5D(int texNumR, int texNumC, int stride0,
int stride1, int stride2, int stride3, int row, int col, int depth,
int depth2, int depth3) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * stride0 + col * stride1 +
depth * stride2 + depth2 * stride3 + depth3;
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
vec2 UVfrom6D(int texNumR, int texNumC, int stride0,
int stride1, int stride2, int stride3, int stride4,
int row, int col, int depth, int depth2, int depth3, int depth4) {
// Explicitly use integer operations as dot() only works on floats.
int index = row * stride0 + col * stride1 + depth * stride2 + depth2 *
stride3 + depth3 * stride4 + depth4;
int texR = index / texNumC;
int texC = index - texR * texNumC;
return (vec2(texC, texR) + halfCR) / vec2(texNumC, texNumR);
}
float sampleTexture(sampler2D textureSampler, vec2 uv) {
return texture2D(textureSampler, uv).r;
}
void setOutput(float val) {
gl_FragColor = vec4(val, 0, 0, 0);
}
uniform sampler2D A;
uniform sampler2D B;
ivec2 getOutputCoords() {
ivec2 resTexRC = ivec2(resultUV.yx *
vec2(1984, 2352));
int index = resTexRC.x * 2352 + resTexRC.y;
return ivec2(0, index);
}
float getAFlat(int index) {
vec2 uv = UVfrom1D(1792, 2597, index);
return sampleTexture(A, uv);
}
float getA(int index) {
return getAFlat(index);
}
float getA(int row, int col) {
return getA(col);
}
float getBFlat(int index) {
vec2 uv = UVfrom1D(7, 1792, index);
return sampleTexture(B, uv);
}
float getB(int index) {
return getBFlat(index);
}
float getB(int row, int col) {
return getB(col);
}
void main() {
ivec2 coords = getOutputCoords();
int yR = coords.x;
int yC = coords.y;
float value = 0.0;
if (yC < 4653824) {
value = getA(yR, yC);
} else {
yC -= 4653824;
value = getB(yR, yC);
}
setOutput(value);
}
Uncaught Error: Failed to compile fragment shader.
at createFragmentShader (tfjs#0.13.0:2)
at e.createProgram (tfjs#0.13.0:2)
at compileProgram (tfjs#0.13.0:2)
at tfjs#0.13.0:2
at e.getAndSaveBinary (tfjs#0.13.0:2)
at e.compileAndRun (tfjs#0.13.0:2)
at e.concat2Tensors (tfjs#0.13.0:2)
at e.concat (tfjs#0.13.0:2)
at tfjs#0.13.0:2
at tfjs#0.13.0:2
WebGL: CONTEXT_LOST_WEBGL: loseContext: context lost
Uncaught Error: Unable to create fragment WebGLShader.
at throwIfNull (tfjs#0.13.0:2)
at createFragmentShader (tfjs#0.13.0:2)
at e.createProgram (tfjs#0.13.0:2)
at compileProgram (tfjs#0.13.0:2)
at tfjs#0.13.0:2
at e.getAndSaveBinary (tfjs#0.13.0:2)
at e.compileAndRun (tfjs#0.13.0:2)
at e.concat2Tensors (tfjs#0.13.0:2)
at e.concat (tfjs#0.13.0:2)
at tfjs#0.13.0:2
I'm creating an application which takes an audio signal in through the laptop's mic input and associates signal frequencies with images captured from the laptop's webcam stream. I'm feeding an electric guitar signal into the mic input so I can control the frequency by hand, and I use the Web Audio API ScriptProcessorNode's onaudioprocess function to capture the frequency data.
When frequency is clear enough image is captured from webcam stream and then transformed into tensor with tf.fromPixels method and passed through truncated mobilenet model to get internal activations for later use. On a first round internal activations from truncated model are saved into a variable and after first time new internal activations are concatenated to previous activation with concat() method. All this is happening many times per second if the input signal frequency is steady and clear. This works fine some time but after the concatenated tensor has reached size of 4678912 program crashes and produces error detailed above. I'm not sure if tensor size is crucial here.
I'm running the application in the Chrome browser.
Here is the concatenating function which causes the error I suppose :
// Adds one training example (a MobileNet activation tensor) with its class
// label to the dataset, growing this.xs / this.ys along the batch axis.
// NOTE(review): the dataset lives in a single ever-growing GPU tensor; per
// the error above, each concat on tfjs@0.13.0 compiles a shader sized by the
// tensor, which eventually loses the WebGL context — consider capping the
// number of examples or staging batches on the CPU.
addExample(example, label) {
// One-hot encode the label.
const y = tf.tidy(() => tf.oneHot(tf.tensor1d([label]).toInt(), this.numClasses));
if (this.xs == null) {
// For the first example that gets added, keep example and y so that the
// ControllerDataset owns the memory of the inputs. This makes sure that
// if addExample() is called in a tf.tidy(), these Tensors will not get
// disposed.
this.xs = tf.keep(example);
this.ys = tf.keep(y);
} else {
// Append along axis 0 (batch), then dispose the superseded tensors so
// only the concatenated copies stay resident.
const oldX = this.xs;
this.xs = tf.keep(oldX.concat(example, 0));
const oldY = this.ys;
this.ys = tf.keep(oldY.concat(y, 0));
oldX.dispose();
oldY.dispose();
y.dispose();
}
}
And here is the concatenated tensor just before crash:
e {isDisposedInternal: false, shape: Array(4), dtype: "float32", size:
4678912, strides: Array(3), …}
dataId: {}
dtype: "float32"
id: 117079
isDisposed: (...)
isDisposedInternal: false
rank: (...)
rankType: "4"
shape: Array(4)
0: 373
1: 7
2: 7
3: 256
length: 4
__proto__: Array(0)
size: 4678912
strides: Array(3)
0: 12544
1: 1792
2: 256
length: 3
__proto__: Array(0)
__proto__: Object
The error also appears when I try the original demo (the Pac-Man game) and add 300-400 examples to one of the four controllers. The basic logic in my application is the same as in the original demo. What differs is the number of classes (48 classes representing different note frequencies from an electric guitar) and the way the examples are added in the ScriptProcessorNode. Also, the volume of examples needed is much bigger.
I don't know where to start to digging into the error. My webGL knowledge is 0. Any help is appreciated.

Related

How can I calculate the mean and variance value of an image with 16 channels using Metal Shading Language

How can I calculate the mean and variance values of an image with 16 channels using Metal?
I want to calculate the mean and variance of each channel separately!
ex.:
// Skeleton compute kernel from the question: reads a 16-channel image as a
// texture2d_array (four RGBA slices) and is meant to write each channel's
// mean and variance to `out`. Body intentionally empty — the answer below
// supplies an implementation.
kernel void meanandvariance(texture2d_array<float, access::read> in[[texture(0)]],
texture2d_array<float, access::write> out[[texture(1)]],
ushort3 gid[[thread_position_in_grid]],
ushort tid[[thread_index_in_threadgroup]],
ushort3 tg_size[[threads_per_threadgroup]]) {
}
There's probably a way to do this by creating a sequence of texture views on the input texture array and output texture array, encoding a MPSImageStatisticsMeanAndVariance kernel invocation for each slice.
But let's take a look at how to do it ourselves. There are many different possible approaches, so I chose one that was simple and used some interesting results from statistics.
Essentially, we'll do the following:
Write a kernel that can produce a subset mean and variance for a single row of the image.
Write a kernel that can produce an overall mean and variance from the partial results from step 1.
Here are the kernels:
// Step 1: one thread per (row, slice) computes that row's mean and variance
// using Welford's numerically-stable online update, then writes them to
// column `row` of the intermediate texture (y = 0 holds means, y = 1 holds
// variances).
kernel void compute_row_mean_variance_array(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 tpig [[thread_position_in_grid]])
{
uint row = tpig.x;
uint slice = tpig.y;
uint width = inTexture.get_width();
// Guard against threads dispatched past the image bounds.
if (row >= inTexture.get_height() || slice >= inTexture.get_array_size()) { return; }
float4 mean(0.0f);
float4 var(0.0f);
for (uint col = 0; col < width; ++col) {
float4 rgba = inTexture.read(ushort2(col, row), slice);
// http://datagenetics.com/blog/november22017/index.html
float weight = 1.0f / (col + 1);
float4 oldMean = mean;
mean = mean + (rgba - mean) * weight;
var = var + (rgba - oldMean) * (rgba - mean);
}
// The loop accumulates the sum of squared deviations; divide for variance.
var = var / width;
outTexture.write(mean, ushort2(row, 0), slice);
outTexture.write(var, ushort2(row, 1), slice);
}
// Step 2: one thread per slice folds the per-row statistics from step 1 into
// the slice's overall mean/variance. Overall variance = mean of the row
// variances + variance of the row means (valid because every row contributes
// the same number of samples).
kernel void reduce_mean_variance_array(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 tpig [[thread_position_in_grid]])
{
uint width = inTexture.get_width();
uint slice = tpig.x;
// https://arxiv.org/pdf/1007.1012.pdf
float4 mean(0.0f);
float4 meanOfVar(0.0f);
float4 varOfMean(0.0f);
for (uint col = 0; col < width; ++col) {
float weight = 1.0f / (col + 1);
float4 oldMean = mean;
// Row means live at y = 0 of the intermediate texture, variances at y = 1.
float4 submean = inTexture.read(ushort2(col, 0), slice);
mean = mean + (submean - mean) * weight;
float4 subvar = inTexture.read(ushort2(col, 1), slice);
meanOfVar = meanOfVar + (subvar - meanOfVar) * weight;
varOfMean = varOfMean + (submean - oldMean) * (submean - mean);
}
float4 var = meanOfVar + varOfMean / width;
// Output layout per slice: x = 0 holds the mean, x = 1 the variance.
outTexture.write(mean, ushort2(0, 0), slice);
outTexture.write(var, ushort2(1, 0), slice);
}
In summary, to achieve step 1, we use an "online" (incremental) algorithm to calculate the partial mean/variance of the row in a way that's more numerically-stable than just adding all the pixel values and dividing by the width. My reference for writing this kernel was this post. Each thread in the grid writes its row's statistics to the appropriate column and slice of an intermediate texture array.
To achieve step 2, we need to find a statistically-sound way of computing the overall statistics from the partial results. This is quite simple in the case of finding the mean: the mean of the population is the mean of the means of the subsets (this holds when the sample size of each subset is the same; in the general case, the overall mean is a weighted sum of the subset means). The variance is trickier, but it turns out that the variance of the population is the sum of the mean of the variances of the subsets and the variance of the means of the subsets (the same caveat about equally-sized subsets applies here). This is a convenient fact that we can combine with our incremental approach above to produce the final mean and variance of each slice, which is written to the corresponding slice of the output texture.
For completeness, here's the Swift code I used to drive these kernels:
let library = device.makeDefaultLibrary()!
let meanVarKernelFunction = library.makeFunction(name: "compute_row_mean_variance_array")!
let meanVarComputePipelineState = try! device.makeComputePipelineState(function: meanVarKernelFunction)
let reduceKernelFunction = library.makeFunction(name: "reduce_mean_variance_array")!
let reduceComputePipelineState = try! device.makeComputePipelineState(function: reduceKernelFunction)
let width = sourceTexture.width
let height = sourceTexture.height
let arrayLength = sourceTexture.arrayLength
let textureDescriptor = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .rgba32Float, width: width, height: height, mipmapped: false)
textureDescriptor.textureType = .type2DArray
textureDescriptor.arrayLength = arrayLength
textureDescriptor.width = height
textureDescriptor.height = 2
textureDescriptor.usage = [.shaderRead, .shaderWrite]
let partialResultsTexture = device.makeTexture(descriptor: textureDescriptor)!
textureDescriptor.width = 2
textureDescriptor.height = 1
textureDescriptor.usage = .shaderWrite
let destTexture = device.makeTexture(descriptor: textureDescriptor)!
let commandBuffer = commandQueue.makeCommandBuffer()!
let computeCommandEncoder = commandBuffer.makeComputeCommandEncoder()!
computeCommandEncoder.setComputePipelineState(meanVarComputePipelineState)
computeCommandEncoder.setTexture(sourceTexture, index: 0)
computeCommandEncoder.setTexture(partialResultsTexture, index: 1)
let meanVarGridSize = MTLSize(width: sourceTexture.height, height: sourceTexture.arrayLength, depth: 1)
let meanVarThreadgroupSize = MTLSizeMake(meanVarComputePipelineState.threadExecutionWidth, 1, 1)
let meanVarThreadgroupCount = MTLSizeMake((meanVarGridSize.width + meanVarThreadgroupSize.width - 1) / meanVarThreadgroupSize.width,
(meanVarGridSize.height + meanVarThreadgroupSize.height - 1) / meanVarThreadgroupSize.height,
1)
computeCommandEncoder.dispatchThreadgroups(meanVarThreadgroupCount, threadsPerThreadgroup: meanVarThreadgroupSize)
computeCommandEncoder.setComputePipelineState(reduceComputePipelineState)
computeCommandEncoder.setTexture(partialResultsTexture, index: 0)
computeCommandEncoder.setTexture(destTexture, index: 1)
let reduceThreadgroupSize = MTLSizeMake(1, 1, 1)
let reduceThreadgroupCount = MTLSizeMake(arrayLength, 1, 1)
computeCommandEncoder.dispatchThreadgroups(reduceThreadgroupCount, threadsPerThreadgroup: reduceThreadgroupSize)
computeCommandEncoder.endEncoding()
let destTexture2DDesc = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .rgba32Float, width: 2, height: 1, mipmapped: false)
destTexture2DDesc.usage = .shaderWrite
let destTexture2D = device.makeTexture(descriptor: destTexture2DDesc)!
meanVarKernel.encode(commandBuffer: commandBuffer, sourceTexture: sourceTexture2D, destinationTexture: destTexture2D)
#if os(macOS)
let blitCommandEncoder = commandBuffer.makeBlitCommandEncoder()!
blitCommandEncoder.synchronize(resource: destTexture)
blitCommandEncoder.synchronize(resource: destTexture2D)
blitCommandEncoder.endEncoding()
#endif
commandBuffer.commit()
commandBuffer.waitUntilCompleted()
In my experiments, this program produced the same results as MPSImageStatisticsMeanAndVariance, give or take some differences on the order of 1e-7. It was also 2.5x slower than MPS on my Mac, probably due in part to failure to exploit latency hiding with granular parallelism.
#include <metal_stdlib>
using namespace metal;
// Instance normalisation: for each array slice (gid.z) compute the mean and
// variance of the whole width x height plane with a two-level threadgroup
// reduction, then write scale * (x - mean) / sigma + shift, clamped to
// [-10, 10].
// NOTE(review): shared_mem holds 256 entries and tid indexes it directly, so
// this assumes tg_size.x * tg_size.y <= 256 and one threadgroup per slice
// (gid.xy are used as both grid and threadgroup-local offsets) — confirm
// against the dispatch configuration.
kernel void instance_norm(constant float4* scale[[buffer(0)]],
constant float4* shift[[buffer(1)]],
texture2d_array<float, access::read> in[[texture(0)]],
texture2d_array<float, access::write> out[[texture(1)]],
ushort3 gid[[thread_position_in_grid]],
ushort tid[[thread_index_in_threadgroup]],
ushort3 tg_size[[threads_per_threadgroup]]) {
ushort width = in.get_width();
ushort height = in.get_height();
const ushort thread_count = tg_size.x * tg_size.y;
threadgroup float4 shared_mem [256];
// First pass: each thread sums a strided subset of the plane.
float4 sum = 0;
for(ushort xIndex = gid.x; xIndex < width; xIndex += tg_size.x) {
for(ushort yIndex = gid.y; yIndex < height; yIndex += tg_size.y) {
sum += in.read(ushort2(xIndex, yIndex), gid.z);
}
}
shared_mem[tid] = sum;
threadgroup_barrier(mem_flags::mem_threadgroup);
// Reduce to 32 values
sum = 0;
if (tid < 32) {
for (ushort i = tid + 32; i < thread_count; i += 32) {
sum += shared_mem[i];
}
}
shared_mem[tid] += sum;
threadgroup_barrier(mem_flags::mem_threadgroup);
// Calculate mean
sum = 0;
if (tid == 0) {
ushort top = min(ushort(32), thread_count);
for (ushort i = 0; i < top; i += 1) {
sum += shared_mem[i];
}
shared_mem[0] = sum / (width * height);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
const float4 mean = shared_mem[0];
threadgroup_barrier(mem_flags::mem_threadgroup);
// Variance: second pass over the plane, now that the mean is known.
sum = 0;
for(ushort xIndex = gid.x; xIndex < width; xIndex += tg_size.x) {
for(ushort yIndex = gid.y; yIndex < height; yIndex += tg_size.y) {
sum += pow(in.read(ushort2(xIndex, yIndex), gid.z) - mean, 2);
}
}
shared_mem[tid] = sum;
threadgroup_barrier(mem_flags::mem_threadgroup);
// Reduce to 32 values
sum = 0;
if (tid < 32) {
for (ushort i = tid + 32; i < thread_count; i += 32) {
sum += shared_mem[i];
}
}
shared_mem[tid] += sum;
threadgroup_barrier(mem_flags::mem_threadgroup);
// Calculate variance
sum = 0;
if (tid == 0) {
ushort top = min(ushort(32), thread_count);
for (ushort i = 0; i < top; i += 1) {
sum += shared_mem[i];
}
shared_mem[0] = sum / (width * height);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
// 1e-4 epsilon keeps sigma finite for constant slices.
const float4 sigma = sqrt(shared_mem[0] + float4(1e-4));
float4 multiplier = scale[gid.z] / sigma;
for(ushort xIndex = gid.x; xIndex < width; xIndex += tg_size.x) {
for(ushort yIndex = gid.y; yIndex < height; yIndex += tg_size.y) {
float4 val = in.read(ushort2(xIndex, yIndex), gid.z);
out.write(clamp((val - mean) * multiplier + shift[gid.z], -10.0, 10.0), ushort2(xIndex, yIndex), gid.z);
}
}
}
This is how Bender implements it, but I do not think it is correct — can anybody verify it?
https://github.com/xmartlabs/Bender/blob/master/Sources/Metal/instanceNorm.metal

WTV in Opencv's Opencl code for Image resizing

What does WTV stands for in the following Opencl code?
I can't find much info for that. The code is from Opencv for processing on gpu.
// INTER_AREA resize kernel from OpenCV's OpenCL back-end.
// Fix: the leading `__kernel` qualifier had been split across two lines
// ("__" / "kernel"), which does not tokenize; rejoined here.
//
// WTV ("work type, vector"), TSIZE, loadpix/storepix and the convertToWTV /
// convertToT conversions are not defined in this listing: OpenCV injects
// them at kernel build time via -D compiler options (e.g. -D WTV=float4
// -D TSIZE=4). WTV is the vector accumulator type used while summing
// weighted source pixels.
//
// ofs_tab / map_tab / alpha_tab are precomputed on the host: for every
// destination row/column they list the contributing source rows/columns and
// their interpolation weights. src_rows, ifx and ify are unused here but
// belong to the generated signature.
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
                         float ifx, float ify, __global const int * ofs_tab,
                         __global const int * map_tab, __global const float * alpha_tab)
{
    int dx = get_global_id(0);
    int dy = get_global_id(1);
    if (dx < dst_cols && dy < dst_rows)
    {
        int dst_index = mad24(dy, dst_step, dst_offset);
        // Each host table packs its x- and y- variants back to back.
        __global const int * xmap_tab = map_tab;
        __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
        __global const float * xalpha_tab = alpha_tab;
        __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
        __global const int * xofs_tab = ofs_tab;
        __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
        // Ranges [xk0, xk1) and [yk0, yk1) index the source pixels that
        // contribute to destination pixel (dx, dy).
        int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
        int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
        int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
        int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
        WTV sum = (WTV)(0), buf;
        int src_index = mad24(sy0, src_step, src_offset);
        // Weighted 2-D accumulation: the inner loop blends source columns
        // (alpha weights), the outer loop blends per-row results (beta).
        for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
        {
            WTV beta = (WTV)(yalpha_tab[yk]);
            buf = (WTV)(0);
            for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
            {
                WTV alpha = (WTV)(xalpha_tab[xk]);
                buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
            }
            sum += buf * beta;
        }
        storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));
    }
}
It is not defined in the source you shared. It appears to be a type, like float. Just guessing: it's defined using "-D WTV=something" while compiling the kernel.

Need help in Parallelizing if and else condition in CUDA C program

I have written a filter for image blurring in C and it's working fine, I am trying to run in on GPU using CUDA C for faster processing. The program has a few if and else conditions as can be seen below for the C code version,
The input to the function being input image, output image, and size of columns.
/* Forward (causal) pass of Young & van Vliet's recursive Gaussian filter
 * over one row: out[i] = B*in[i] + bf[0]*out[i-3] + bf[1]*out[i-2] +
 * bf[2]*out[i-1]. B and bf[] are file-scope coefficients set up by
 * convolve_young2D.
 * NOTE(review): out[0..2] are written unconditionally, so callers must
 * guarantee datasize >= 3 — confirm. */
void convolve_young1D(double * in, double * out, int datasize) {
int i, j;
/* Compute first 3 output elements */
out[0] = B*in[0];
out[1] = B*in[1] + bf[2]*out[0];
out[2] = B*in[2] + (bf[1]*out[0]+bf[2]*out[1]);
/* Recursive computation of output in forward direction using filter parameters bf and B */
for (i=3; i<datasize; i++) {
out[i] = B*in[i];
for (j=0; j<3; j++) {
out[i] += bf[j]*out[i-(3-j)];
}
}
}
//Calling function below
void convolve_young2D(int rows, int columns, int sigma, double ** ip_padded) {
/** \brief Filter radius */
w = 3*sigma;
/** \brief Filter parameter q */
double q;
if (sigma < 2.5)
q = 3.97156 - 4.14554*sqrt(1-0.26891*sigma);
else
q = 0.98711*sigma - 0.9633;
/** \brief Filter parameters b0, b1, b2, b3 */
double b0 = 1.57825 + 2.44413*q + 1.4281*q*q + 0.422205*q*q*q;
double b1 = 2.44413*q + 2.85619*q*q + 1.26661*q*q*q;
double b2 = -(1.4281*q*q + 1.26661*q*q*q);
double b3 = 0.422205*q*q*q;
/** \brief Filter parameters bf, bb, B */
bf[0] = b3/b0; bf[1] = b2/b0; bf[2] = b1/b0;
bb[0] = b1/b0; bb[1] = b2/b0; bb[2] = b3/b0;
B = 1 - (b1+b2+b3)/b0;
int i,j;
/* Convolve each row with 1D Gaussian filter */
double *out_t = calloc(columns+(2*w),sizeof(double ));
for (i=0; i<rows+2*w; i++) {
convolve_young1D(ip_padded[i], out_t, columns+2*w);
}
free(out_t);
Tried the same approach with blocks and threads in CUDA C but wasn't successful I have been getting zeros as output and even the input values seem to change to Zeros don't know where I am going wrong please do help. I am pretty new to CUDA C programming. Here is my attempted version of the CUDA Kernel.
// Forward pass of the Young recursive Gaussian filter, one column at a time:
// computes element j of every padded row in parallel (one thread per row).
// Must be launched sequentially for j = 0 .. columns+2*w-1, because
// out[.., j] depends on out[.., j-1 .. j-3].
//
// NOTE(review): `bf` decays to a pointer; if the caller passes a host stack
// array (as the accompanying host code does), dereferencing it on the device
// is invalid — pass the three taps as scalars or via __constant__ memory.
// Confirm against the call site.
__global__ void convolve_young2D( float *in, float *out,int rows,int columns, int j,float B,float bf[3],int w) {
    int k;
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    // Bug fix: the padded image is (rows + 2*w) x (columns + 2*w); the
    // original used `columns` as the row stride, walking off each padded row.
    int stride = columns + 2*w;
    // Bug fix: the original guard `x > 0` skipped row 0; the CPU reference
    // processes every row 0 <= x < rows + 2*w.
    if (x < rows + 2*w)
    {
        int base = x * stride;
        if (j == 0)
        {
            // First element of the row: no feedback terms yet.
            out[base] = B*in[base];
        }
        else if (j == 1)
        {
            out[base + 1] = B*in[base + 1] + bf[2]*out[base];
        }
        else if (j == 2)
        {
            // Bug fix: the original wrote to out[2] (row 0 only) instead of
            // the current row's third element.
            out[base + 2] = B*in[base + 2] + (bf[1]*out[base] + bf[2]*out[base + 1]);
        }
        else
        {
            // Recursive computation in the forward direction using filter
            // parameters bf and B.
            out[base + j] = B*in[base + j];
            for (k = 0; k < 3; k++) {
                out[base + j] += bf[k]*out[(base + j) - (3 - k)];
            }
        }
    }
}
//Calling function below
void convolve_young2D(int rows, int columns, int sigma, const float * const ip_padded, float * const op_padded) {
float bf[3], bb[3];
float B;
int w;
/** \brief Filter radius */
w = 3*sigma;
/** \brief Filter parameter q */
float q;
if (sigma < 2.5)
q = 3.97156 - 4.14554*sqrt(1-0.26891*sigma);
else
q = 0.98711*sigma - 0.9633;
/** \brief Filter parameters b0, b1, b2, b3 */
float b0 = 1.57825 + 2.44413*q + 1.4281*q*q + 0.422205*q*q*q;
float b1 = 2.44413*q + 2.85619*q*q + 1.26661*q*q*q;
float b2 = -(1.4281*q*q + 1.26661*q*q*q);
float b3 = 0.422205*q*q*q;
/** \brief Filter parameters bf, bb, B */
bf[0] = b3/b0; bf[1] = b2/b0; bf[2] = b1/b0;
bb[0] = b1/b0; bb[1] = b2/b0; bb[2] = b3/b0;
B = 1 - (b1+b2+b3)/b0;
int p;
const int inputBytes = (rows+2*w) * (columns+2*w) * sizeof(float);
float *d_input, *d_output; // arrays in the GPU´s global memory
cudaMalloc(&d_input, inputBytes);
cudaMemcpy(d_input, ip_padded, inputBytes, cudaMemcpyHostToDevice);
cudaMalloc(&d_output,inputBytes);
for (p = 0; p<columns+2*w; p++){
convolve_young<<<4,500>>>(d_input,d_output,rows,columns,p,B,bf,w);
}
cudaMemcpy(op_padded, d_input, inputBytes, cudaMemcpyDeviceToHost);
cudaFree(d_input);
The first problem is that you call convolve_young<<<4,500>>>(d_input,d_output,rows,columns,p,B,bf,w); but you defined a kernel named convolve_young2D.
Another possible problem is that to do the convolution you do:
for (p = 0; p<columns+2*w; p++){
convolve_young<<<4,500>>>(d_input,d_output,rows,columns,p,B,bf,w);
}
Here you're looping over the columns instead of the rows compared to the CPU algorithm:
for (i=0; i<rows+2*w; i++) {
convolve_young1D(ip_padded[i], out_t, columns+2*w);
}
First you should try to do a direct port of your CPU algorithm, computing one line at the time, and then modify it to transfer the whole image.

Pixel Shader performance on xbox

I've got a pixelshader (below) that i'm using with XNA. On my laptop (crappy graphics card) it runs a little jerky, but ok. I've just tried running it on the xbox and it's horrible!
There's nothing to the game (it's just a fractal renderer) so it's got to be the pixel shader causing the issues. I also think it's the PS code because i've lowered the iterations and it's ok. I've also checked, and the GC delta is zero.
Are there any HLSL functions that are no-no's on the xbox?? I must be doing something wrong here, performance can't be that bad!
#include "FractalBase.fxh"
float ZPower;
float3 Colour;
float3 ColourScale;
// Modulus (absolute value) of a complex number packed in a float2.
float ComAbs(float2 Arg)
{
    // length() is the idiomatic HLSL spelling and computes the same
    // sqrt(x*x + y*y) as the hand-written form.
    return length(Arg);
}
// Raises a complex number Arg (x + iy) to a real Power using De Moivre's
// formula: |z|^p * (cos(p*ang) + i*sin(p*ang)).
float2 ComPow(float2 Arg, float Power)
{
// pow of the squared modulus with Power/2 saves an explicit sqrt.
float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
float Ang = atan2(Arg.y, Arg.x) * Power;
return float2(Mod * cos(Ang), Mod * sin(Ang));
}
// Escape-time fractal colouring (z -> z^ZPower + c). Iterations is fixed at
// compile time via the uniform parameter; the loop exits as soon as |z|^2
// crosses the bailout radius ZPower^2, and the escape count is smoothed into
// a colour.
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
float2 c = texCoord.xy;
float2 z = 0;
float i;
float oldBailoutTest = 0;
float bailoutTest = 0;
for(i = 0; i < Iterations; i++)
{
z = ComPow(z, ZPower) + c;
bailoutTest = z.x * z.x + z.y * z.y;
// Dynamic branch: early exit once the orbit escapes — this is the branch
// the EDIT below removes because it performs poorly on the Xbox GPU.
if(bailoutTest >= ZPower * ZPower)
{
break;
}
oldBailoutTest = bailoutTest;
}
// Fractional escape count for smooth (non-banded) colouring.
float normalisedIterations = i / Iterations;
float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);
float4 Result = normalisedIterations + (1 / factor / Iterations);
// Never-escaped points are painted black; escaped points get the ramp.
Result = (i >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);
return Result;
}
// Binds the sprite vertex shader and the fractal pixel shader; the iteration
// count (128) is baked in at compile time through the uniform parameter.
technique Technique1
{
pass
{
VertexShader = compile vs_3_0 SpriteVertexShader();
PixelShader = compile ps_3_0 FractalPixelShader(128);
}
}
Below is FractalBase.fxh:
float4x4 MatrixTransform : register(vs, c0);
float2 Pan;
float Zoom;
float Aspect;
// Sprite vertex shader: transforms the vertex and derives the complex-plane
// coordinate that the fractal pixel shader iterates on.
void SpriteVertexShader(inout float4 Colour : COLOR0,
inout float2 texCoord : TEXCOORD0,
inout float4 position : SV_Position)
{
position = mul(position, MatrixTransform);
// Convert the position from screen space into complex-plane coordinates,
// applying zoom, aspect-ratio correction and panning.
texCoord = (position) * Zoom * float2(1, Aspect) - float2(Pan.x, -Pan.y);
}
EDIT I did try removing the conditional by using lots of lerps, however when i did that i got loads of artifacts (and not the kind that "belong in a museum"!). I changed things around, and fixed a few logic errors, however the key was to multiply the GreaterThan result by 1 + epsilon, to account for rounding errors just making 0.9999 = 0 (integer). See the fixed code below:
#include "FractalBase.fxh"
float ZPower;
float3 Colour;
float3 ColourScale;
// Modulus of a complex number held in a float2 (equivalently, the length()
// intrinsic).
float ComAbs(float2 Arg)
{
return sqrt(Arg.x * Arg.x + Arg.y * Arg.y);
}
// Complex power via De Moivre's formula (same helper as in the original
// shader above).
float2 ComPow(float2 Arg, float Power)
{
float Mod = pow(Arg.x * Arg.x + Arg.y * Arg.y, Power / 2);
float Ang = atan2(Arg.y, Arg.x) * Power;
return float2(Mod * cos(Ang), Mod * sin(Ang));
}
// Branchless "x > y" indicator: 0.0 when x < y, ~1.001 otherwise.
// Bug fix: the original ((x - y) / (2 * abs(x - y)) + 0.5) evaluates 0/0 =
// NaN when x == y; step(y, x) yields the intended 0/1 without dividing.
// The 1.001 factor compensates for float rounding so a true result still
// truncates to 1 (not 0.9999 -> 0) when later treated as an integer.
float GreaterThan(float x, float y)
{
    return step(y, x) * 1.001;
}
// Branchless rewrite of FractalPixelShader: instead of break-ing out of the
// loop, every iteration runs and lerp() with 0/1 weights freezes z, the
// bailout bookkeeping and the iteration count once escape is detected —
// avoiding the dynamic branch that is slow on the Xbox GPU.
float4 FractalPixelShader(float2 texCoord : TEXCOORD0, uniform float Iterations) : COLOR0
{
float2 c = texCoord.xy;
float2 z = 0;
int i;
float oldBailoutTest = 0;
float bailoutTest = 0;
// KeepGoing: 1 while the orbit is inside the bailout radius, 0 afterwards.
int KeepGoing = 1;
int DoneIterations = Iterations;
// Bailout latches to 1 on the first escaping iteration; the -abs(Bailout)+1
// weight makes the latch one-shot so DoneIterations is not overwritten.
int Bailout = 0;
for(i = 0; i < Iterations; i++)
{
z = lerp(z, ComPow(z, ZPower) + c, KeepGoing);
bailoutTest = lerp(bailoutTest, z.x * z.x + z.y * z.y, KeepGoing);
Bailout = lerp(Bailout, GreaterThan(bailoutTest, ZPower * ZPower), -abs(Bailout) + 1);
KeepGoing = lerp(KeepGoing, 0.0, Bailout);
DoneIterations = lerp(DoneIterations, min(i, DoneIterations), Bailout);
oldBailoutTest = lerp(oldBailoutTest, bailoutTest, KeepGoing);
}
float normalisedIterations = DoneIterations / Iterations;
float factor = (bailoutTest - oldBailoutTest) / (ZPower * ZPower - oldBailoutTest);
float4 Result = normalisedIterations + (1 / factor / Iterations);
Result = (DoneIterations >= Iterations - 1) ? float4(0.0, 0.0, 0.0, 1.0) : float4(Result.x * Colour.r * ColourScale.x, Result.y * Colour.g * ColourScale.y, Result.z * Colour.b * ColourScale.z, 1);
return Result;
}
// Same binding as the original technique, now compiling the branchless
// pixel-shader variant with 128 iterations.
technique Technique1
{
pass
{
VertexShader = compile vs_3_0 SpriteVertexShader();
PixelShader = compile ps_3_0 FractalPixelShader(128);
}
}
The xbox has a pretty large block size, so branching on the xbox isn't always so great. Also the compiler isn't always the most effective at emitting dynamic branches which your code seems to use.
Look into the branch attribute: http://msdn.microsoft.com/en-us/library/bb313972%28v=xnagamestudio.31%29.aspx
Also, if you move the early bailout, does the PC become more similar to the Xbox?
Keep in mind that modern graphics cards are actually quite a bit faster than the Xenon unit by now.

How to simulate fisheye lens effect by openCV?

I am looking for ways to create fisheye lens effect, looked at documentations for openCV, it looks like it contains Camera Calibration functions for radial distortions like fisheye. Is it possible to simulate fisheye distortion by openCV?
If it is possible to do it by openCV, comparing to openGL, which one will generate better results? Thanks.
I created this app using opencv. Is this the effect you are referring to?
I basically coded the formula shown on wikipedia's "Distortion(optics)" I can show the code if needed
Update:
OK, so below is the actual code written in c++ using opencv (not documented so feel free to ask for explanations):
The program receives the following parameters as input: |input image| |output image| |K, which controls the amount of distortion (typically try values around 0.001)| |x coordinate of the center of distortion| |y coordinate of the center of distortion|
So the crux of the program is the double for loop which iterates pixel by pixel over the result image and looks for the matching pixel in the input image using the formula for radial distortion (this is the way image warping is generally done - perhaps counter-intuitively, by back-projection from output to input). There are some subtleties which have to do with the scale of the output image (in this program the resulting image is the same size as the input), and I won't get into them unless you want more details. Enjoy.
#include <cv.h>
#include <highgui.h>
#include <math.h>
#include <unistd.h>
#include <getopt.h>
#include <iostream>
// Bilinearly samples image `arr` at fractional coordinates (idx0 = row,
// idx1 = column), writing up to four channels into `res`. Out-of-bounds
// samples return black (all channels 0).
void sampleImage(const IplImage* arr, float idx0, float idx1, CvScalar& res)
{
if(idx0<0 || idx1<0 || idx0>(cvGetSize(arr).height-1) || idx1>(cvGetSize(arr).width-1)){
res.val[0]=0;
res.val[1]=0;
res.val[2]=0;
res.val[3]=0;
return;
}
// The four neighbouring pixels around the sample point.
float idx0_fl=floor(idx0);
float idx0_cl=ceil(idx0);
float idx1_fl=floor(idx1);
float idx1_cl=ceil(idx1);
CvScalar s1=cvGet2D(arr,(int)idx0_fl,(int)idx1_fl);
CvScalar s2=cvGet2D(arr,(int)idx0_fl,(int)idx1_cl);
CvScalar s3=cvGet2D(arr,(int)idx0_cl,(int)idx1_cl);
CvScalar s4=cvGet2D(arr,(int)idx0_cl,(int)idx1_fl);
// Fractional offsets: x along rows (idx0), y along columns (idx1).
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
// Standard bilinear blend, channel by channel.
res.val[0]= s1.val[0]*(1-x)*(1-y) + s2.val[0]*(1-x)*y + s3.val[0]*x*y + s4.val[0]*x*(1-y);
res.val[1]= s1.val[1]*(1-x)*(1-y) + s2.val[1]*(1-x)*y + s3.val[1]*x*y + s4.val[1]*x*(1-y);
res.val[2]= s1.val[2]*(1-x)*(1-y) + s2.val[2]*(1-x)*y + s3.val[2]*x*y + s4.val[2]*x*(1-y);
res.val[3]= s1.val[3]*(1-x)*(1-y) + s2.val[3]*(1-x)*y + s3.val[3]*x*y + s4.val[3]*x*(1-y);
}
float xscale;
float yscale;
float xshift;
float yshift;
// Back-projects output pixel (x, y) to the source-image x coordinate using
// the radial distortion model x' = x + (x - cx) * k * r^2. Relies on the
// file-scope xscale/yscale/xshift/yshift set up in main().
float getRadialX(float x,float y,float cx,float cy,float k){
    const float sx = x*xscale+xshift;   // undo the output-image rescale
    const float sy = y*yscale+yshift;
    const float dx = sx - cx;
    const float dy = sy - cy;
    const float r2 = dx*dx + dy*dy;     // squared distance from distortion centre
    return sx + dx*k*r2;
}
// Back-projects output pixel (x, y) to the source-image y coordinate using
// the radial distortion model y' = y + (y - cy) * k * r^2. Mirror of
// getRadialX for the vertical axis.
float getRadialY(float x,float y,float cx,float cy,float k){
    const float sx = x*xscale+xshift;
    const float sy = y*yscale+yshift;
    const float dx = sx - cx;
    const float dy = sy - cy;
    const float r2 = dx*dx + dy*dy;
    return sy + dy*k*r2;
}
float thresh = 1;
// Bisection search between x1 and x2 for the zero crossing of the radial
// polynomial x + (x - cx) * k * (x - cx)^2; used to find how far an image
// edge shifts under the distortion. `thresh` (file scope) is the
// convergence tolerance. Note: `and` is the ISO C++ alternative token for &&.
// NOTE(review): recursion does not terminate if no root lies in [x1, x2] —
// the callers in main() appear to guarantee one; confirm.
float calc_shift(float x1,float x2,float cx,float k){
float x3 = x1+(x2-x1)*0.5;
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
// std::cerr<<"x1: "<<x1<<" - "<<res1<<" x3: "<<x3<<" - "<<res3<<std::endl;
if(res1>-thresh and res1 < thresh)
return x1;
if(res3<0){
return calc_shift(x3,x2,cx,k);
}
else{
return calc_shift(x1,x3,cx,k);
}
}
int main(int argc, char** argv)
{
IplImage* src = cvLoadImage( argv[1], 1 );
IplImage* dst = cvCreateImage(cvGetSize(src),src->depth,src->nChannels);
IplImage* dst2 = cvCreateImage(cvGetSize(src),src->depth,src->nChannels);
float K=atof(argv[3]);
float centerX=atoi(argv[4]);
float centerY=atoi(argv[5]);
int width = cvGetSize(src).width;
int height = cvGetSize(src).height;
xshift = calc_shift(0,centerX-1,centerX,K);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,K);
yshift = calc_shift(0,centerY-1,centerY,K);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,K);
// scale = (centerX-xshift)/centerX;
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
std::cerr<<xshift<<" "<<yshift<<" "<<xscale<<" "<<yscale<<std::endl;
std::cerr<<cvGetSize(src).height<<std::endl;
std::cerr<<cvGetSize(src).width<<std::endl;
for(int j=0;j<cvGetSize(dst).height;j++){
for(int i=0;i<cvGetSize(dst).width;i++){
CvScalar s;
float x = getRadialX((float)i,(float)j,centerX,centerY,K);
float y = getRadialY((float)i,(float)j,centerX,centerY,K);
sampleImage(src,y,x,s);
cvSet2D(dst,j,i,s);
}
}
#if 0
cvNamedWindow( "Source1", 1 );
cvShowImage( "Source1", dst);
cvWaitKey(0);
#endif
cvSaveImage(argv[2],dst,0);
#if 0
for(int j=0;j<cvGetSize(src).height;j++){
for(int i=0;i<cvGetSize(src).width;i++){
CvScalar s;
sampleImage(src,j+0.25,i+0.25,s);
cvSet2D(dst,j,i,s);
}
}
cvNamedWindow( "Source1", 1 );
cvShowImage( "Source1", src);
cvWaitKey(0);
#endif
}
Thanks to the above two for this code. I've modified the code transcribed above to Java using Bitmaps instead of BufferedImage, which enables it to run on Android (Android doesn't support AWT). I've also made the effect manipulate only the pixels inside a circle rather than the whole Bitmap, which gives a fisheye "lens" effect. Hope this helps any Android developers.
import android.graphics.Bitmap;
import android.util.Log;
class Filters{
    // Distortion state computed in barrel() and shared with the helpers.
    float xscale;
    float yscale;
    float xshift;
    float yshift;
    int [] s;                       // last bilinear sample, [A, R, G, B]
    private String TAG = "Filters";

    public Filters(){
        Log.e(TAG, "***********inside constructor");
    }

    /**
     * Applies a barrel ("fisheye") distortion of strength k to the input
     * bitmap.  Only pixels within 150 px of the bitmap centre are distorted;
     * everything else is copied through unchanged.
     *
     * @param input source bitmap (not modified)
     * @param k     distortion coefficient; 0 leaves the image unchanged
     * @return a new bitmap of the same size and config
     */
    public Bitmap barrel (Bitmap input, float k){
        Log.e(TAG, "***********inside barrel method ");
        float centerX = input.getWidth()/2; //center of distortion
        float centerY = input.getHeight()/2;
        int width = input.getWidth();       //image bounds
        int height = input.getHeight();
        Bitmap dst = Bitmap.createBitmap(width, height, input.getConfig()); //output pic
        Log.e(TAG, "***********dst bitmap created ");

        // Solve for shift/scale so the distorted result keeps the input size.
        xshift = calc_shift(0, centerX-1, centerX, k);
        float newcenterX = width-centerX;
        float xshift_2 = calc_shift(0, newcenterX-1, newcenterX, k);
        yshift = calc_shift(0, centerY-1, centerY, k);
        float newcenterY = height-centerY;
        float yshift_2 = calc_shift(0, newcenterY-1, newcenterY, k);
        xscale = (width-xshift-xshift_2)/width;
        yscale = (height-yshift-yshift_2)/height;
        Log.e(TAG, "***********about to loop through bm");

        int origPixel; // the pixel in the original image
        for(int j=0; j<dst.getHeight(); j++){
            for(int i=0; i<dst.getWidth(); i++){
                origPixel = input.getPixel(i, j);
                // Back-project output pixel (i, j) to its source position.
                float x = getRadialX((float)i, (float)j, centerX, centerY, k);
                float y = getRadialY((float)i, (float)j, centerX, centerY, k);
                sampleImage(input, x, y);
                // BUGFIX: the alpha byte was missing, which made every
                // distorted pixel fully transparent on ARGB_8888 bitmaps.
                int color = (0xff << 24) | ((s[1]&0x0ff)<<16) | ((s[2]&0x0ff)<<8) | (s[3]&0x0ff);
                // Only pixels inside a circle of radius 150 get the effect.
                if( Math.sqrt( Math.pow(i - centerX, 2) + ( Math.pow(j - centerY, 2) ) ) <= 150 ){
                    dst.setPixel(i, j, color);
                }else{
                    dst.setPixel(i, j, origPixel);
                }
            }
        }
        return dst;
    }

    /**
     * Bilinear sample at fractional position (idx0, idx1); the result is left
     * in the shared s[] array.  Out-of-range positions yield all zeros.
     * NOTE(review): getARGB() reads getPixel(idx1, idx0), i.e. the lookup is
     * transposed relative to barrel()'s (x, y) call.  This cancels out only
     * for square bitmaps with a centred distortion (how this class is used) —
     * verify before applying to non-square input.
     */
    void sampleImage(Bitmap arr, float idx0, float idx1)
    {
        s = new int [4];
        if(idx0<0 || idx1<0 || idx0>(arr.getHeight()-1) || idx1>(arr.getWidth()-1)){
            s[0]=0;
            s[1]=0;
            s[2]=0;
            s[3]=0;
            return;
        }
        float idx0_fl=(float) Math.floor(idx0);
        float idx0_cl=(float) Math.ceil(idx0);
        float idx1_fl=(float) Math.floor(idx1);
        float idx1_cl=(float) Math.ceil(idx1);
        // The four neighbouring pixels.
        int [] s1 = getARGB(arr,(int)idx0_fl,(int)idx1_fl);
        int [] s2 = getARGB(arr,(int)idx0_fl,(int)idx1_cl);
        int [] s3 = getARGB(arr,(int)idx0_cl,(int)idx1_cl);
        int [] s4 = getARGB(arr,(int)idx0_cl,(int)idx1_fl);
        float x = idx0 - idx0_fl;  // fractional offsets
        float y = idx1 - idx1_fl;
        s[0]= (int) (s1[0]*(1-x)*(1-y) + s2[0]*(1-x)*y + s3[0]*x*y + s4[0]*x*(1-y));
        s[1]= (int) (s1[1]*(1-x)*(1-y) + s2[1]*(1-x)*y + s3[1]*x*y + s4[1]*x*(1-y));
        s[2]= (int) (s1[2]*(1-x)*(1-y) + s2[2]*(1-x)*y + s3[2]*x*y + s4[2]*x*(1-y));
        s[3]= (int) (s1[3]*(1-x)*(1-y) + s2[3]*(1-x)*y + s3[3]*x*y + s4[3]*x*(1-y));
    }

    // Unpack a pixel into [A, R, G, B]; note the transposed getPixel(y, x).
    int [] getARGB(Bitmap buf,int x, int y){
        int rgb = buf.getPixel(y, x); // Returns by default ARGB.
        int [] scalar = new int[4];
        scalar[0] = (rgb >>> 24) & 0xFF;
        scalar[1] = (rgb >>> 16) & 0xFF;
        scalar[2] = (rgb >>> 8) & 0xFF;
        scalar[3] = (rgb >>> 0) & 0xFF;
        return scalar;
    }

    // Source column for output pixel (x, y) under distortion k (see barrel()).
    float getRadialX(float x,float y,float cx,float cy,float k){
        x = (x*xscale+xshift);
        y = (y*yscale+yshift);
        float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
        return res;
    }

    // Source row for output pixel (x, y) under distortion k.
    float getRadialY(float x,float y,float cx,float cy,float k){
        x = (x*xscale+xshift);
        y = (y*yscale+yshift);
        float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
        return res;
    }

    float thresh = 1; // residual tolerance for calc_shift()

    // Bisection: find x in [x1, x2] whose distorted position falls within
    // +/-thresh of zero (used to pre-compute the shift values above).
    float calc_shift(float x1,float x2,float cx,float k){
        float x3 = (float)(x1+(x2-x1)*0.5);
        float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
        float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
        if(res1>-thresh && res1 < thresh)
            return x1;
        if(res3<0){
            return calc_shift(x3,x2,cx,k);
        }
        else{
            return calc_shift(x1,x3,cx,k);
        }
    }
}
.
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import android.graphics.Bitmap;
import android.graphics.drawable.BitmapDrawable;
import android.os.Debug;
import android.util.Log;
public class MultiRuntimeProcessorFilter {
private static final String TAG = "mrpf";
private int x = 0;
private Bitmap input = null;
private int radius;
public void createBitmapSections(int nOp, int[] sections){
int processors = nOp;
int jMax = input.getHeight();
int aSectionSize = (int) Math.ceil(jMax/processors);
Log.e(TAG, "++++++++++ sections size = "+aSectionSize);
int k = 0;
for(int h=0; h<processors+1; h++){
sections[h] = k;
k+= aSectionSize;
}
}// end of createBitmapSections()
#SuppressWarnings("unchecked")
public Bitmap barrel (Bitmap input, float k, int r){
this.radius = r;
this.input = input;
int []arr = new int[input.getWidth()*input.getHeight()];
Log.e(TAG, "bitmap height = "+input.getHeight());
int nrOfProcessors = Runtime.getRuntime().availableProcessors();
Log.e(TAG, "no of processors = "+nrOfProcessors);
int[] sections = new int[nrOfProcessors+1];
createBitmapSections(nrOfProcessors,sections);
ExecutorService threadPool = Executors.newFixedThreadPool(nrOfProcessors);
for(int g=0; g<sections.length;g++){
Log.e(TAG, "++++++++++ sections= "+sections[g]);
}
// ExecutorService threadPool = Executors.newFixedThreadPool(nrOfProcessors);
Object[] task = new Object[nrOfProcessors];
for(int z = 0; z < nrOfProcessors; z++){
task[z] = (FutureTask<PartialResult>) threadPool.submit(new PartialProcessing(sections[z], sections[z+1] - 1, input, k));
Log.e(TAG, "++++++++++ task"+z+"= "+task[z].toString());
}
PartialResult[] results = new PartialResult[nrOfProcessors];
try{
for(int t = 0; t < nrOfProcessors; t++){
results[t] = ((FutureTask<PartialResult>) task[t]).get();
results[t].fill(arr);
}
}catch(Exception e){
e.printStackTrace();
}
Bitmap dst2 = Bitmap.createBitmap(arr,input.getWidth(),input.getHeight(),input.getConfig());
return dst2;
}//end of barrel()
public class PartialResult {
int startP;
int endP;
int[] storedValues;
public PartialResult(int startp, int endp, Bitmap input){
this.startP = startp;
this.endP = endp;
this.storedValues = new int[input.getWidth()*input.getHeight()];
}
public void addValue(int p, int result) {
storedValues[p] = result;
}
public void fill(int[] arr) {
for (int p = startP; p < endP; p++){
for(int b=0;b<radius;b++,x++)
arr[x] = storedValues[x];
}
Log.e(TAG, "++++++++++ x ="+x);
}
}//end of partialResult
public class PartialProcessing implements Callable<PartialResult> {
int startJ;
int endJ;
private int[] scalar;
private float xscale;
private float yscale;
private float xshift;
private float yshift;
private float thresh = 1;
private int [] s1;
private int [] s2;
private int [] s3;
private int [] s4;
private int [] s;
private Bitmap input;
private float k;
public PartialProcessing(int startj, int endj, Bitmap input, float k) {
this.startJ = startj;
this.endJ = endj;
this.input = input;
this.k = k;
s = new int[4];
scalar = new int[4];
s1 = new int[4];
s2 = new int[4];
s3 = new int[4];
s4 = new int[4];
}
int [] getARGB(Bitmap buf,int x, int y){
int rgb = buf.getPixel(y, x); // Returns by default ARGB.
// int [] scalar = new int[4];
// scalar[0] = (rgb >>> 24) & 0xFF;
scalar[1] = (rgb >>> 16) & 0xFF;
scalar[2] = (rgb >>> 8) & 0xFF;
scalar[3] = (rgb >>> 0) & 0xFF;
return scalar;
}
float getRadialX(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float getRadialY(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float calc_shift(float x1,float x2,float cx,float k){
float x3 = (float)(x1+(x2-x1)*0.5);
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
if(res1>-thresh && res1 < thresh)
return x1;
if(res3<0){
return calc_shift(x3,x2,cx,k);
}
else{
return calc_shift(x1,x3,cx,k);
}
}
void sampleImage(Bitmap arr, float idx0, float idx1)
{
// s = new int [4];
if(idx0<0 || idx1<0 || idx0>(arr.getHeight()-1) || idx1>(arr.getWidth()-1)){
s[0]=0;
s[1]=0;
s[2]=0;
s[3]=0;
return;
}
float idx0_fl=(float) Math.floor(idx0);
float idx0_cl=(float) Math.ceil(idx0);
float idx1_fl=(float) Math.floor(idx1);
float idx1_cl=(float) Math.ceil(idx1);
s1 = getARGB(arr,(int)idx0_fl,(int)idx1_fl);
s2 = getARGB(arr,(int)idx0_fl,(int)idx1_cl);
s3 = getARGB(arr,(int)idx0_cl,(int)idx1_cl);
s4 = getARGB(arr,(int)idx0_cl,(int)idx1_fl);
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
// s[0]= (int) (s1[0]*(1-x)*(1-y) + s2[0]*(1-x)*y + s3[0]*x*y + s4[0]*x*(1-y));
s[1]= (int) (s1[1]*(1-x)*(1-y) + s2[1]*(1-x)*y + s3[1]*x*y + s4[1]*x*(1-y));
s[2]= (int) (s1[2]*(1-x)*(1-y) + s2[2]*(1-x)*y + s3[2]*x*y + s4[2]*x*(1-y));
s[3]= (int) (s1[3]*(1-x)*(1-y) + s2[3]*(1-x)*y + s3[3]*x*y + s4[3]*x*(1-y));
}
#Override public PartialResult call() {
PartialResult partialResult = new PartialResult(startJ, endJ,input);
float centerX=input.getWidth()/2; //center of distortion
float centerY=input.getHeight()/2;
int width = input.getWidth(); //image bounds
int height = input.getHeight();
xshift = calc_shift(0,centerX-1,centerX,k);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,k);
yshift = calc_shift(0,centerY-1,centerY,k);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,k);
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
int p = startJ*radius;
int origPixel = 0;
int color = 0;
int i;
for (int j = startJ; j < endJ; j++){
for ( i = 0; i < width; i++, p++){
origPixel = input.getPixel(i,j);
float x = getRadialX((float)j,(float)i,centerX,centerY,k);
float y = getRadialY((float)j,(float)i,centerX,centerY,k);
sampleImage(input,x,y);
color = ((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
//Log.e(TAG, "radius = "+radius);
if(((i-centerX)*(i-centerX) + (j-centerY)*(j-centerY)) <= radius*(radius/4)){
partialResult.addValue(p, color);
}else{
partialResult.addValue(p, origPixel);
}
}//end of inner for
}//end of outer for
return partialResult;
}//end of call
}// end of partialprocessing
}//end of MultiProcesorFilter
@And_Dev — as promised.
Below is the view that gets the user's touch co-ordinates and then applies the filter to a selected area; the selected area is a circle defined by a centre point plus a radius. The code does this twice, as it is for a breast-augmentation app :) Just comment out the HorizontalSlider code, as you don't need it.
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import android.content.Context;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.PorterDuff.Mode;
import android.graphics.PorterDuffXfermode;
import android.os.Environment;
import android.util.AttributeSet;
import android.util.Log;
import android.view.MotionEvent;
import android.view.View;
import com.tecmark.HorizontalSlider.OnProgressChangeListener;
public class TouchView extends View{
private File tempFile;
private byte[] imageArray;
private Bitmap bgr;
private Bitmap crop;
private Bitmap crop2;
private Bitmap overLay;
private Bitmap overLay2;
private float centreX;
private float centreY;
private float centreA = 200;
private float centreB = 200;
private Boolean xyFound = false;
private int Progress = 1;
private static final String TAG = "*********TouchView";
private Filters f = null;
private boolean bothCirclesInPlace = false;
private MultiProcessorFilter mpf;
private MultiProcessorFilter mpf2;
private MultiRuntimeProcessorFilter mrpf;
private MultiRuntimeProcessorFilter mrpf2;
public TouchView(Context context) {
super(context);
}
public TouchView(Context context, AttributeSet attr) {
super(context,attr);
Log.e(TAG, "++++++++++ inside touchview constructor");
tempFile = new File(Environment.getExternalStorageDirectory().
getAbsolutePath() + "/"+"image.jpg");
imageArray = new byte[(int)tempFile.length()];
try{
InputStream is = new FileInputStream(tempFile);
BufferedInputStream bis = new BufferedInputStream(is);
DataInputStream dis = new DataInputStream(bis);
int i = 0;
while (dis.available() > 0) {
imageArray[i] = dis.readByte();
i++;
}
dis.close();
} catch (Exception e) {
e.printStackTrace();
}
Bitmap bm = BitmapFactory.decodeByteArray(imageArray, 0, imageArray.length);
bgr = bm.copy(bm.getConfig(), true);;
overLay = null;
overLay2 = null;
bm.recycle();
}// end of touchView constructor
public void findCirclePixels(){
// f = new Filters();
// mpf = new MultiProcessorFilter();
// mpf2 = new MultiProcessorFilter();
mrpf = new MultiRuntimeProcessorFilter();
mrpf2 = new MultiRuntimeProcessorFilter();
crop = Bitmap.createBitmap(bgr,Math.max((int)centreX-75,0),Math.max((int)centreY-75,0),150,150);
crop2 = Bitmap.createBitmap(bgr,Math.max((int)centreA-75,0),Math.max((int)centreB-75,0),150,150);
new Thread(new Runnable() {
public void run() {
float prog = (float)Progress/150001;
// final Bitmap bgr3 = f.barrel(crop,prog);
// final Bitmap bgr4 = f.barrel(crop2,prog);
// final Bitmap bgr3 = mpf.barrel(crop,prog);
// final Bitmap bgr4 = mpf2.barrel(crop2,prog);
final Bitmap bgr3 = mrpf.barrel(crop,prog);
final Bitmap bgr4 = mrpf2.barrel(crop2,prog);
TouchView.this.post(new Runnable() {
public void run() {
TouchView.this.overLay = bgr3;
TouchView.this.overLay2 = bgr4;
TouchView.this.invalidate();
}
});
}
}).start();
}// end of changePixel()
#Override
public boolean onTouchEvent(MotionEvent ev) {
switch (ev.getAction()) {
case MotionEvent.ACTION_DOWN: {
if(xyFound == false){
centreX = (int) ev.getX();
centreY = (int) ev.getY();
xyFound = true;
}else{
centreA = (int) ev.getX();
centreB = (int) ev.getY();
bothCirclesInPlace = true;
}
break;
}
/* case MotionEvent.ACTION_MOVE: {
if(xyFound == false){
centreX = (int) ev.getX();
centreY = (int) ev.getY();
xyFound = true;
}else{
centreA = (int) ev.getX();
centreB = (int) ev.getY();
bothCirclesInPlace = true;
}
findCirclePixels();
// TouchView.this.invalidate();
break;
}*/
case MotionEvent.ACTION_UP:
break;
}
return true;
}//end of onTouchEvent
public void initSlider(final HorizontalSlider slider)
{
slider.setOnProgressChangeListener(changeListener);
}
private OnProgressChangeListener changeListener = new OnProgressChangeListener() {
#Override
public void onProgressChanged(View v, int progress) {
setProgress(progress);
}
};
#Override
public void onDraw(Canvas canvas){
super.onDraw(canvas);
Log.e(TAG, "******about to draw bgr ");
canvas.drawBitmap(bgr, 0, 0, null);
if(bothCirclesInPlace == true){
if(overLay != null){
Log.e(TAG, "******about to draw overlay1 ");
canvas.drawBitmap(overLay, centreX-75, centreY-75, null);
}
if(overLay2 != null){
Log.e(TAG, "******about to draw overlay2 ");
canvas.drawBitmap(overLay2, centreA-75, centreB-75, null);
}
}
}//end of onDraw
protected void setProgress(int progress2) {
Log.e(TAG, "***********in SETPROGRESS");
this.Progress = progress2;
findCirclePixels();
}
}
.
The calling activity.
import android.app.Activity;
import android.os.AsyncTask;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.view.Window;
import android.view.WindowManager;
import android.view.View.OnClickListener;
import android.widget.Button;
public class Jjilapp extends Activity {
private static final String TAG = "*********jjil";
#Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
requestWindowFeature(Window.FEATURE_NO_TITLE);
getWindow().setFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN,
WindowManager.LayoutParams.FLAG_FULLSCREEN);
setContentView(R.layout.touchview);
final TouchView touchView = (TouchView)findViewById(R.id.touchview);
final HorizontalSlider slider = (HorizontalSlider)findViewById(R.id.slider);
touchView.initSlider(slider);
}//end of oncreate
}
If you need any help, mate, just ask. Hope this helps.
Thanks for that code — it helped me a lot. I transcribed it to Java (desktop). Maybe someone has a similar function for simulating tangential distortion?
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import javax.imageio.ImageIO;
import com.jhlabs.image.InterpolateFilter;
class Filters{
float xscale;
float yscale;
float xshift;
float yshift;
int [] s;
public Filters(){
}
public BufferedImage barrel (BufferedImage input, float k){
float centerX=input.getWidth()/2; //center of distortion
float centerY=input.getHeight()/2;
int width = input.getWidth(); //image bounds
int height = input.getHeight();
BufferedImage dst = new BufferedImage(width, height,BufferedImage.TYPE_INT_RGB); //output pic
xshift = calc_shift(0,centerX-1,centerX,k);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,k);
yshift = calc_shift(0,centerY-1,centerY,k);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,k);
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
for(int j=0;j<dst.getHeight();j++){
for(int i=0;i<dst.getWidth();i++){
float x = getRadialX((float)i,(float)j,centerX,centerY,k);
float y = getRadialY((float)i,(float)j,centerX,centerY,k);
sampleImage(input,x,y);
int color = ((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
// System.out.print(i+" "+j+" \\");
dst.setRGB(i, j, color);
}
}
return dst;
}
void sampleImage(BufferedImage arr, float idx0, float idx1)
{
s = new int [4];
if(idx0<0 || idx1<0 || idx0>(arr.getHeight()-1) || idx1>(arr.getWidth()-1)){
s[0]=0;
s[1]=0;
s[2]=0;
s[3]=0;
return;
}
float idx0_fl=(float) Math.floor(idx0);
float idx0_cl=(float) Math.ceil(idx0);
float idx1_fl=(float) Math.floor(idx1);
float idx1_cl=(float) Math.ceil(idx1);
int [] s1 = getARGB(arr,(int)idx0_fl,(int)idx1_fl);
int [] s2 = getARGB(arr,(int)idx0_fl,(int)idx1_cl);
int [] s3 = getARGB(arr,(int)idx0_cl,(int)idx1_cl);
int [] s4 = getARGB(arr,(int)idx0_cl,(int)idx1_fl);
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
s[0]= (int) (s1[0]*(1-x)*(1-y) + s2[0]*(1-x)*y + s3[0]*x*y + s4[0]*x*(1-y));
s[1]= (int) (s1[1]*(1-x)*(1-y) + s2[1]*(1-x)*y + s3[1]*x*y + s4[1]*x*(1-y));
s[2]= (int) (s1[2]*(1-x)*(1-y) + s2[2]*(1-x)*y + s3[2]*x*y + s4[2]*x*(1-y));
s[3]= (int) (s1[3]*(1-x)*(1-y) + s2[3]*(1-x)*y + s3[3]*x*y + s4[3]*x*(1-y));
}
int [] getARGB(BufferedImage buf,int x, int y){
int rgb = buf.getRGB(x, y); // Returns by default ARGB.
int [] scalar = new int[4];
scalar[0] = (rgb >>> 24) & 0xFF;
scalar[1] = (rgb >>> 16) & 0xFF;
scalar[2] = (rgb >>> 8) & 0xFF;
scalar[3] = (rgb >>> 0) & 0xFF;
return scalar;
}
float getRadialX(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float getRadialY(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float thresh = 1;
float calc_shift(float x1,float x2,float cx,float k){
float x3 = (float)(x1+(x2-x1)*0.5);
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
if(res1>-thresh && res1 < thresh)
return x1;
if(res3<0){
return calc_shift(x3,x2,cx,k);
}
else{
return calc_shift(x1,x3,cx,k);
}
}
}
I debugged the Java files and they work fine on my phones (Android 4.0 and higher). The project consists of 3 Java files and 1 XML file. You have to place the checkerboardback.jpg file under the drawable directory. As someone said, the alpha value was missing, so I set it to 0xff. In addition, the upper bounds of some loops were wrong.
//1. MultiRuntimeProcessorFilter.java
public class MultiRuntimeProcessorFilter {
private static final String TAG = "mrpf";
private int x = 0;
private Bitmap input = null;
private int radius;
private int mHeight;
public void createBitmapSections(int nOp, int[] sections){
int processors = nOp;
int jMax = input.getHeight();
int aSectionSize = (int) Math.ceil(jMax/processors);
Log.e("yoSIZECHK", "++++++++++ sections size = "+aSectionSize);
int k = 0;
for(int h=0; h<processors+1; h++){
sections[h] = k;
k+= aSectionSize;
if(h==processors){
sections[h] = mHeight;//Last must cover ceiling
}
Log.v("yoSEC","sections = "+h+" "+sections[h]);
}
}// end of createBitmapSections()
//#SuppressWarnings("unchecked")
public Bitmap barrel (Bitmap input, float k, int r){
this.radius = r;
this.input = input;
int []mArray = new int[input.getWidth()*input.getHeight()];
mHeight = input.getHeight();
Log.e(TAG, "bitmap height x width = "+mHeight+" "+input.getWidth());
//Log.v("yoRESULT", "height width = "+ input.getWidth()+" "+input.getHeight());
int nrOfProcessors = Runtime.getRuntime().availableProcessors();
Log.e(TAG, "no of processors = "+nrOfProcessors);
int[] sections = new int[nrOfProcessors+1];
createBitmapSections(nrOfProcessors,sections);
ExecutorService threadPool = Executors.newFixedThreadPool(nrOfProcessors);
for(int g=0; g<sections.length;g++){
Log.e(TAG, "++++++++++ sections= "+sections[g]);
}
// ExecutorService threadPool = Executors.newFixedThreadPool(nrOfProcessors);
Object[] task = new Object[nrOfProcessors];
for(int z = 0; z < nrOfProcessors; z++){
task[z] = (FutureTask<PartialResult>) threadPool.submit(new PartialProcessing(sections[z], sections[z+1] - 1, input, k, z));
Log.e(TAG, "++++++++++ task"+z+"= "+task[z].toString());
}
PartialResult[] results = new PartialResult[nrOfProcessors];
try{
for(int t = 0; t < nrOfProcessors; t++){
results[t] = ((FutureTask<PartialResult>) task[t]).get();
results[t].fill(mArray);
}
}catch(Exception e){
e.printStackTrace();
}
Log.v("yoRESULT", "height width = "+ input.getHeight()+" "+input.getWidth());
Bitmap dst2 = Bitmap.createBitmap(mArray,input.getWidth(),input.getHeight(),input.getConfig());
return dst2;
}//end of barrel()
public class PartialResult {
int startP;
int endP;
int[] storedValues;
public PartialResult(int startp, int endp, Bitmap input){
this.startP = startp;
this.endP = endp;
this.storedValues = new int[input.getWidth()*input.getHeight()];
}
public void addValue(int p, int result) {
storedValues[p] = result;
}
public void fill(int[] mArray) {
Log.v("yo09", startP + " " + endP + " " + input.getWidth());
//yoko for (int p = startP; p < endP; p++){
for (int p = startP; p < endP+1; p++){
//for(int b=0;b<radius;b++,x++)
for(int b=0;b<input.getWidth();b++,x++) {
mArray[x] = storedValues[x];
if (b == 0) Log.v("yoyoyo", p+" + " + storedValues[x]);
}
}
Log.e("yoFill", " ++++++++++ radius x = "+radius+" "+x);
}
}//end of partialResult
public class PartialProcessing implements Callable<PartialResult> {
int startJ;
int endJ;
int mID;
private int[] scalar;
private float xscale;
private float yscale;
private float xshift;
private float yshift;
private float thresh = 1;
private int [] s1;
private int [] s2;
private int [] s3;
private int [] s4;
private int [] s;
private Bitmap input;
private float k;
public PartialProcessing(int startj, int endj, Bitmap input, float k, int mID) {
this.startJ = startj;
this.endJ = endj;
this.input = input;
this.k = k;
this.mID = mID;
s = new int[4];
scalar = new int[4];
s1 = new int[4];
s2 = new int[4];
s3 = new int[4];
s4 = new int[4];
}
int [] getARGB(Bitmap buf,int x, int y){
int rgb = buf.getPixel(y, x); // Returns by default ARGB.
// int [] scalar = new int[4];
// scalar[0] = (rgb >>> 24) & 0xFF;
scalar[1] = (rgb >>> 16) & 0xFF;
scalar[2] = (rgb >>> 8) & 0xFF;
scalar[3] = (rgb >>> 0) & 0xFF;
return scalar;
}
float getRadialX(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = x+((x-cx)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float getRadialY(float x,float y,float cx,float cy,float k){
x = (x*xscale+xshift);
y = (y*yscale+yshift);
float res = y+((y-cy)*k*((x-cx)*(x-cx)+(y-cy)*(y-cy)));
return res;
}
float calc_shift(float x1,float x2,float cx,float k){
float x3 = (float)(x1+(x2-x1)*0.5);
float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
if(res1>-thresh && res1 < thresh)
return x1;
if(res3<0){
return calc_shift(x3,x2,cx,k);
}
else{
return calc_shift(x1,x3,cx,k);
}
}
//void sampleImage(Bitmap mArray, float idx0, float idx1)
int [] sampleImage(Bitmap mArray2, float idx0, float idx1)
{
// s = new int [4];
if(idx0<0 || idx1<0 || idx0>(mArray2.getHeight()-1) || idx1>(mArray2.getWidth()-1)){
s[0]=0;
s[1]=0;
s[2]=0;
s[3]=0;
return s;// yoko
}
float idx0_fl=(float) Math.floor(idx0);
float idx0_cl=(float) Math.ceil(idx0);
float idx1_fl=(float) Math.floor(idx1);
float idx1_cl=(float) Math.ceil(idx1);
s1 = getARGB(mArray2,(int)idx0_fl,(int)idx1_fl);
s2 = getARGB(mArray2,(int)idx0_fl,(int)idx1_cl);
s3 = getARGB(mArray2,(int)idx0_cl,(int)idx1_cl);
s4 = getARGB(mArray2,(int)idx0_cl,(int)idx1_fl);
float x = idx0 - idx0_fl;
float y = idx1 - idx1_fl;
// s[0]= (int) (s1[0]*(1-x)*(1-y) + s2[0]*(1-x)*y + s3[0]*x*y + s4[0]*x*(1-y));
s[1]= (int) (s1[1]*(1-x)*(1-y) + s2[1]*(1-x)*y + s3[1]*x*y + s4[1]*x*(1-y));
s[2]= (int) (s1[2]*(1-x)*(1-y) + s2[2]*(1-x)*y + s3[2]*x*y + s4[2]*x*(1-y));
s[3]= (int) (s1[3]*(1-x)*(1-y) + s2[3]*(1-x)*y + s3[3]*x*y + s4[3]*x*(1-y));
return s;
}
#Override
public PartialResult call() {
PartialResult partialResult = new PartialResult(startJ, endJ,input);
float centerX=input.getWidth()/2; //center of distortion
float centerY=input.getHeight()/2;
int width = input.getWidth(); //image bounds
int height = input.getHeight();
xshift = calc_shift(0,centerX-1,centerX,k);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,k);
yshift = calc_shift(0,centerY-1,centerY,k);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,k);
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
// yoko int p = startJ*radius;
int p = startJ*width;//yoko
int origPixel = 0;
int color = 0;
int i;
Log.v("yokoIJ","PartialResult startJ endJ "+startJ+" "+endJ);
//yoko for (int j = startJ; j < endJ; j++){
for (int j = startJ; j < endJ+1; j++){
for ( i = 0; i < width; i++, p++){
s = new int [4];//yoko added
origPixel = input.getPixel(i,j);
float x = getRadialX((float)j,(float)i,centerX,centerY,k);
float y = getRadialY((float)j,(float)i,centerX,centerY,k);
//sampleImage(input,x,y); //yoko
s= sampleImage(input,x,y);
color = (0xff<<24)|((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
//Log.e(TAG, "radius = "+radius);
//Not understand why it is not radius but radius/2
//yoko if(((i-centerX)*(i-centerX) + (j-centerY)*(j-centerY)) <= radius*(radius/4)){
if(((i-centerX)*(i-centerX) + (j-centerY)*(j-centerY)) <= radius*radius){
//yo if(j%10 == 1 && i%10 == 1)
//yo Log.v("yoJI", mID+" "+j + " " + i );
partialResult.addValue(p, color);
}else{
partialResult.addValue(p, origPixel);
}
}//end of inner for
}//end of outer for
return partialResult;
}//end of call
}// end of partialprocessing
}//end of MultiProcesorFilter
// 2. Filters.java:
class Filters{
float xscale;
float yscale;
float xshift;
float yshift;
int [] s;
private static String TAG = "Filters";
public Filters(){
Log.e(TAG, "***********inside constructor");
}
public Bitmap barrel (Bitmap input, float k, boolean check, int Range){
Log.e(TAG, "***********inside barrel method : hasAlpha = ");
float centerX=input.getWidth()/2; //center of distortion
float centerY=input.getHeight()/2;
int width = input.getWidth(); //image bounds
int height = input.getHeight();
//yoko Log.v("yoQQ", width+" "+height+" "+centerX+" "+centerY);
if(check)return input;
Bitmap dst = Bitmap.createBitmap(width, height,input.getConfig() ); //output pic
Log.e(TAG, "***********dst bitmap created ");
xshift = calc_shift(0,centerX-1,centerX,k);
float newcenterX = width-centerX;
float xshift_2 = calc_shift(0,newcenterX-1,newcenterX,k);
yshift = calc_shift(0,centerY-1,centerY,k);
float newcenterY = height-centerY;
float yshift_2 = calc_shift(0,newcenterY-1,newcenterY,k);
xscale = (width-xshift-xshift_2)/width;
yscale = (height-yshift-yshift_2)/height;
Log.e(TAG, "***********about to loop through bm");
Log.v("yoQQ2", xscale + " " + yscale);
//if(check==1)return input;//yoko
/*for(int j=0;j<dst.getHeight();j++){
for(int i=0;i<dst.getWidth();i++){
float x = getRadialX((float)i,(float)j,centerX,centerY,k);
float y = getRadialY((float)i,(float)j,centerX,centerY,k);
sampleImage(input,x,y);
int color = ((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
// System.out.print(i+" "+j+" \\");
dst.setPixel(i, j, color);
}
}*/
int origPixel; // the pixel in orig image
int i=0,j=0;
for(j=0;j<dst.getHeight();j++){
for(i=0;i<dst.getWidth();i++){
s = new int [4];//yoko added
origPixel= input.getPixel(i,j);
float x = getRadialX((float)i,(float)j,centerX,centerY,k);
float y = getRadialY((float)i,(float)j,centerX,centerY,k);
//yoko sampleImage(input,x,y);
s = sampleImage(input,x,y);
//yoko int color = ((s[1]&0x0ff)<<16)|((s[2]&0x0ff)<<8)|(s[3]&0x0ff);
int color = (0xff<<24)|((s[1]&0xff)<<16)|((s[2]&0xff)<<8)|(s[3]&0xff);
//Log.v("yoQQ3", j + " " + i + " : "+dst.getHeight()+" "+dst.getWidth());
// check whether a pixel is within the circle bounds of 150
if( Math.sqrt( Math.pow(i - centerX, 2) + ( Math.pow(j - centerY, 2) ) ) <= Range ){
dst.setPixel(i, j, color);
//if(j%10 == 1 && i%10 == 1)
// Log.v("yoJI", j + " " + i );
}else{
dst.setPixel(i,j,origPixel);
}
}
}
Log.v("yoDONE", "======== Loop End ======== "+j+" "+i+" : " + dst.getHeight()+" "+dst.getWidth());
return dst;
}//barrel
// void sampleImage(Bitmap arr, float idx0, float idx1) // yoko
/**
 * Bilinear sample of the bitmap at fractional coordinates (idx0, idx1).
 * Writes the interpolated ARGB channels into the shared field {@code s}
 * and also returns it. Out-of-range coordinates yield all-zero channels.
 *
 * NOTE(review): idx0 is range-checked against getHeight() and idx1 against
 * getWidth(), while getARGB() flips them back for getPixel(); presumably
 * idx0 = row and idx1 = column — confirm against the callers, since the
 * barrel() loop appears to pass (x, y). On square test images this is
 * invisible.
 */
int[] sampleImage(Bitmap arr, float idx0, float idx1)
{
    s = new int[4]; // Java zero-initializes, so the out-of-range case needs no explicit clearing
    if (idx0 < 0 || idx1 < 0 || idx0 > (arr.getHeight() - 1) || idx1 > (arr.getWidth() - 1)) {
        return s; // transparent black for samples outside the image
    }
    // The four integer grid points surrounding the sample position.
    float r0 = (float) Math.floor(idx0);
    float r1 = (float) Math.ceil(idx0);
    float c0 = (float) Math.floor(idx1);
    float c1 = (float) Math.ceil(idx1);
    int[] p00 = getARGB(arr, (int) r0, (int) c0);
    int[] p01 = getARGB(arr, (int) r0, (int) c1);
    int[] p11 = getARGB(arr, (int) r1, (int) c1);
    int[] p10 = getARGB(arr, (int) r1, (int) c0);
    // Fractional offsets inside the grid cell.
    float fx = idx0 - r0;
    float fy = idx1 - c0;
    // Standard bilinear blend, applied per channel (A, R, G, B).
    for (int ch = 0; ch < 4; ch++) {
        s[ch] = (int) (p00[ch] * (1 - fx) * (1 - fy)
                     + p01[ch] * (1 - fx) * fy
                     + p11[ch] * fx * fy
                     + p10[ch] * fx * (1 - fy));
    }
    return s;
}//sampleImage
/**
 * Unpacks the pixel at (row = x, col = y) into its four 8-bit channels.
 *
 * @return {A, R, G, B}, each in [0, 255].
 *
 * NOTE(review): Bitmap.getPixel takes (column, row), so the argument swap
 * in getPixel(y, x) treats x as the row index — matches sampleImage's use.
 */
int [] getARGB(Bitmap buf,int x, int y){
    final int argb = buf.getPixel(y, x); // returns packed ARGB
    return new int[] {
        (argb >>> 24) & 0xFF, // alpha
        (argb >>> 16) & 0xFF, // red
        (argb >>> 8)  & 0xFF, // green
        argb          & 0xFF  // blue
    };
}//getARGB
/**
 * Maps an output x-coordinate back to the source x-coordinate for the
 * barrel distortion: x' = x + (x-cx) * k * r^2, where r^2 is the squared
 * distance from the distortion center (cx, cy). The input point is first
 * scaled/shifted by the class-level xscale/xshift/yscale/yshift fields.
 */
float getRadialX(float x,float y,float cx,float cy,float k){
    float sx = x * xscale + xshift;
    float sy = y * yscale + yshift;
    float dx = sx - cx;
    float dy = sy - cy;
    return sx + dx * k * (dx * dx + dy * dy);
}//getRadial1X
/**
 * Y-axis counterpart of getRadialX: y' = y + (y-cy) * k * r^2 after the
 * same xscale/xshift/yscale/yshift normalization of the input point.
 */
float getRadialY(float x,float y,float cx,float cy,float k){
    float sx = x * xscale + xshift;
    float sy = y * yscale + yshift;
    float dx = sx - cx;
    float dy = sy - cy;
    return sy + dy * k * (dx * dx + dy * dy);
}//getRadialY
// Acceptance band for the root search: stop when |f(x1)| < thresh.
float thresh = 1;
/**
 * Recursive bisection over [x1, x2] for the edge shift of the barrel
 * distortion: finds an x near x1 where f(x) = x + (x-cx)*k*(x-cx)^2
 * is within +/-thresh of zero.
 *
 * Bug fix: the original terminated ONLY when |f(x1)| < thresh. When the
 * search narrows via the (x1, x3) branch, x1 — and hence f(x1) — never
 * changes, so once the interval collapses to float resolution the call
 * calc_shift(x1, x1, ...) recursed forever (StackOverflowError). The
 * interval-width guard below bounds the recursion depth.
 */
float calc_shift(float x1,float x2,float cx,float k){
    // Converged: interval is below any useful pixel resolution.
    if (x2 - x1 < 1e-4f) {
        return x1;
    }
    float x3 = (float)(x1+(x2-x1)*0.5); // midpoint of the interval
    float res1 = x1+((x1-cx)*k*((x1-cx)*(x1-cx)));
    float res3 = x3+((x3-cx)*k*((x3-cx)*(x3-cx)));
    if(res1>-thresh && res1 < thresh)
        return x1; // f(x1) already within tolerance
    if(res3<0){
        // Root lies right of the midpoint.
        return calc_shift(x3,x2,cx,k);
    }
    else{
        // Root lies left of the midpoint.
        return calc_shift(x1,x3,cx,k);
    }
}//calc_shift
}
And here is the activity class:
//3 MainActivity.java, toplevel class.
public class MainActivity extends Activity {
ImageView iv1=null;
ImageView iv2=null;
Button bT, bB, b0;
Bitmap bitmap1, bitmap2, bitmapSP;
Boolean view1 = true;
private static final String TAG = "*********jjil";
public static int mH,mW,RADIUS;
#Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
requestWindowFeature(Window.FEATURE_NO_TITLE);
getWindow().setFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN,
WindowManager.LayoutParams.FLAG_FULLSCREEN);
setContentView(R.layout.activity_main);
Resources res = this.getResources();
//bitmap1 = BitmapFactory.decodeResource(res, R.drawable.checkerboard);
bitmap1 = BitmapFactory.decodeResource(res, R.drawable.checkerboardback);
mH=bitmap1.getHeight();
mW=bitmap1.getWidth();
RADIUS = mH/3;
bT = (Button)findViewById(R.id.buttontoggle);
bT.setOnClickListener(onClickToggleView);
bB = (Button)findViewById(R.id.buttonbarrel);
bB.setOnClickListener(onClickToggleView);
b0 = (Button)findViewById(R.id.button0);
b0.setOnClickListener(onClickToggleView);
iv1=(ImageView)findViewById(R.id.touchview1);
iv1.setImageBitmap(bitmap1);
iv1.setVisibility(View.VISIBLE);
}//end of oncreate
public View.OnClickListener onClickToggleView = new View.OnClickListener() {
public void onClick(View v) {
if (v == bT) {
/// fromhere
new AsyncTask<Void, Void, String>() {
com.example.owner.opengl2.Filters mFilers = new com.example.owner.opengl2.Filters();
TextView tx = (TextView)findViewById(R.id.mStatus);
Bitmap bitmapSP;long start,end;
protected void onPreExecute() {
start = System.nanoTime();
iv1.setImageBitmap(bitmap1);
tx.setText("- Running -");
}
protected String doInBackground(Void... params) {
bitmapSP = mFilers.barrel(bitmap1,(float)0.00005,false,RADIUS);
return "message";
}
protected void onPostExecute(String msg) {
end = System.nanoTime();
long elapsedTime = end - start;
long seconds = elapsedTime / 1000000;
iv1.setImageBitmap(bitmapSP);
tx.setText("- READY : ElapsedTime(ms) = "+seconds);
// Post Code
// Use `msg` in code
}
}.execute();
///upto here
} else if (v == bB){
/// fromhere
new AsyncTask<Void, Void, String>() {
com.example.owner.opengl2.MultiRuntimeProcessorFilter mFilers = new com.example.owner.opengl2.MultiRuntimeProcessorFilter();
TextView tx = (TextView)findViewById(R.id.mStatus);
Bitmap bitmapSP;long start,end;
protected void onPreExecute() {
start = System.nanoTime();
iv1.setImageBitmap(bitmap1);
tx.setText("- Running -");
}
protected String doInBackground(Void... params) {
bitmapSP = mFilers.barrel(bitmap1,(float)0.00005,RADIUS);
return "message";
}
protected void onPostExecute(String msg) {
end = System.nanoTime();
long elapsedTime = end - start;
//double seconds = (double)elapsedTime / 1000000000.0;
long seconds = elapsedTime / 1000000;
iv1.setImageBitmap(bitmapSP);
tx.setText("- READY : ElapsedTime(ms) = "+seconds);
// Post Code
// Use `msg` in code
}
}.execute();
} else if (v == b0){
new AsyncTask<Void, Void, String>() {
protected String doInBackground(Void... Unused) {
return "OK";
}
protected void onPostExecute(String message) {
Log.v("YO", "---------------------------------");
Log.v("YO", "----------ORIGINAL SHAPE-------- "+message);
Log.v("YO", "---------------------------------");
iv1.setImageBitmap(bitmap1);
TextView tx = (TextView)findViewById(R.id.mStatus);
tx.setText("- READY : w h RADIUS = "+mW+" "+mH+" "+RADIUS);
}
}.execute();
}
///upto here
}
};
}
Here is the XML layout file:
//4 activity_main.xml
<!-- activity_main.xml: status text and image view under a row of three
     buttons. Fix: resource references must use '@' — every '#dimen'/'#+id'
     below was a paste artifact; also restored the missing root close tag. -->
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools" android:layout_width="match_parent"
    android:layout_height="match_parent" android:paddingLeft="@dimen/activity_horizontal_margin"
    android:paddingRight="@dimen/activity_horizontal_margin"
    android:paddingTop="@dimen/activity_vertical_margin"
    android:orientation="vertical"
    android:paddingBottom="@dimen/activity_vertical_margin" tools:context=".MainActivity">
    <LinearLayout
        android:id="@+id/buttons"
        android:layout_centerHorizontal="true"
        android:layout_alignParentTop="true"
        android:orientation="horizontal"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content">
        <Button
            android:id="@+id/buttontoggle"
            android:text="Barrel 1P"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />
        <Button
            android:id="@+id/buttonbarrel"
            android:text="Barrele NP"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />
        <Button
            android:id="@+id/button0"
            android:text="ORIGINAL"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content" />
    </LinearLayout>
    <TextView
        android:id="@+id/mStatus"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text=" - Ready - "
        android:textAppearance="?android:attr/textAppearanceSmall"
        android:layout_below="@+id/buttons"
        android:layout_centerHorizontal="true"
        />
    <ImageView
        android:id="@+id/touchview1"
        android:layout_below="@+id/mStatus"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:layout_centerHorizontal="true"
        />
    <!--ImageView
        android:id="@+id/touchview2"
        android:layout_below="@+id/touchview1"
        android:layout_alignParentBottom="true"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:layout_centerHorizontal="true" /-->
</RelativeLayout>
Do you want to use this distortion on synthetic images, or do you want to apply it to a live camera feed or something similar?
In OpenCV you should be able to do camera calibration using the built-in functions (Zhang's algorithm).
For an OpenGL-based approach, see the example linked above.
Regards

Resources