Is there a way to use a custom model in ml5 yolo()?

The ml5 yolo() documentation mentions that
"this implementation is heavily derived from ModelDepot",
but it doesn't say which pre-trained model it uses or how you can use your own trained model.
let video;
let yolo;
let status;
let objects = [];

function setup() {
  createCanvas(320, 240);
  video = createCapture(VIDEO);
  video.size(320, 240);
  // Create a YOLO method
  yolo = ml5.YOLO(video, startDetecting);
  // Hide the original video
  video.hide();
  status = select('#status');
}

function draw() {
  image(video, 0, 0, width, height);
  for (let i = 0; i < objects.length; i++) {
    noStroke();
    fill(0, 255, 0);
    text(objects[i].className, objects[i].x * width, objects[i].y * height - 5);
    noFill();
    strokeWeight(4);
    stroke(0, 255, 0);
    rect(objects[i].x * width, objects[i].y * height, objects[i].w * width, objects[i].h * height);
  }
}

function startDetecting() {
  status.html('Model loaded!');
  detect();
}

function detect() {
  yolo.detect(function(err, results) {
    if (err) {
      console.error(err); // surface errors instead of silently dropping them
      return;
    }
    objects = results;
    detect(); // keep detecting on the next frame
  });
}

I've been reading the YOLO documentation and I found a partial trail:
ml5-yolo-library -> heavily derived from https://github.com/ModelDepot/tfjs-yolo-tiny (ModelDepot: modeldepot.io)
which points here -> https://modeldepot.io/mikeshi/tiny-yolo-in-javascript
and says -> "This model was created from taking the original Darknet Tiny YOLO cfg and weights, converting it into Keras via YAD2K, and then using tensorflowjs_converter to convert it into the Tensorflow.js format."
So I'm trying to do the same right now; hopefully I'll find a way.
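
Since ml5's YOLO doesn't document a way to point it at your own weights, one workaround is to bypass the wrapper and load the converted model with TensorFlow.js directly. Below is a minimal sketch, not ml5's official API, assuming a recent @tensorflow/tfjs, that you ran your own cfg/weights through YAD2K and tensorflowjs_converter as described above, and that the model URL and the 416x416 input size are placeholders to adjust:

import * as tf from '@tensorflow/tfjs';

// Placeholder: wherever you host the model.json produced by tensorflowjs_converter
const MODEL_URL = 'https://example.com/my-tiny-yolo/model.json';
const modelPromise = tf.loadLayersModel(MODEL_URL); // named tf.loadModel in older tfjs releases

async function detectWithCustomModel(videoElement) {
  const model = await modelPromise;
  const input = tf.tidy(() => {
    const frame = tf.browser.fromPixels(videoElement);          // HWC, uint8
    const resized = tf.image.resizeBilinear(frame, [416, 416]); // assumed: tiny YOLO's 416x416 input
    return resized.toFloat().div(255).expandDims(0);            // 1x416x416x3, scaled to [0, 1]
  });
  const output = model.predict(input); // raw grid predictions
  input.dispose();
  return output; // still needs box decoding, score thresholding and non-max suppression
}

The raw output still has to be decoded into boxes (anchors, thresholding, non-max suppression), which is exactly the part tfjs-yolo-tiny implements, so its source is a good reference for that step.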

Related

What is the correct way to use gl.readPixels?

I want to get the pixel data from a Three.js demo.
As far as I know, there are two ways to proceed:
1) draw the WebGL canvas into a 2D canvas and use Context2D.getImageData, like this:
var canvas = document.createElement("canvas");
var ctx = canvas.getContext("2d");
ctx.drawImage(renderer.domElement,0,0);
var data = ctx.getImageData(0,0,w,h).data;
2) use the 3D context directly with readPixels, like this:
var ctx = renderer.domElement.getContext("webgl");
var data = new Uint8Array(w*h*4);
ctx.readPixels(0, 0, w, h, ctx.RGBA, ctx.UNSIGNED_BYTE, data);
Both approaches work and give the same results, but the second one is almost twice as slow as the one using context2d.getImageData.
That seems very weird to me. How can drawing the 3D content into a 2D canvas be faster than using the 3D context directly? I don't understand, and I'm almost sure I'm not using gl.readPixels correctly.
So my question is: how do I use gl.readPixels so that it is faster than context2d.drawImage + context2d.getImageData?
I tried to use a Float32Array, like this:
var ctx = renderer.domElement.getContext("webgl");
var data = new Float32Array(w*h*4);
ctx.readPixels(0, 0, w, h, ctx.RGBA, ctx.FLOAT, data);
I thought it should be faster since there is no conversion from float to UInt8, but it doesn't seem to work like that: my 'data' array stays empty after the call to ctx.readPixels.
Thank you for your help! (Please excuse me if my English is not perfect; it's not my native language.)
On my machine readPixels is 2x to 20x faster than drawImage/getImageData. Tested on macOS Chrome and Firefox, as well as Windows 10 Chrome and Firefox. Safari was the exception, with readPixels coming out slower. That sounds like a bug in Safari, and indeed, checking Safari Technology Preview Release 46, readPixels is, as expected, 1.2x to 3x faster than drawImage/getImageData:
const gl = document.createElement("canvas").getContext("webgl");
const ctx = document.createElement("canvas").getContext("2d");
const w = 512;
const h = 512;
gl.canvas.width = w;
gl.canvas.height = h;
ctx.canvas.width = w;
ctx.canvas.height = h;

const readPixelBuffer = new Uint8Array(w * h * 4);

const tests = [
  { fn: withReadPixelsPreAlloc, msg: "readPixelsPreAlloc", },
  { fn: withReadPixels, msg: "readPixels", },
  { fn: withDrawImageGetImageData, msg: "drawImageGetPixels", },
];

let ndx = 0;
runNextTest();

function runNextTest() {
  if (ndx >= tests.length) {
    return;
  }
  const test = tests[ndx++];
  // use setTimeout to give the browser a chance to
  // do something between tests
  setTimeout(function() {
    log(test.msg, "iterations in 5 seconds:", runTest(test.fn));
    runNextTest();
  }, 0);
}

function runTest(fn) {
  const start = performance.now();
  let count = 0;
  for (;;) {
    const elapsed = performance.now() - start;
    if (elapsed > 5000) {
      break;
    }
    fn();
    ++count;
  }
  return count;
}

function withReadPixelsPreAlloc() {
  gl.readPixels(0, 0, w, h, gl.RGBA, gl.UNSIGNED_BYTE, readPixelBuffer);
}

function withReadPixels() {
  const readPixelBuffer = new Uint8Array(w * h * 4);
  gl.readPixels(0, 0, w, h, gl.RGBA, gl.UNSIGNED_BYTE, readPixelBuffer);
}

function withDrawImageGetImageData() {
  ctx.drawImage(gl.canvas, 0, 0);
  ctx.getImageData(0, 0, w, h);
}

function log(...args) {
  const elem = document.createElement("pre");
  elem.textContent = [...args].join(' ');
  document.body.appendChild(elem);
}
As for converting to float: the canvas itself is stored in bytes. There is no conversion to float, and you likely got a GL error:
const gl = document.createElement("canvas").getContext("webgl");
const buf = new Float32Array(4);
gl.readPixels(0, 0, 1, 1, gl.RGBA, gl.FLOAT, buf);
log("ERROR:", glEnumToString(gl, gl.getError()));

function log(...args) {
  const elem = document.createElement("pre");
  elem.textContent = [...args].join(' ');
  document.body.appendChild(elem);
}

function glEnumToString(gl, val) {
  if (val === 0) { return 'NONE'; }
  for (const key in gl) { // declare the loop variable to avoid an implicit global
    if (gl[key] === val) {
      return key;
    }
  }
  return `0x${val.toString(16)}`;
}
Checking the console, I see the error is:
WebGL: INVALID_ENUM: readPixels: invalid type
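
As a sanity check, you can also ask the implementation which format/type combination readPixels supports for the currently bound framebuffer, instead of guessing. A minimal sketch (reusing the glEnumToString helper above); RGBA/UNSIGNED_BYTE is the only combination the spec guarantees, and each implementation may advertise one extra pair:

const gl = document.createElement("canvas").getContext("webgl");
// Query the implementation-chosen readPixels format/type combination
log("readPixels format:", glEnumToString(gl, gl.getParameter(gl.IMPLEMENTATION_COLOR_READ_FORMAT)));
log("readPixels type:", glEnumToString(gl, gl.getParameter(gl.IMPLEMENTATION_COLOR_READ_TYPE)));

Reading FLOAT only ever works against a floating-point color attachment (for example one created via the optional OES_texture_float and WEBGL_color_buffer_float extensions), never against the canvas's default 8-bit drawing buffer. One more note for the Three.js case: if you call readPixels outside the render callback, create the renderer with new THREE.WebGLRenderer({ preserveDrawingBuffer: true }), otherwise the drawing buffer may already have been cleared by the time you read it.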

Running a DX11 compute shader with SharpDX - cannot get results

I am trying to run a compute shader and get the resulting texture using SharpDX.
From what I understood, I need to:
1. Create a texture to set as an output to the shader.
2. Set the above texture as an unordered access view so I can write to it.
3. Run the shader
4. Copy the UAV texture to a staging texture so it can be accessed by the CPU
5. Read the staging texture to a Bitmap
The problem is that whatever I do, the result is a black bitmap. I don't think the bug is in the Texture2D -> Bitmap conversion code, since printing the first pixel directly from the staging texture also gives me 0.
This is my shader code:
RWTexture2D<float4> Output : register(u0);

[numthreads(32, 32, 1)]
void main(uint3 id : SV_DispatchThreadID) {
    Output[id.xy] = float4(0, 1.0, 0, 1.0);
}
Using the MS DX11 docs and blog posts, I pieced together this code to run the shader and read the texture back:
public class GPUScreenColor {
    private int adapterIndex = 0;
    private Adapter1 gpu;
    private Device device;
    private ComputeShader computeShader;
    private Texture2D texture;
    private Texture2D stagingTexture;
    private UnorderedAccessView view;

    public GPUScreenColor() {
        initializeDirectX();
    }

    private void initializeDirectX() {
        using (var factory = new Factory1()) {
            gpu = factory.GetAdapter1(adapterIndex);
        }
        device = new Device(gpu, DeviceCreationFlags.Debug, FeatureLevel.Level_11_1);
        var compilationResult = ShaderBytecode.CompileFromFile("test.hlsl", "main", "cs_5_0", ShaderFlags.Debug);
        computeShader = new ComputeShader(device, compilationResult.Bytecode);
        texture = new Texture2D(device, new Texture2DDescription() {
            BindFlags = BindFlags.UnorderedAccess | BindFlags.ShaderResource,
            Format = Format.R8G8B8A8_UNorm,
            Width = 1024,
            Height = 1024,
            OptionFlags = ResourceOptionFlags.None,
            MipLevels = 1,
            ArraySize = 1,
            SampleDescription = { Count = 1, Quality = 0 }
        });
        UnorderedAccessView view = new UnorderedAccessView(device, texture, new UnorderedAccessViewDescription() {
            Format = Format.R8G8B8A8_UNorm,
            Dimension = UnorderedAccessViewDimension.Texture2D,
            Texture2D = { MipSlice = 0 }
        });
        stagingTexture = new Texture2D(device, new Texture2DDescription {
            CpuAccessFlags = CpuAccessFlags.Read,
            BindFlags = BindFlags.None,
            Format = Format.R8G8B8A8_UNorm,
            Width = 1024,
            Height = 1024,
            OptionFlags = ResourceOptionFlags.None,
            MipLevels = 1,
            ArraySize = 1,
            SampleDescription = { Count = 1, Quality = 0 },
            Usage = ResourceUsage.Staging
        });
    }

    public Bitmap getBitmap() {
        device.ImmediateContext.ComputeShader.Set(computeShader);
        device.ImmediateContext.ComputeShader.SetUnorderedAccessView(0, view);
        device.ImmediateContext.Dispatch(32, 32, 1);
        device.ImmediateContext.CopyResource(texture, stagingTexture);
        var mapSource = device.ImmediateContext.MapSubresource(stagingTexture, 0, MapMode.Read, MapFlags.None);
        Console.WriteLine(Marshal.ReadInt32(IntPtr.Add(mapSource.DataPointer, 0)));
        try {
            // Copy pixels from screen capture Texture to GDI bitmap
            Bitmap bitmap = new Bitmap(1024, 1024, System.Drawing.Imaging.PixelFormat.Format32bppRgb);
            BitmapData mapDest = bitmap.LockBits(new Rectangle(0, 0, 1024, 1024), ImageLockMode.ReadWrite, bitmap.PixelFormat);
            try {
                var sourcePtr = mapSource.DataPointer;
                var destPtr = mapDest.Scan0;
                for (int y = 0; y < 1024; y++) {
                    // Copy a single line
                    Utilities.CopyMemory(destPtr, sourcePtr, 1024 * 4);
                    // Advance pointers
                    sourcePtr = IntPtr.Add(sourcePtr, mapSource.RowPitch);
                    destPtr = IntPtr.Add(destPtr, mapDest.Stride);
                }
                return bitmap;
            } finally {
                bitmap.UnlockBits(mapDest);
            }
        } finally {
            device.ImmediateContext.UnmapSubresource(stagingTexture, 0);
        }
    }
}
I am pretty new to shaders so it may be something obvious...
First, you create your UAV as a local:
UnorderedAccessView view = new UnorderedAccessView(....
so the 'view' field stays null. Replacing this with
view = new UnorderedAccessView(....
will solve the first issue.
Second, it's quite likely that the runtime will complain about types (the debug layer will give you something like):
The resource return type for component 0 declared in the shader code (FLOAT) is not compatible with the resource type bound to Unordered Access View slot 0 of the Compute Shader unit (UNORM).
Some cards might do something (fix it silently), some might do nothing, some might crash :)
The problem is that RWTexture2D<float4> does not match the UNORM format (you declare a floating-point type in the shader while the bound texture is unsigned-normalized).
You need to declare your RWTexture2D as specifically unorm, e.g. (yes, the runtime can be that picky):
RWTexture2D<unorm float4> Output : register(u0);
Then your whole setup should work. (PS: I did not check the bitmap code, but I double-checked that the shader runs without error and that the first pixel matches.)

Contour position with "findContours" (OpenCV) in Processing

I'm working on a project where I have to use a webcam, an Arduino, a Raspberry Pi and an IR proximity sensor. I managed to get everything working with some help from Google, but I'm stuck on one big problem.
I'm using the OpenCV library for Processing, and I'd like the contours detected from the webcam to be in the center of the sketch. But I only managed to move the video, not the contours. Here's my code.
I hope you'll be able to help me :)
All the best
Alexandre
////////////////////////////////////////////
////////////////////////////////// LIBRARIES
////////////////////////////////////////////
import processing.serial.*;
import gab.opencv.*;
import processing.video.*;
/////////////////////////////////////////////////
////////////////////////////////// INITIALIZATION
/////////////////////////////////////////////////
Movie mymovie;
Capture video;
OpenCV opencv;
Contour contour;
////////////////////////////////////////////
////////////////////////////////// VARIABLES
////////////////////////////////////////////
int lf = 10; // Linefeed in ASCII
String myString = null;
Serial myPort; // The serial port
int sensorValue = 0;
int x = 300;
/////////////////////////////////////////////
////////////////////////////////// VOID SETUP
/////////////////////////////////////////////
void setup() {
  size(1280, 1024);
  // List all the available serial ports
  printArray(Serial.list());
  // Open the port you are using at the rate you want:
  myPort = new Serial(this, Serial.list()[1], 9600);
  myPort.clear();
  // Throw out the first reading, in case we started reading
  // in the middle of a string from the sender.
  myString = myPort.readStringUntil(lf);
  myString = null;
  opencv = new OpenCV(this, 720, 480);
  video = new Capture(this, 720, 480);
  mymovie = new Movie(this, "visage.mov");
  opencv.startBackgroundSubtraction(5, 3, 0.5);
  mymovie.loop();
}
////////////////////////////////////////////
////////////////////////////////// VOID DRAW
////////////////////////////////////////////
void draw() {
  image(mymovie, 0, 0);
  image(video, 20, 20);
  //tint(150, 20);
  noFill();
  stroke(255, 0, 0);
  strokeWeight(1);
  // check if there is something new on the serial port
  while (myPort.available() > 0) {
    // store the data in myString
    myString = myPort.readStringUntil(lf);
    // check if we really have something
    if (myString != null) {
      myString = myString.trim(); // let's remove whitespace characters
      // if we have at least one character...
      if (myString.length() > 0) {
        println(myString); // print out the data we just received
        // if we received a number (e.g. 123), store it in sensorValue; we will use this to change the background color.
        try {
          sensorValue = Integer.parseInt(myString);
        }
        catch (Exception e) {
        }
      }
    }
  }
  if (x < sensorValue) {
    video.start();
    opencv.loadImage(video);
  }
  if (x > sensorValue) {
    image(mymovie, 0, 0);
  }
  opencv.updateBackground();
  opencv.dilate();
  opencv.erode();
  for (Contour contour : opencv.findContours()) {
    contour.draw();
  }
}
//////////////////////////////////////////////
////////////////////////////////// VOID CUSTOM
//////////////////////////////////////////////
void captureEvent(Capture video) {
  video.read(); // read the next frame from the webcam
}

void movieEvent(Movie myMovie) {
  myMovie.read();
}
One approach you could use is to call the translate() function to move the origin of the canvas before you call contour.draw(). Something like this:
translate(moveX, moveY);
for (Contour contour : opencv.findContours()) {
  contour.draw();
}
What you use for moveX and moveY depends entirely on what you're trying to do. You might use whatever position you're drawing the video at (if you want the contours displayed on top of the video), or you might use width/2 and height/2 (maybe minus a bit to really center the contours). Since translate() affects everything drawn after it, consider wrapping this block in pushMatrix() / popMatrix() so the offset doesn't leak into later drawing.
More info can be found in the reference. Play with a bunch of different values, and post an MCVE if you get stuck. Good luck.

Must render buffer texture dimensions be power-of-two?

Does the texture that we use for WebGL render buffer storage need to have dimensions that are power-of-two?
Background info
I'm chasing a FRAMEBUFFER_INCOMPLETE_ATTACHMENT reported by a client on this setup:
Windows 7 Enterprise 32-Bit
Firefox Version: 33
Video Card: Intel Q45/Q43 Express Chipset Driver Version 8.13.10.2413
and so far I'm at a loss as to why it's happening, so I'm guessing it might have something to do with NPOT textures.
Here's my render buffer implementation, which does not use power-of-two textures yet:
SceneJS._webgl.RenderBuffer = function (cfg) {

    /**
     * True as soon as this buffer is allocated and ready to go
     * @type {boolean}
     */
    this.allocated = false;

    this.canvas = cfg.canvas;
    this.gl = cfg.canvas.gl;
    this.buf = null;
    this.bound = false;
};

/**
 * Called after WebGL context is restored.
 */
SceneJS._webgl.RenderBuffer.prototype.webglRestored = function (_gl) {
    this.gl = _gl;
    this.buf = null;
};

/**
 * Binds this buffer
 */
SceneJS._webgl.RenderBuffer.prototype.bind = function () {
    this._touch();
    if (this.bound) {
        return;
    }
    this.gl.bindFramebuffer(this.gl.FRAMEBUFFER, this.buf.framebuf);
    this.bound = true;
};

SceneJS._webgl.RenderBuffer.prototype._touch = function () {
    var width = this.canvas.canvas.width;
    var height = this.canvas.canvas.height;
    if (this.buf) { // Currently have a buffer
        if (this.buf.width == width && this.buf.height == height) { // Canvas size unchanged, buffer still good
            return;
        } else { // Buffer needs reallocation for new canvas size
            this.gl.deleteTexture(this.buf.texture);
            this.gl.deleteFramebuffer(this.buf.framebuf);
            this.gl.deleteRenderbuffer(this.buf.renderbuf);
        }
    }
    this.buf = {
        framebuf: this.gl.createFramebuffer(),
        renderbuf: this.gl.createRenderbuffer(),
        texture: this.gl.createTexture(),
        width: width,
        height: height
    };
    this.gl.bindFramebuffer(this.gl.FRAMEBUFFER, this.buf.framebuf);
    this.gl.bindTexture(this.gl.TEXTURE_2D, this.buf.texture);
    this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MAG_FILTER, this.gl.NEAREST);
    this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_MIN_FILTER, this.gl.NEAREST);
    this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_S, this.gl.CLAMP_TO_EDGE);
    this.gl.texParameteri(this.gl.TEXTURE_2D, this.gl.TEXTURE_WRAP_T, this.gl.CLAMP_TO_EDGE);
    try {
        // Do it the way the spec requires
        this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0, this.gl.RGBA, this.gl.UNSIGNED_BYTE, null);
    } catch (exception) {
        // Workaround for what appears to be a Minefield bug.
        var textureStorage = new WebGLUnsignedByteArray(width * height * 3);
        this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0, this.gl.RGBA, this.gl.UNSIGNED_BYTE, textureStorage);
    }
    this.gl.bindRenderbuffer(this.gl.RENDERBUFFER, this.buf.renderbuf);
    this.gl.renderbufferStorage(this.gl.RENDERBUFFER, this.gl.DEPTH_COMPONENT16, width, height);
    this.gl.framebufferTexture2D(this.gl.FRAMEBUFFER, this.gl.COLOR_ATTACHMENT0, this.gl.TEXTURE_2D, this.buf.texture, 0);
    this.gl.framebufferRenderbuffer(this.gl.FRAMEBUFFER, this.gl.DEPTH_ATTACHMENT, this.gl.RENDERBUFFER, this.buf.renderbuf);
    this.gl.bindTexture(this.gl.TEXTURE_2D, null);
    this.gl.bindRenderbuffer(this.gl.RENDERBUFFER, null);
    this.gl.bindFramebuffer(this.gl.FRAMEBUFFER, null);

    // Verify framebuffer is OK
    this.gl.bindFramebuffer(this.gl.FRAMEBUFFER, this.buf.framebuf);
    if (!this.gl.isFramebuffer(this.buf.framebuf)) {
        throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Invalid framebuffer");
    }
    var status = this.gl.checkFramebufferStatus(this.gl.FRAMEBUFFER);
    switch (status) {
        case this.gl.FRAMEBUFFER_COMPLETE:
            break;
        case this.gl.FRAMEBUFFER_INCOMPLETE_ATTACHMENT:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_INCOMPLETE_ATTACHMENT");
        case this.gl.FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT");
        case this.gl.FRAMEBUFFER_INCOMPLETE_DIMENSIONS:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_INCOMPLETE_DIMENSIONS");
        case this.gl.FRAMEBUFFER_UNSUPPORTED:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: FRAMEBUFFER_UNSUPPORTED");
        default:
            throw SceneJS_error.fatalError(SceneJS.errors.ERROR, "Incomplete framebuffer: " + status);
    }
    this.bound = false;
};

/**
 * Clears this renderbuffer
 */
SceneJS._webgl.RenderBuffer.prototype.clear = function () {
    if (!this.bound) {
        throw "Render buffer not bound";
    }
    this.gl.clear(this.gl.COLOR_BUFFER_BIT | this.gl.DEPTH_BUFFER_BIT);
    this.gl.disable(this.gl.BLEND);
};

/**
 * Reads buffer pixel at given coordinates
 */
SceneJS._webgl.RenderBuffer.prototype.read = function (pickX, pickY) {
    var x = pickX;
    var y = this.canvas.canvas.height - pickY;
    var pix = new Uint8Array(4);
    this.gl.readPixels(x, y, 1, 1, this.gl.RGBA, this.gl.UNSIGNED_BYTE, pix);
    return pix;
};

/**
 * Unbinds this renderbuffer
 */
SceneJS._webgl.RenderBuffer.prototype.unbind = function () {
    this.gl.bindFramebuffer(this.gl.FRAMEBUFFER, null);
    this.bound = false;
};

/** Returns the texture
 */
SceneJS._webgl.RenderBuffer.prototype.getTexture = function () {
    var self = this;
    return {
        bind: function (unit) {
            if (self.buf && self.buf.texture) {
                self.gl.activeTexture(self.gl["TEXTURE" + unit]);
                self.gl.bindTexture(self.gl.TEXTURE_2D, self.buf.texture);
                return true;
            }
            return false;
        },
        unbind: function (unit) {
            if (self.buf && self.buf.texture) {
                self.gl.activeTexture(self.gl["TEXTURE" + unit]);
                self.gl.bindTexture(self.gl.TEXTURE_2D, null);
            }
        }
    };
};

/** Destroys this buffer
 */
SceneJS._webgl.RenderBuffer.prototype.destroy = function () {
    if (this.buf) {
        this.gl.deleteTexture(this.buf.texture);
        this.gl.deleteFramebuffer(this.buf.framebuf);
        this.gl.deleteRenderbuffer(this.buf.renderbuf);
        this.buf = null;
        this.bound = false;
    }
};
As far as I could find (I don't use WebGL), the WebGL spec delegates to the OpenGL ES 2.0 spec for these FBO-related calls. RGBA with 8 bits per component is not a format that is guaranteed to be supported as a render target in ES 2.0. Many devices do support it (advertised with the OES_rgb8_rgba8 extension), but it is not part of the standard.
The texture you are using as COLOR_ATTACHMENT0 is RGBA with 8-bit components:
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0,
this.gl.RGBA, this.gl.UNSIGNED_BYTE, textureStorage);
Try specifying this as RGB565, which is color-renderable (note that in WebGL the internalformat argument must match the format, and any non-null data array for the packed 16-bit types must be a Uint16Array):
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGB, width, height, 0,
this.gl.RGB, this.gl.UNSIGNED_SHORT_5_6_5, textureStorage);
If you do need an alpha component in the texture, RGBA4444 or RGB5_A1 are your only portable options:
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0,
this.gl.RGBA, this.gl.UNSIGNED_SHORT_4_4_4_4, textureStorage);
this.gl.texImage2D(this.gl.TEXTURE_2D, 0, this.gl.RGBA, width, height, 0,
this.gl.RGBA, this.gl.UNSIGNED_SHORT_5_5_5_1, textureStorage);
The spec actually looks somewhat contradictory to me. Under "Differences Between WebGL and OpenGL ES 2.0", it says:
The following combinations of framebuffer object attachments, when all of the attachments are framebuffer attachment complete, non-zero, and have the same width and height, must result in the framebuffer being framebuffer complete:
COLOR_ATTACHMENT0 = RGBA/UNSIGNED_BYTE texture
Which at first sight suggests that RGBA/UNSIGNED_BYTE is supported. But that's under the condition "when all of the attachments are framebuffer attachment complete", and according to the ES 2.0 spec, attachments with this format are not attachment complete. And there is no override in the WebGL spec on what "attachment complete" means.
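
If you would rather handle this at runtime than hard-code a format, one option is to probe the device: attach a tiny texture in each candidate format to a throwaway framebuffer and check completeness. A minimal standalone sketch (not SceneJS code; the candidate list is an assumption based on the portable formats above):

function findRenderableFormat(gl) {
  // Candidates ordered from most to least desirable
  const candidates = [
    { format: gl.RGBA, type: gl.UNSIGNED_BYTE,          name: "RGBA8" },
    { format: gl.RGBA, type: gl.UNSIGNED_SHORT_4_4_4_4, name: "RGBA4" },
    { format: gl.RGBA, type: gl.UNSIGNED_SHORT_5_5_5_1, name: "RGB5_A1" },
    { format: gl.RGB,  type: gl.UNSIGNED_SHORT_5_6_5,   name: "RGB565" },
  ];
  const fb = gl.createFramebuffer();
  const tex = gl.createTexture();
  gl.bindFramebuffer(gl.FRAMEBUFFER, fb);
  gl.bindTexture(gl.TEXTURE_2D, tex);
  gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
  let found = null;
  for (const c of candidates) {
    // A 4x4 level-0 attachment is enough to test completeness
    gl.texImage2D(gl.TEXTURE_2D, 0, c.format, 4, 4, 0, c.format, c.type, null);
    gl.framebufferTexture2D(gl.FRAMEBUFFER, gl.COLOR_ATTACHMENT0, gl.TEXTURE_2D, tex, 0);
    if (gl.checkFramebufferStatus(gl.FRAMEBUFFER) === gl.FRAMEBUFFER_COMPLETE) {
      found = c;
      break;
    }
  }
  gl.bindFramebuffer(gl.FRAMEBUFFER, null);
  gl.deleteFramebuffer(fb);
  gl.deleteTexture(tex);
  return found; // null if nothing passed, which the spec quote above says should not happen
}

A render buffer implementation like the one in the question could run this once at startup and allocate its color attachment with whatever findRenderableFormat returns.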

Improve Face Recognition

I am trying to develop a face-recognition app on Android. I am using the JavaCV FaceRecognizer, but so far I am getting very poor results: it recognizes an image of a person it was trained on, but it also "recognizes" unknown images. For known faces it gives me a large distance value, most of the time 70-90, sometimes 90+, while unknown images also get 70-90.
So how can I improve the face-recognition performance? What techniques are there? What success rate can you normally expect with this?
I have never worked with image processing before. I will appreciate any guidelines.
Here is the code:
public class PersonRecognizer {

    public final static int MAXIMG = 100;
    FaceRecognizer faceRecognizer;
    String mPath;
    int count = 0;
    labels labelsFile;
    static final int WIDTH = 70;
    static final int HEIGHT = 70;
    private static final String TAG = "PersonRecognizer";
    private int mProb = 999;

    PersonRecognizer(String path) {
        faceRecognizer = com.googlecode.javacv.cpp.opencv_contrib.createLBPHFaceRecognizer(2, 8, 8, 8, 100);
        // path=Environment.getExternalStorageDirectory()+"/facerecog/faces/";
        mPath = path;
        labelsFile = new labels(mPath);
    }
    void changeRecognizer(int nRec) {
        switch (nRec) {
            case 0:
                faceRecognizer = com.googlecode.javacv.cpp.opencv_contrib.createLBPHFaceRecognizer(1, 8, 8, 8, 100);
                break;
            case 1:
                faceRecognizer = com.googlecode.javacv.cpp.opencv_contrib.createFisherFaceRecognizer();
                break;
            case 2:
                faceRecognizer = com.googlecode.javacv.cpp.opencv_contrib.createEigenFaceRecognizer();
                break;
        }
        train();
    }
    void add(Mat m, String description) {
        Bitmap bmp = Bitmap.createBitmap(m.width(), m.height(), Bitmap.Config.ARGB_8888);
        Utils.matToBitmap(m, bmp);
        bmp = Bitmap.createScaledBitmap(bmp, WIDTH, HEIGHT, false);
        FileOutputStream f;
        try {
            f = new FileOutputStream(mPath + description + "-" + count + ".jpg", true);
            count++;
            bmp.compress(Bitmap.CompressFormat.JPEG, 100, f);
            f.close();
        } catch (Exception e) {
            Log.e("error", e.getCause() + " " + e.getMessage());
            e.printStackTrace();
        }
    }
    public boolean train() {
        File root = new File(mPath);
        FilenameFilter pngFilter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith(".jpg");
            }
        };
        File[] imageFiles = root.listFiles(pngFilter);
        MatVector images = new MatVector(imageFiles.length);
        int[] labels = new int[imageFiles.length];
        int counter = 0;
        int label;
        IplImage img = null;
        IplImage grayImg;
        int i1 = mPath.length();
        for (File image : imageFiles) {
            String p = image.getAbsolutePath();
            img = cvLoadImage(p);
            if (img == null)
                Log.e("Error", "Error cVLoadImage");
            Log.i("image", p);
            int i2 = p.lastIndexOf("-");
            int i3 = p.lastIndexOf(".");
            int icount = 0;
            try {
                icount = Integer.parseInt(p.substring(i2 + 1, i3));
            } catch (Exception ex) {
                ex.printStackTrace();
            }
            if (count < icount) count++;
            String description = p.substring(i1, i2);
            if (labelsFile.get(description) < 0)
                labelsFile.add(description, labelsFile.max() + 1);
            label = labelsFile.get(description);
            grayImg = IplImage.create(img.width(), img.height(), IPL_DEPTH_8U, 1);
            cvCvtColor(img, grayImg, CV_BGR2GRAY);
            images.put(counter, grayImg);
            labels[counter] = label;
            counter++;
        }
        if (counter > 0)
            if (labelsFile.max() > 1)
                faceRecognizer.train(images, labels);
        labelsFile.Save();
        return true;
    }
    public boolean canPredict() {
        if (labelsFile.max() > 1)
            return true;
        else
            return false;
    }
    public String predict(Mat m) {
        if (!canPredict())
            return "";
        int n[] = new int[1];
        double p[] = new double[1];
        // convert Mat to black and white
        /*Mat gray_m = new Mat();
        Imgproc.cvtColor(m, gray_m, Imgproc.COLOR_RGBA2GRAY);*/
        IplImage ipl = MatToIplImage(m, WIDTH, HEIGHT);
        faceRecognizer.predict(ipl, n, p);
        if (n[0] != -1) {
            mProb = (int) p[0];
            Log.v(TAG, "Distance = " + mProb + "");
            Log.v(TAG, "N = " + n[0]);
        } else {
            mProb = -1;
            Log.v(TAG, "Distance = " + mProb);
        }
        if (n[0] != -1) {
            return labelsFile.get(n[0]);
        } else {
            return "Unknown";
        }
    }
    IplImage MatToIplImage(Mat m, int width, int height) {
        Bitmap bmp;
        try {
            bmp = Bitmap.createBitmap(m.width(), m.height(), Bitmap.Config.RGB_565);
        } catch (OutOfMemoryError er) {
            bmp = Bitmap.createBitmap(m.width() / 2, m.height() / 2, Bitmap.Config.RGB_565);
            er.printStackTrace();
        }
        Utils.matToBitmap(m, bmp);
        return BitmapToIplImage(bmp, width, height);
    }
    IplImage BitmapToIplImage(Bitmap bmp, int width, int height) {
        if ((width != -1) || (height != -1)) {
            Bitmap bmp2 = Bitmap.createScaledBitmap(bmp, width, height, false);
            bmp = bmp2;
        }
        IplImage image = IplImage.create(bmp.getWidth(), bmp.getHeight(), IPL_DEPTH_8U, 4);
        bmp.copyPixelsToBuffer(image.getByteBuffer());
        IplImage grayImg = IplImage.create(image.width(), image.height(), IPL_DEPTH_8U, 1);
        cvCvtColor(image, grayImg, opencv_imgproc.CV_BGR2GRAY);
        return grayImg;
    }
    protected void SaveBmp(Bitmap bmp, String path) {
        FileOutputStream file;
        try {
            file = new FileOutputStream(path, true);
            bmp.compress(Bitmap.CompressFormat.JPEG, 100, file);
            file.close();
        } catch (Exception e) {
            Log.e("", e.getMessage() + e.getCause());
            e.printStackTrace();
        }
    }
    public void load() {
        train();
    }

    public int getProb() {
        return mProb;
    }
}
I have faced similar challenges recently; here are the things which helped me get better results:
Crop the faces from the images - this removes unnecessary pixels at inference time.
Resize the cropped face images - this affects face-landmark detection; try different scales on test sets to understand what works best. It also affects inference time: the smaller the image, the faster the inference.
Improve the brightness of the face images - I found this really helpful; detecting face landmarks in darker images did not work well, mainly because the model was pre-trained on mostly white faces - understanding the training data helps when dealing with bias.
Convert to grayscale images - I have seen this in many forums, where it is said to help find edges efficiently, and processing time is lower compared to colour images (3 channels, RGB) - however, this did not help much in my case.
Try to capture (register) as many images as possible per person, at different angles, in different lighting and with other variations - this really helps, since recognition compares against the encodings of the stored images.
Try to implement 1-1 comparison for face verification - for example, in my system I captured 10 pictures of each person, and at verification time I compare against those 10 pictures instead of against the encodings of every person stored in the system. This helps prevent false positives; however, the use-cases for this setup are limited. I am using it for face authentication, comparing the new face against the existing faces registered under the same mobile number.
My understanding as of today is that face recognition works well but is not 100% accurate; we have to understand the model architecture, the training data and our requirements, and deploy accordingly to get a better outcome. Here are some points which helped me improve the overall system:
Implement a fallback method - give the user an option when the system fails to recognize them correctly; for example, if face authentication fails for some reason, show them an enter-PIN option.
In critical systems, add periodic human intervention to confirm the system's results - for example, if the system denies a user based on the FR result, have a human agent verify the failed result and then allow the user.
Implement multiple factors for authentication - deploy the face recognition system as an addition to an existing system - for example, after the user has logged in with credentials, verify that it is the intended person using face recognition.
Design your user interface so that, at verification time, the user knows how to behave - eyes open, mouth closed, etc. - without hurting the user experience.
Provide clear instructions to users when they are dealing with the system - for example, let users know that an FR system is integrated and that they need to show their face in good lighting conditions, etc.
