I am trying to convert an image into grayscale in the following way:
#define bytesPerPixel 4
#define bitsPerComponent 8
-(unsigned char*) getBytesForImage: (UIImage*)pImage
{
CGImageRef image = [pImage CGImage];
NSUInteger width = CGImageGetWidth(image);
NSUInteger height = CGImageGetHeight(image);
NSUInteger bytesPerRow = bytesPerPixel * width;
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
unsigned char *rawData = malloc(height * width * 4);
CGContextRef context = CGBitmapContextCreate(rawData, width, height, bitsPerComponent, bytesPerRow, colorSpace, kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big);
CGColorSpaceRelease(colorSpace);
CGContextDrawImage(context, CGRectMake(0, 0, width, height), image);
CGContextRelease(context);
return rawData;
}
-(UIImage*) processImage: (UIImage*)pImage
{
DebugLog(#"processing image");
unsigned char *rawData = [self getBytesForImage: pImage];
NSUInteger width = pImage.size.width;
NSUInteger height = pImage.size.height;
DebugLog(#"width: %d", width);
DebugLog(#"height: %d", height);
NSUInteger bytesPerRow = bytesPerPixel * width;
for (int xCoordinate = 0; xCoordinate < width; xCoordinate++)
{
for (int yCoordinate = 0; yCoordinate < height; yCoordinate++)
{
int byteIndex = (bytesPerRow * yCoordinate) + xCoordinate * bytesPerPixel;
//Getting original colors
float red = ( rawData[byteIndex] / 255.f );
float green = ( rawData[byteIndex + 1] / 255.f );
float blue = ( rawData[byteIndex + 2] / 255.f );
//Processing pixel data
float averageColor = (red + green + blue) / 3.0f;
red = averageColor;
green = averageColor;
blue = averageColor;
//Assigning new color components
rawData[byteIndex] = (unsigned char) red * 255;
rawData[byteIndex + 1] = (unsigned char) green * 255;
rawData[byteIndex + 2] = (unsigned char) blue * 255;
}
}
NSData* newPixelData = [NSData dataWithBytes: rawData length: height * width * 4];
UIImage* newImage = [UIImage imageWithData: newPixelData];
free(rawData);
DebugLog(#"image processed");
return newImage;
}
So when I want to convert an image I just call processImage:
imageToDisplay.image = [self processImage: image];
But imageToDisplay doesn't display. What may be the problem?
Thanks.
I needed a version that preserved the alpha channel, so I modified the code posted by Dutchie432:
#implementation UIImage (grayscale)
typedef enum {
ALPHA = 0,
BLUE = 1,
GREEN = 2,
RED = 3
} PIXELS;
- (UIImage *)convertToGrayscale {
CGSize size = [self size];
int width = size.width;
int height = size.height;
// the pixels will be painted to this array
uint32_t *pixels = (uint32_t *) malloc(width * height * sizeof(uint32_t));
// clear the pixels so any transparency is preserved
memset(pixels, 0, width * height * sizeof(uint32_t));
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
// create a context with RGBA pixels
CGContextRef context = CGBitmapContextCreate(pixels, width, height, 8, width * sizeof(uint32_t), colorSpace,
kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
// paint the bitmap to our context which will fill in the pixels array
CGContextDrawImage(context, CGRectMake(0, 0, width, height), [self CGImage]);
for(int y = 0; y < height; y++) {
for(int x = 0; x < width; x++) {
uint8_t *rgbaPixel = (uint8_t *) &pixels[y * width + x];
// convert to grayscale using recommended method: http://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
uint32_t gray = 0.3 * rgbaPixel[RED] + 0.59 * rgbaPixel[GREEN] + 0.11 * rgbaPixel[BLUE];
// set the pixels to gray
rgbaPixel[RED] = gray;
rgbaPixel[GREEN] = gray;
rgbaPixel[BLUE] = gray;
}
}
// create a new CGImageRef from our context with the modified pixels
CGImageRef image = CGBitmapContextCreateImage(context);
// we're done with the context, color space, and pixels
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
free(pixels);
// make a new UIImage to return
UIImage *resultUIImage = [UIImage imageWithCGImage:image];
// we're done with image now too
CGImageRelease(image);
return resultUIImage;
}
#end
Here is a code using only UIKit and the luminosity blending mode. A bit of a hack, but it works well.
// Transform the image in grayscale.
- (UIImage*) grayishImage: (UIImage*) inputImage {
// Create a graphic context.
UIGraphicsBeginImageContextWithOptions(inputImage.size, YES, 1.0);
CGRect imageRect = CGRectMake(0, 0, inputImage.size.width, inputImage.size.height);
// Draw the image with the luminosity blend mode.
// On top of a white background, this will give a black and white image.
[inputImage drawInRect:imageRect blendMode:kCGBlendModeLuminosity alpha:1.0];
// Get the resulting image.
UIImage *filteredImage = UIGraphicsGetImageFromCurrentImageContext();
UIGraphicsEndImageContext();
return filteredImage;
}
To keep the transparency, maybe you can just set the opaque mode parameter of the UIGraphicsBeginImageContextWithOptions to NO. Needs to be checked.
Based on Cam's code with the ability to deal with the scale for Retina displays.
- (UIImage *) toGrayscale
{
const int RED = 1;
const int GREEN = 2;
const int BLUE = 3;
// Create image rectangle with current image width/height
CGRect imageRect = CGRectMake(0, 0, self.size.width * self.scale, self.size.height * self.scale);
int width = imageRect.size.width;
int height = imageRect.size.height;
// the pixels will be painted to this array
uint32_t *pixels = (uint32_t *) malloc(width * height * sizeof(uint32_t));
// clear the pixels so any transparency is preserved
memset(pixels, 0, width * height * sizeof(uint32_t));
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
// create a context with RGBA pixels
CGContextRef context = CGBitmapContextCreate(pixels, width, height, 8, width * sizeof(uint32_t), colorSpace,
kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
// paint the bitmap to our context which will fill in the pixels array
CGContextDrawImage(context, CGRectMake(0, 0, width, height), [self CGImage]);
for(int y = 0; y < height; y++) {
for(int x = 0; x < width; x++) {
uint8_t *rgbaPixel = (uint8_t *) &pixels[y * width + x];
// convert to grayscale using recommended method: http://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
uint8_t gray = (uint8_t) ((30 * rgbaPixel[RED] + 59 * rgbaPixel[GREEN] + 11 * rgbaPixel[BLUE]) / 100);
// set the pixels to gray
rgbaPixel[RED] = gray;
rgbaPixel[GREEN] = gray;
rgbaPixel[BLUE] = gray;
}
}
// create a new CGImageRef from our context with the modified pixels
CGImageRef image = CGBitmapContextCreateImage(context);
// we're done with the context, color space, and pixels
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
free(pixels);
// make a new UIImage to return
UIImage *resultUIImage = [UIImage imageWithCGImage:image
scale:self.scale
orientation:UIImageOrientationUp];
// we're done with image now too
CGImageRelease(image);
return resultUIImage;
}
I liked Mathieu Godart's answer, but it didn't seem to work properly for retina or alpha images. Here's an updated version that seems to work for both of those for me:
- (UIImage*)convertToGrayscale
{
UIGraphicsBeginImageContextWithOptions(self.size, NO, self.scale);
CGRect imageRect = CGRectMake(0.0f, 0.0f, self.size.width, self.size.height);
CGContextRef ctx = UIGraphicsGetCurrentContext();
// Draw a white background
CGContextSetRGBFillColor(ctx, 1.0f, 1.0f, 1.0f, 1.0f);
CGContextFillRect(ctx, imageRect);
// Draw the luminosity on top of the white background to get grayscale
[self drawInRect:imageRect blendMode:kCGBlendModeLuminosity alpha:1.0f];
// Apply the source image's alpha
[self drawInRect:imageRect blendMode:kCGBlendModeDestinationIn alpha:1.0f];
UIImage* grayscaleImage = UIGraphicsGetImageFromCurrentImageContext();
UIGraphicsEndImageContext();
return grayscaleImage;
}
What exactly takes place when you use this function? Is the function returning an invalid image, or is the display not showing it correctly?
This is the method I use to convert to greyscale.
- (UIImage *) convertToGreyscale:(UIImage *)i {
int kRed = 1;
int kGreen = 2;
int kBlue = 4;
int colors = kGreen | kBlue | kRed;
int m_width = i.size.width;
int m_height = i.size.height;
uint32_t *rgbImage = (uint32_t *) malloc(m_width * m_height * sizeof(uint32_t));
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGContextRef context = CGBitmapContextCreate(rgbImage, m_width, m_height, 8, m_width * 4, colorSpace, kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipLast);
CGContextSetInterpolationQuality(context, kCGInterpolationHigh);
CGContextSetShouldAntialias(context, NO);
CGContextDrawImage(context, CGRectMake(0, 0, m_width, m_height), [i CGImage]);
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
// now convert to grayscale
uint8_t *m_imageData = (uint8_t *) malloc(m_width * m_height);
for(int y = 0; y < m_height; y++) {
for(int x = 0; x < m_width; x++) {
uint32_t rgbPixel=rgbImage[y*m_width+x];
uint32_t sum=0,count=0;
if (colors & kRed) {sum += (rgbPixel>>24)&255; count++;}
if (colors & kGreen) {sum += (rgbPixel>>16)&255; count++;}
if (colors & kBlue) {sum += (rgbPixel>>8)&255; count++;}
m_imageData[y*m_width+x]=sum/count;
}
}
free(rgbImage);
// convert from a gray scale image back into a UIImage
uint8_t *result = (uint8_t *) calloc(m_width * m_height *sizeof(uint32_t), 1);
// process the image back to rgb
for(int i = 0; i < m_height * m_width; i++) {
result[i*4]=0;
int val=m_imageData[i];
result[i*4+1]=val;
result[i*4+2]=val;
result[i*4+3]=val;
}
// create a UIImage
colorSpace = CGColorSpaceCreateDeviceRGB();
context = CGBitmapContextCreate(result, m_width, m_height, 8, m_width * sizeof(uint32_t), colorSpace, kCGBitmapByteOrder32Little | kCGImageAlphaNoneSkipLast);
CGImageRef image = CGBitmapContextCreateImage(context);
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
UIImage *resultUIImage = [UIImage imageWithCGImage:image];
CGImageRelease(image);
free(m_imageData);
// make sure the data will be released by giving it to an autoreleased NSData
[NSData dataWithBytesNoCopy:result length:m_width * m_height];
return resultUIImage;
}
Different approach with CIFilter. Preserves alpha channel and works with transparent background:
+ (UIImage *)convertImageToGrayScale:(UIImage *)image
{
CIImage *inputImage = [CIImage imageWithCGImage:image.CGImage];
CIContext *context = [CIContext contextWithOptions:nil];
CIFilter *filter = [CIFilter filterWithName:#"CIColorControls"];
[filter setValue:inputImage forKey:kCIInputImageKey];
[filter setValue:#(0.0) forKey:kCIInputSaturationKey];
CIImage *outputImage = filter.outputImage;
CGImageRef cgImageRef = [context createCGImage:outputImage fromRect:outputImage.extent];
UIImage *result = [UIImage imageWithCGImage:cgImageRef];
CGImageRelease(cgImageRef);
return result;
}
A swift extension to UIImage, preserving alpha:
extension UIImage {
private func convertToGrayScaleNoAlpha() -> CGImageRef {
let colorSpace = CGColorSpaceCreateDeviceGray();
let bitmapInfo = CGBitmapInfo(CGImageAlphaInfo.None.rawValue)
let context = CGBitmapContextCreate(nil, UInt(size.width), UInt(size.height), 8, 0, colorSpace, bitmapInfo)
CGContextDrawImage(context, CGRectMake(0, 0, size.width, size.height), self.CGImage)
return CGBitmapContextCreateImage(context)
}
/**
Return a new image in shades of gray + alpha
*/
func convertToGrayScale() -> UIImage {
let bitmapInfo = CGBitmapInfo(CGImageAlphaInfo.Only.rawValue)
let context = CGBitmapContextCreate(nil, UInt(size.width), UInt(size.height), 8, 0, nil, bitmapInfo)
CGContextDrawImage(context, CGRectMake(0, 0, size.width, size.height), self.CGImage);
let mask = CGBitmapContextCreateImage(context)
return UIImage(CGImage: CGImageCreateWithMask(convertToGrayScaleNoAlpha(), mask), scale: scale, orientation:imageOrientation)!
}
}
Here is another good solution as a category method on UIImage. It's based on this blog post and its comments. But I fixed a memory issue here:
- (UIImage *)grayScaleImage {
// Create image rectangle with current image width/height
CGRect imageRect = CGRectMake(0, 0, self.size.width * self.scale, self.size.height * self.scale);
// Grayscale color space
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceGray();
// Create bitmap content with current image size and grayscale colorspace
CGContextRef context = CGBitmapContextCreate(nil, self.size.width * self.scale, self.size.height * self.scale, 8, 0, colorSpace, kCGImageAlphaNone);
// Draw image into current context, with specified rectangle
// using previously defined context (with grayscale colorspace)
CGContextDrawImage(context, imageRect, [self CGImage]);
// Create bitmap image info from pixel data in current context
CGImageRef grayImage = CGBitmapContextCreateImage(context);
// release the colorspace and graphics context
CGColorSpaceRelease(colorSpace);
CGContextRelease(context);
// make a new alpha-only graphics context
context = CGBitmapContextCreate(nil, self.size.width * self.scale, self.size.height * self.scale, 8, 0, nil, kCGImageAlphaOnly);
// draw image into context with no colorspace
CGContextDrawImage(context, imageRect, [self CGImage]);
// create alpha bitmap mask from current context
CGImageRef mask = CGBitmapContextCreateImage(context);
// release graphics context
CGContextRelease(context);
// make UIImage from grayscale image with alpha mask
CGImageRef cgImage = CGImageCreateWithMask(grayImage, mask);
UIImage *grayScaleImage = [UIImage imageWithCGImage:cgImage scale:self.scale orientation:self.imageOrientation];
// release the CG images
CGImageRelease(cgImage);
CGImageRelease(grayImage);
CGImageRelease(mask);
// return the new grayscale image
return grayScaleImage;
}
An fast and efficient Swift 3 implementation for iOS 9/10. I feel this is efficient having now tried every image filtering method I could find for processing 100's of images at a time (when downloading using AlamofireImage's ImageFilter option). I settled on this method as FAR better than any other I tried (for my use case) in terms of memory and speed.
func convertToGrayscale() -> UIImage? {
UIGraphicsBeginImageContextWithOptions(self.size, false, self.scale)
let imageRect = CGRect(x: 0.0, y: 0.0, width: self.size.width, height: self.size.height)
let context = UIGraphicsGetCurrentContext()
// Draw a white background
context!.setFillColor(red: 1.0, green: 1.0, blue: 1.0, alpha: 1.0)
context!.fill(imageRect)
// optional: increase contrast with colorDodge before applying luminosity
// (my images were too dark when using just luminosity - you may not need this)
self.draw(in: imageRect, blendMode: CGBlendMode.colorDodge, alpha: 0.7)
// Draw the luminosity on top of the white background to get grayscale of original image
self.draw(in: imageRect, blendMode: CGBlendMode.luminosity, alpha: 0.90)
// optional: re-apply alpha if your image has transparency - based on user1978534's answer (I haven't tested this as I didn't have transparency - I just know this would be the the syntax)
// self.draw(in: imageRect, blendMode: CGBlendMode.destinationIn, alpha: 1.0)
let grayscaleImage = UIGraphicsGetImageFromCurrentImageContext()
UIGraphicsEndImageContext()
return grayscaleImage
}
Re the use of colorDodge: I initially had issues getting my images light enough to match the grayscale coloring produced by using CIFilter("CIPhotoEffectTonal") - my results turned out too dark. I was able to get a decent match by applying CGBlendMode.colorDodge # ~ 0.7 alpha, which seems to increase the overall contrast.
Other color blend effects might work too - but I think you would want to apply before luminocity, which is the greyscale filtering effect. I found this page very helpful to reference about the different BlendModes.
Re efficiency gains I found: I need to process 100's of thumbnail images as they are loaded from a server (using AlamofireImage for async loading, caching, and applying a filter). I started to experience crashes when the total size of my images exceeded the cache size, so I experimented with other methods.
The CoreImage CPU based CIFilter approach was the first I tried, and wasn't memory efficient enough for the number of images I'm handling.
I also tried applying a CIFilter via the GPU using EAGLContext(api: .openGLES3), which was actually even more memory intensive - I actually got memory warnings for 450+ mb use while loading 200 + images.
I tried bitmap processing (i.e. CGContext(data: nil, width: width, height: height, bitsPerComponent: 8, bytesPerRow: 0, space: colorSpace, bitmapInfo: CGImageAlphaInfo.none.rawValue)... which worked well except I couldn't get a high enough resolution for a modern retina device. Images were very grainy even when I added context.scaleBy(x: scaleFactor, y: scaleFactor).
So out of everything I tried, this method (UIGraphics Context Draw) to be VASTLY more efficient re speed and memory when applying as a filter to AlamofireImage. As in, seeing less than 70 mbs ram when processing my 200+ images and them basically loading instantly, rather than over about 35 seconds it took with the openEAGL methods. I know these are not very scientific benchmarks. I will instrument it if anyone is very curious though :)
And lastly, if you do need to pass this or another greyscale filter into AlamofireImage - this is how to do it: (note you must import AlamofireImage into your class to use ImageFilter)
public struct GrayScaleFilter: ImageFilter {
public init() {
}
public var filter: (UIImage) -> UIImage {
return { image in
return image.convertToGrayscale() ?? image
}
}
}
To use it, create the filter like this and pass into af_setImage like so:
let filter = GrayScaleFilter()
imageView.af_setImage(withURL: url, filter: filter)
#interface UIImageView (Settings)
- (void)convertImageToGrayScale;
#end
#implementation UIImageView (Settings)
- (void)convertImageToGrayScale
{
// Create image rectangle with current image width/height
CGRect imageRect = CGRectMake(0, 0, self.image.size.width, self.image.size.height);
// Grayscale color space
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceGray();
// Create bitmap content with current image size and grayscale colorspace
CGContextRef context = CGBitmapContextCreate(nil, self.image.size.width, self.image.size.height, 8, 0, colorSpace, kCGImageAlphaNone);
// Draw image into current context, with specified rectangle
// using previously defined context (with grayscale colorspace)
CGContextDrawImage(context, imageRect, [self.image CGImage]);
// Create bitmap image info from pixel data in current context
CGImageRef imageRef = CGBitmapContextCreateImage(context);
// Create a new UIImage object
UIImage *newImage = [UIImage imageWithCGImage:imageRef];
// Release colorspace, context and bitmap information
CGColorSpaceRelease(colorSpace);
CGContextRelease(context);
CFRelease(imageRef);
// Return the new grayscale image
self.image = newImage;
}
#end
I have yet another answer. This one is extremely performant and handles retina graphics as well as transparency. It expands on Sargis Gevorgyan's approach:
+ (UIImage*) grayScaleFromImage:(UIImage*)image opaque:(BOOL)opaque
{
// NSTimeInterval start = [NSDate timeIntervalSinceReferenceDate];
CGSize size = image.size;
CGRect bounds = CGRectMake(0, 0, size.width, size.height);
// Create bitmap content with current image size and grayscale colorspace
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceGray();
size_t bitsPerComponent = 8;
size_t bytesPerPixel = opaque ? 1 : 2;
size_t bytesPerRow = bytesPerPixel * size.width * image.scale;
CGContextRef context = CGBitmapContextCreate(nil, size.width, size.height, bitsPerComponent, bytesPerRow, colorSpace, opaque ? kCGImageAlphaNone : kCGImageAlphaPremultipliedLast);
// create image from bitmap
CGContextDrawImage(context, bounds, image.CGImage);
CGImageRef cgImage = CGBitmapContextCreateImage(context);
UIImage* result = [[UIImage alloc] initWithCGImage:cgImage scale:image.scale orientation:UIImageOrientationUp];
CGImageRelease(cgImage);
CGContextRelease(context);
// performance results on iPhone 6S+ in Release mode.
// Results are in photo pixels, not device pixels:
// ~ 5ms for 500px x 600px
// ~ 15ms for 2200px x 600px
// NSLog(#"generating %d x %d # %dx grayscale took %f seconds", (int)size.width, (int)size.height, (int)image.scale, [NSDate timeIntervalSinceReferenceDate] - start);
return result;
}
Using blending modes instead is elegant, but copying to a grayscale bitmap is more performant because you only use one or two color channels instead of four. The opacity bool is meant to take in your UIView's opaque flag so you can opt out of using an alpha channel if you know you won't need one.
I haven't tried the Core Image based solutions in this answer thread, but I would be very cautious about using Core Image if performance is important.
Thats my try to convert fast by drawing directly to grayscale colorspace without each pixel enumeration. It works 10x faster than CIImageFilter solutions.
#implementation UIImage (Grayscale)
static UIImage *grayscaleImageFromCIImage(CIImage *image, CGFloat scale)
{
CIImage *blackAndWhite = [CIFilter filterWithName:#"CIColorControls" keysAndValues:kCIInputImageKey, image, kCIInputBrightnessKey, #0.0, kCIInputContrastKey, #1.1, kCIInputSaturationKey, #0.0, nil].outputImage;
CIImage *output = [CIFilter filterWithName:#"CIExposureAdjust" keysAndValues:kCIInputImageKey, blackAndWhite, kCIInputEVKey, #0.7, nil].outputImage;
CGImageRef ref = [[CIContext contextWithOptions:nil] createCGImage:output fromRect:output.extent];
UIImage *result = [UIImage imageWithCGImage:ref scale:scale orientation:UIImageOrientationUp];
CGImageRelease(ref);
return result;
}
static UIImage *grayscaleImageFromCGImage(CGImageRef imageRef, CGFloat scale)
{
NSInteger width = CGImageGetWidth(imageRef) * scale;
NSInteger height = CGImageGetHeight(imageRef) * scale;
NSMutableData *pixels = [NSMutableData dataWithLength:width*height];
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceGray();
CGContextRef context = CGBitmapContextCreate(pixels.mutableBytes, width, height, 8, width, colorSpace, 0);
CGContextDrawImage(context, CGRectMake(0, 0, width, height), imageRef);
CGImageRef ref = CGBitmapContextCreateImage(context);
UIImage *result = [UIImage imageWithCGImage:ref scale:scale orientation:UIImageOrientationUp];
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
CGImageRelease(ref);
return result;
}
- (UIImage *)grayscaleImage
{
if (self.CIImage) {
return grayscaleImageFromCIImage(self.CIImage, self.scale);
} else if (self.CGImage) {
return grayscaleImageFromCGImage(self.CGImage, self.scale);
}
return nil;
}
#end
I would like to implement an OCR application that would recognize text from Photos.
I succeeded in Compiling and Integration the Tesseract Engine in iOS, I succeeded in getting reasonable detection when photographing clear documents (or a photoshot of this text from the screen) but for other text such as signposts, shop signs, colour background, the detection failed.
The Question is What kind of image processing preparations are necessary to get better recognition. For example, I expect that we need to transform the images into grayscale /B&W as well as fixing contrast etc.
How can this be done in iOS, Is there a package for this?
I'm currently working on the same thing.
I found that a PNG saved in photoshop worked fine, but an image which was originally sourced from the camera then imported into the app never worked.
Don't ask me to explain it - but applying this function made these images work. Maybe it'll work for you too.
// this does the trick to have tesseract accept the UIImage.
UIImage * gs_convert_image (UIImage * src_img) {
CGColorSpaceRef d_colorSpace = CGColorSpaceCreateDeviceRGB();
/*
* Note we specify 4 bytes per pixel here even though we ignore the
* alpha value; you can't specify 3 bytes per-pixel.
*/
size_t d_bytesPerRow = src_img.size.width * 4;
unsigned char * imgData = (unsigned char*)malloc(src_img.size.height*d_bytesPerRow);
CGContextRef context = CGBitmapContextCreate(imgData, src_img.size.width,
src_img.size.height,
8, d_bytesPerRow,
d_colorSpace,
kCGImageAlphaNoneSkipFirst);
UIGraphicsPushContext(context);
// These next two lines 'flip' the drawing so it doesn't appear upside-down.
CGContextTranslateCTM(context, 0.0, src_img.size.height);
CGContextScaleCTM(context, 1.0, -1.0);
// Use UIImage's drawInRect: instead of the CGContextDrawImage function, otherwise you'll have issues when the source image is in portrait orientation.
[src_img drawInRect:CGRectMake(0.0, 0.0, src_img.size.width, src_img.size.height)];
UIGraphicsPopContext();
/*
* At this point, we have the raw ARGB pixel data in the imgData buffer, so
* we can perform whatever image processing here.
*/
// After we've processed the raw data, turn it back into a UIImage instance.
CGImageRef new_img = CGBitmapContextCreateImage(context);
UIImage * convertedImage = [[UIImage alloc] initWithCGImage:
new_img];
CGImageRelease(new_img);
CGContextRelease(context);
CGColorSpaceRelease(d_colorSpace);
free(imgData);
return convertedImage;
}
I've also gone a lot of experimentation preparing the image for tesseract. Resizing, converting to grayscale, then adjusting brightness and contrast seems to work best.
I've also tried this GPUImage library. https://github.com/BradLarson/GPUImage
And the GPUImageAverageLuminanceThresholdFilter seems to give me a great adjusted image, but tesseract doesn't seem to work well with it.
I've also put in opencv into my project and plan to try out it's image routines. Possibly even some box detection to find the text area (i'm hoping this will speed up tesseract).
I have used the code above but added two other function calls as well to convert the image so that it will work with the Tesseract.
Firstly I used an image resize script to convert to 640 x 640 which seems to be more manageable for the Tesseract.
-(UIImage *)resizeImage:(UIImage *)image {
CGImageRef imageRef = [image CGImage];
CGImageAlphaInfo alphaInfo = CGImageGetAlphaInfo(imageRef);
CGColorSpaceRef colorSpaceInfo = CGColorSpaceCreateDeviceRGB();
if (alphaInfo == kCGImageAlphaNone)
alphaInfo = kCGImageAlphaNoneSkipLast;
int width, height;
width = 640;//[image size].width;
height = 640;//[image size].height;
CGContextRef bitmap;
if (image.imageOrientation == UIImageOrientationUp | image.imageOrientation == UIImageOrientationDown) {
bitmap = CGBitmapContextCreate(NULL, width, height, CGImageGetBitsPerComponent(imageRef), CGImageGetBytesPerRow(imageRef), colorSpaceInfo, alphaInfo);
} else {
bitmap = CGBitmapContextCreate(NULL, height, width, CGImageGetBitsPerComponent(imageRef), CGImageGetBytesPerRow(imageRef), colorSpaceInfo, alphaInfo);
}
if (image.imageOrientation == UIImageOrientationLeft) {
NSLog(#"image orientation left");
CGContextRotateCTM (bitmap, radians(90));
CGContextTranslateCTM (bitmap, 0, -height);
} else if (image.imageOrientation == UIImageOrientationRight) {
NSLog(#"image orientation right");
CGContextRotateCTM (bitmap, radians(-90));
CGContextTranslateCTM (bitmap, -width, 0);
} else if (image.imageOrientation == UIImageOrientationUp) {
NSLog(#"image orientation up");
} else if (image.imageOrientation == UIImageOrientationDown) {
NSLog(#"image orientation down");
CGContextTranslateCTM (bitmap, width,height);
CGContextRotateCTM (bitmap, radians(-180.));
}
CGContextDrawImage(bitmap, CGRectMake(0, 0, width, height), imageRef);
CGImageRef ref = CGBitmapContextCreateImage(bitmap);
UIImage *result = [UIImage imageWithCGImage:ref];
CGContextRelease(bitmap);
CGImageRelease(ref);
return result;
}
So that the radians work ensure you declare it above the #implementation
static inline double radians (double degrees) {return degrees * M_PI/180;}
Then I convert to grayscale.
I found this article Convert image to grayscale on converting to grayscale.
I have used the code from here successfully and can now read different colour text and different colour backgrounds
I have modified the code slightly to work as a function within a class rather than as its own class which the other person did
- (UIImage *) toGrayscale:(UIImage*)img
{
const int RED = 1;
const int GREEN = 2;
const int BLUE = 3;
// Create image rectangle with current image width/height
CGRect imageRect = CGRectMake(0, 0, img.size.width * img.scale, img.size.height * img.scale);
int width = imageRect.size.width;
int height = imageRect.size.height;
// the pixels will be painted to this array
uint32_t *pixels = (uint32_t *) malloc(width * height * sizeof(uint32_t));
// clear the pixels so any transparency is preserved
memset(pixels, 0, width * height * sizeof(uint32_t));
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
// create a context with RGBA pixels
CGContextRef context = CGBitmapContextCreate(pixels, width, height, 8, width * sizeof(uint32_t), colorSpace,
kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
// paint the bitmap to our context which will fill in the pixels array
CGContextDrawImage(context, CGRectMake(0, 0, width, height), [img CGImage]);
for(int y = 0; y < height; y++) {
for(int x = 0; x < width; x++) {
uint8_t *rgbaPixel = (uint8_t *) &pixels[y * width + x];
// convert to grayscale using recommended method: http://en.wikipedia.org/wiki/Grayscale#Converting_color_to_grayscale
uint32_t gray = 0.3 * rgbaPixel[RED] + 0.59 * rgbaPixel[GREEN] + 0.11 * rgbaPixel[BLUE];
// set the pixels to gray
rgbaPixel[RED] = gray;
rgbaPixel[GREEN] = gray;
rgbaPixel[BLUE] = gray;
}
}
// create a new CGImageRef from our context with the modified pixels
CGImageRef image = CGBitmapContextCreateImage(context);
// we're done with the context, color space, and pixels
CGContextRelease(context);
CGColorSpaceRelease(colorSpace);
free(pixels);
// make a new UIImage to return
UIImage *resultUIImage = [UIImage imageWithCGImage:image
scale:img.scale
orientation:UIImageOrientationUp];
// we're done with image now too
CGImageRelease(image);
return resultUIImage;
}
I want to take allow the user to take a picture and then show the greyscale version. However, it is very slow because the image file is too big/resolution is too high.
How can I reduce the quality of the image when the user takes the picture?
Heres the code I am using for the transformation:
- (UIImage *)convertImageToGrayScale:(UIImage *)image
{
// Create image rectangle with current image width/height
CGRect imageRect = CGRectMake(0, 0, image.size.width, image.size.height);
// Grayscale color space
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceGray();
// Create bitmap content with current image size and grayscale colorspace
CGContextRef context = CGBitmapContextCreate(nil, image.size.width, image.size.height, 8, 0, colorSpace, kCGImageAlphaNone);
// Draw image into current context, with specified rectangle
// using previously defined context (with grayscale colorspace)
CGContextDrawImage(context, imageRect, [image CGImage]);
/* changes start here */
// Create bitmap image info from pixel data in current context
CGImageRef grayImage = CGBitmapContextCreateImage(context);
// release the colorspace and graphics context
CGColorSpaceRelease(colorSpace);
CGContextRelease(context);
// make a new alpha-only graphics context
context = CGBitmapContextCreate(nil, image.size.width, image.size.height, 8, 0, nil, kCGImageAlphaOnly);
// draw image into context with no colorspace
CGContextDrawImage(context, imageRect, [image CGImage]);
// create alpha bitmap mask from current context
CGImageRef mask = CGBitmapContextCreateImage(context);
// release graphics context
CGContextRelease(context);
// make UIImage from grayscale image with alpha mask
UIImage *grayScaleImage = [UIImage imageWithCGImage:CGImageCreateWithMask(grayImage, mask) scale:image.scale orientation:image.imageOrientation];
// release the CG images
CGImageRelease(grayImage);
CGImageRelease(mask);
// return the new grayscale image
return grayScaleImage;
/* changes end here */
}
How about downsampling the UIImage before passing it on to the grayscale translation? Something like:
NSData *imageAsData = UIImageJPEGRepresentation(imageFromCamera, 0.5);
UIImage *downsampledImaged = [UIImage imageWithData:imageAsData];
You could use other compression qualities other than 0.5 of course.
If you are using AVFoundation to capture the image you can set the quality of the image to be captured by changing the capture session preset like the following:
AVCaptureSession *session = [[AVCaptureSession alloc] init];
session.sessionPreset = AVCaptureSessionPresetLow;
There is a table of which presents correspond to which resolution in the AVFoundation Programming Guide.
I have a single-color image that has partial transparency. I have both normal and #2X versions of the image. I would like to be able to tint the image a different color, in code. The code below works fine for the normal image, but the #2X ends up with artifacts. The normal image might have a similar issue If so, I can't detect it on account of resolution.
+(UIImage *) newImageFromMaskImage:(UIImage *)mask inColor:(UIColor *) color {
CGImageRef maskImage = mask.CGImage;
CGFloat width = mask.size.width;
CGFloat height = mask.size.height;
CGRect bounds = CGRectMake(0,0,width,height);
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGContextRef bitmapContext = CGBitmapContextCreate(NULL, width, height, 8, 0, colorSpace, kCGImageAlphaPremultipliedLast);
CGContextClipToMask(bitmapContext, bounds, maskImage);
CGContextSetFillColorWithColor(bitmapContext, color.CGColor);
CGContextFillRect(bitmapContext, bounds);
CGImageRef mainViewContentBitmapContext = CGBitmapContextCreateImage(bitmapContext);
CGContextRelease(bitmapContext);
UIImage *result = [UIImage imageWithCGImage:mainViewContentBitmapContext];
return result;
}
If it matters, the mask image is loaded using UIImage imageNamed:. Also, I confirmed that the #2X image is loading when run on the retina simulator.
Update: The above code works. The artifacts I was seeing were caused by additional transforms done by the consumer of the images. This question could be deleted since it's not really a question anymore or left for posterity.
I have updated the code above to account for retina resolution images:
- (UIImage *) changeColorForImage:(UIImage *)mask toColor:(UIColor*)color {
CGImageRef maskImage = mask.CGImage;
CGFloat width = mask.scale * mask.size.width;
CGFloat height = mask.scale * mask.size.height;
CGRect bounds = CGRectMake(0,0,width,height);
CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
CGContextRef bitmapContext = CGBitmapContextCreate(NULL, width, height, 8, 0, colorSpace, (CGBitmapInfo)kCGImageAlphaPremultipliedLast);
CGContextClipToMask(bitmapContext, bounds, maskImage);
CGContextSetFillColorWithColor(bitmapContext, color.CGColor);
CGContextFillRect(bitmapContext, bounds);
CGImageRef mainViewContentBitmapContext = CGBitmapContextCreateImage(bitmapContext);
CGContextRelease(bitmapContext);
return [UIImage imageWithCGImage:mainViewContentBitmapContext scale:mask.scale orientation:UIImageOrientationUp];
}
The code in the question is working code. The bug was elsewhere.