I have the following code and I get the following error: "Integer literal '255' overflows when stored into 'Int8'".
// Question code, kept exactly as posted: this is what produces
// "Integer literal '255' overflows when stored into 'Int8'".
// `tokenArray` has element type CChar (a signed 8-bit integer), so `value`
// is inferred as CChar and the literal 0xFF (255) does not fit into its
// -128...127 range. Shifting an 8-bit value left by 8 would also discard
// everything; the value must be widened to a 32-bit type first.
func decodeIDArrayItem(index:Int, tokenArray:UnsafeMutablePointer<CChar>){
var value = tokenArray[index * 4] & 0xFF
value <<= 8;
value |= tokenArray [index * 4 + 1] & 0xFF
value <<= 8;
value |= tokenArray [index * 4 + 2] & 0xFF
value <<= 8;
value |= tokenArray [index * 4 + 3] & 0xFF
}
Any thoughts?
/// Decodes four consecutive bytes of `tokenArray`, starting at `index * 4`,
/// into one big-endian UInt32 (byte 0 is the most significant).
///
/// - Parameters:
///   - index: which 4-byte group to decode.
///   - tokenArray: raw byte buffer; must hold at least `(index + 1) * 4` bytes.
/// - Returns: the assembled 32-bit value.
///
/// Fix over the original answer: `CChar` is signed, so `UInt32(tokenArray[i])`
/// traps at runtime for any byte >= 0x80 — the `& 0xFF` mask is applied only
/// AFTER the trapping conversion. Reinterpreting through `UInt8(bitPattern:)`
/// keeps the raw bits, cannot trap, and makes the mask unnecessary.
func decodeIDArrayItem(index:Int, tokenArray:UnsafeMutablePointer<CChar>) -> UInt32{
    var value: UInt32 = UInt32(UInt8(bitPattern: tokenArray[index * 4]))
    value <<= 8
    value |= UInt32(UInt8(bitPattern: tokenArray[index * 4 + 1]))
    value <<= 8
    value |= UInt32(UInt8(bitPattern: tokenArray[index * 4 + 2]))
    value <<= 8
    value |= UInt32(UInt8(bitPattern: tokenArray[index * 4 + 3]))
    return value
}
I assume you're trying to extract 8-bit data into a 32-bit format. You're getting issues because CChar is a signed char. Anyway, try it with UInt32 — it will work fine.
I hope below code will help you.
/// Decodes four consecutive bytes of `tokenArray`, starting at `index * 4`,
/// into one little-endian UInt32 (byte 0 is the least significant).
///
/// Fixes over the original answer:
/// - byte4 read `tokenArray[index * 4 + 2]` instead of `... + 3` (copy/paste bug);
/// - byte4 was shifted left by 32, which is out of range for UInt32 (valid
///   shift amounts are 0...31) — the top byte belongs at shift 24;
/// - `UInt32(CChar)` traps for negative bytes because CChar is signed; the
///   conversion must go through `UInt8(bitPattern:)`.
func decodeIDArrayItem(index:Int, tokenArray:UnsafeMutablePointer<CChar>) -> UInt32{
    // reassemble 4 bytes into one 32-bit value, low byte first
    var value:UInt32
    let byte1 : UInt32 = UInt32(UInt8(bitPattern: tokenArray[index * 4]))           // bits 0-7
    let byte2 : UInt32 = UInt32(UInt8(bitPattern: tokenArray[index * 4 + 1])) << 8  // bits 8-15
    let byte3 : UInt32 = UInt32(UInt8(bitPattern: tokenArray[index * 4 + 2])) << 16 // bits 16-23
    let byte4 : UInt32 = UInt32(UInt8(bitPattern: tokenArray[index * 4 + 3])) << 24 // bits 24-31
    value = byte1 | byte2 | byte3 | byte4
    return value
}
haha! this worked for me
Int8(0xff & 0xff)
nice little hack
Related
I have a u8 buffer that stores a series of values including the RGB components from two u32 values (of which only 24 bits are used).
I'm currently using bitwise operations to extract the individual components, then using @truncate to drop the precision so they'll fit.
// Byte buffer receiving the unpacked colour components.
// (max_buffer_size and idx are defined elsewhere in the surrounding code.)
var buffer = [_]u8{0} ** max_buffer_size;
// 24-bit 0xRRGGBB colours carried in comptime integers.
const fg = 0xAABBCC;
const bg = 0xAABBCC;
// ...
// Zig builtins are spelled with a leading '@' — the '#truncate' in the
// original post is a transcription garble. @truncate(u8, x) drops the high
// bits so the already-masked byte fits into the u8 slot.
buffer[idx + 2] = @truncate(u8, fg & 0xFF);
buffer[idx + 3] = @truncate(u8, fg >> 8 & 0xFF);
buffer[idx + 4] = @truncate(u8, fg >> 16 & 0xFF);
buffer[idx + 5] = @truncate(u8, bg & 0xFF);
buffer[idx + 6] = @truncate(u8, bg >> 8 & 0xFF);
buffer[idx + 7] = @truncate(u8, bg >> 16 & 0xFF);
Is there a way to insert the u32 values directly into this memory instead without needing to unpack the values?
// this (obviously) doesn't work (expecting a u8, getting a u32)
buffer[idx + 2] = fg;
You can use writeIntNative or other variants (check std.mem source code) to do this.
E.g.
std.mem.writeIntNative(u32, buffer[idx..][0..#sizeOf(u32)], fg);
I have a representation of a large bit matrix where I'd like to efficiently retrieve the majority bit for each matrix column (^= bit value that occurs most often). The background is that the matrix rows represent ORB feature descriptors and the value I'm looking for resembles the mean in the Hamming domain.
The implementation I'm currently working with looks like this
// holds column-sum for each bit (32 descriptor bytes * 8 bits per byte)
std::vector<int> sum(32 * 8, 0);
// cv::Mat mat is a matrix of values in [0, 255] filled elsewhere.
// Each ROW of mat is one ORB descriptor, so the outer loop must run over
// mat.rows — the original bounded it with mat.cols, which only works by
// accident for a square matrix (note N2 below is derived from mat.rows).
// Using int also avoids the signed/unsigned comparison with mat.rows.
for (int i = 0; i < mat.rows; ++i)
{
    const cv::Mat &d = mat.row(i);
    const unsigned char *p = d.ptr<unsigned char>();
    // count bits set column-wise, most significant bit of each byte first
    for (int j = 0; j < d.cols; ++j, ++p)
    {
        if (*p & (1 << 7)) ++sum[j * 8];
        if (*p & (1 << 6)) ++sum[j * 8 + 1];
        if (*p & (1 << 5)) ++sum[j * 8 + 2];
        if (*p & (1 << 4)) ++sum[j * 8 + 3];
        if (*p & (1 << 3)) ++sum[j * 8 + 4];
        if (*p & (1 << 2)) ++sum[j * 8 + 5];
        if (*p & (1 << 1)) ++sum[j * 8 + 6];
        if (*p & (1)) ++sum[j * 8 + 7];
    }
}
cv::Mat mean = cv::Mat::zeros(1, 32, CV_8U);
unsigned char *p = mean.ptr<unsigned char>();
// majority threshold: at least half of the rows, rounding up for odd counts
const int N2 = (int)mat.rows / 2 + mat.rows % 2;
for (size_t i = 0; i < sum.size(); ++i)
{
    if (sum[i] >= N2)
    {
        // set bit in mean only if the corresponding matrix column
        // contains more 1s than 0s
        *p |= 1 << (7 - (i % 8));
    }
    if (i % 8 == 7) ++p;
}
The bottleneck is the big loop with all the bit shifting. Is there any way or known bit magic to make this any faster?
I write an app for iOS using the TI SensorTag2. Right now i'm stuck with the conversion of data i read over bluetooth to Float for the app.
Following Code is from the released source code of TI for an Objective C App.
/// Unpacks one SensorTag movement packet into gyro / accelerometer /
/// magnetometer Point3D values and returns a human-readable summary string.
/// Each axis arrives as two bytes, low byte first; the pair is reassembled
/// into a signed 16-bit sample, normalised by 32768 and multiplied by a
/// sensor-specific full-scale factor (values taken from TI's sample code).
/// Fix over the posted snippet: the format string started with '#"' — a
/// transcription garble of the Objective-C string literal prefix '@"'.
-(NSString *) calcValue:(NSData *) value {
    char vals[value.length];
    [value getBytes:vals length:value.length];
    Point3D gyroPoint;
    // (vals[n] & 0xff) keeps the low byte unsigned; the high byte is shifted
    // into bits 8-15 and the pair reinterpreted as a signed int16.
    gyroPoint.x = ((float)((int16_t)((vals[0] & 0xff) | (((int16_t)vals[1] << 8) & 0xff00)))/ (float) 32768) * 255 * 1;
    gyroPoint.y = ((float)((int16_t)((vals[2] & 0xff) | (((int16_t)vals[3] << 8) & 0xff00)))/ (float) 32768) * 255 * 1;
    gyroPoint.z = ((float)((int16_t)((vals[4] & 0xff) | (((int16_t)vals[5] << 8) & 0xff00)))/ (float) 32768) * 255 * 1;
    self.gyro = gyroPoint;
    Point3D accPoint;
    accPoint.x = (((float)((int16_t)((vals[6] & 0xff) | (((int16_t)vals[7] << 8) & 0xff00)))/ (float) 32768) * 8) * 1;
    accPoint.y = (((float)((int16_t)((vals[8] & 0xff) | (((int16_t)vals[9] << 8) & 0xff00))) / (float) 32768) * 8) * 1;
    accPoint.z = (((float)((int16_t)((vals[10] & 0xff) | (((int16_t)vals[11] << 8) & 0xff00)))/ (float) 32768) * 8) * 1;
    self.acc = accPoint;
    Point3D magPoint;
    magPoint.x = (((float)((int16_t)((vals[12] & 0xff) | (((int16_t)vals[13] << 8) & 0xff00))) / (float) 32768) * 4912);
    magPoint.y = (((float)((int16_t)((vals[14] & 0xff) | (((int16_t)vals[15] << 8) & 0xff00))) / (float) 32768) * 4912);
    magPoint.z = (((float)((int16_t)((vals[16] & 0xff) | (((int16_t)vals[17] << 8) & 0xff00))) / (float) 32768) * 4912);
    self.mag = magPoint;
    return [NSString stringWithFormat:@"ACC : X: %+6.1f, Y: %+6.1f, Z: %+6.1f\nMAG : X: %+6.1f, Y: %+6.1f, Z: %+6.1f\nGYR : X: %+6.1f, Y: %+6.1f, Z: %+6.1f",self.acc.x,self.acc.y,self.acc.z,self.mag.x,self.mag.y,self.mag.z,self.gyro.x,self.gyro.y,self.gyro.z];
}
When I try to convert this code to Swift, I get the error: "Integer literal '65280' overflows when stored into 'Int16'".
let xF: Float = ((Float((Int16(bytes[6]) & 0xff) | ((Int16(bytes[7]) << 8) & 0xff00)) / Float(32768)) * 8) * 1
As i understand that, it combines the 2 Int8 variables into a single Int16 and it should work, i just don't find where i made the error. The part with "& 0xff00" is marked and when i understand it right this is here so only the first 8 bits contain 1's , the rest is 0's
I had it working with code from the android app for SensorTag2, but that code crashes the app from time to time, when i do also read data from gyroscope, so i wanted to use that iOS Code
let x = (Int16(bytes[7]) << 8) + Int16(bytes[6])
let xF = Float(x) / (32768.0 / 8.0)
Maybe somebody here can point me in the right direction to solve my problem
One bad thing in this line:
let xF: Float = ((Float((Int16(bytes[6]) & 0xff) | ((Int16(bytes[7]) << 8) & 0xff00)) / Float(32768)) * 8) * 1
is this: ((Int16(bytes[7]) << 8) & 0xff00).
You know Int16 can represent numbers -32768...32767, and the value of 0xff00 is 65280. As the error message is saying, it is too large for Int16.
(Remember Swift does no implicit conversions for numeric types.)
With making bytes as unsigned:
let bytes = UnsafePointer<UInt8>(data.bytes)
You have no need to use &.
But you need to pass a signed value to Float.init, so your code needs to be something like this:
let xF: Float = ((Float(Int16(bitPattern: UInt16(bytes[6]) | (UInt16(bytes[7]) << 8))) / Float(32768)) * 8) * 1
(Or else, negative values in bytes make your app crash.)
I couldn't use the UInt solution, because i get the data as raw bytes that are encoded as signed int with 16bit.
In ObjectiveC they are read into a char-array, that i converted to Int8
But i found another solution and now i just read the bytes into a Int16 array, that way two following bytes will be treated as a single Int16, like the ObjectiveC code made them already.
// Reinterpret the NSData payload as an array of little-endian Int16 samples.
// Fixes over the posted snippet: the array was sized with data.length, which
// counts BYTES, not Int16 elements (twice as many slots as needed), and
// getBytes was asked for dataLength * sizeof(Int16) bytes — twice the bytes
// the NSData actually holds, reading past the end of the buffer.
let dataLength = data.length
var bytes: [Int16] = [Int16](count: dataLength / sizeof(Int16), repeatedValue: 0)
data.getBytes(&bytes, length: dataLength)
That way i don't need the bitshifting and bit operations.
now i have an array of 9 Int16 values, instead of 16 Int8, that i had to convert to Int16.
Also a test with the TI SensorTag App resulted in the same numbers
I converted an RGB matrix to YUV matrix using this formula:
Y = (0.257 * R) + (0.504 * G) + (0.098 * B) + 16
Cr = V = (0.439 * R) - (0.368 * G) - (0.071 * B) + 128
Cb = U = -(0.148 * R) - (0.291 * G) + (0.439 * B) + 128
I then did a 4:2:0 chroma subsample on the matrix. I think I did this correctly, I took 2x2 submatrices from the YUV matrix, ordered the values from least to greatest, and took the average between the 2 values in the middle.
I then used this formula, from Wikipedia, to access the Y, U, and V planes:
size.total = size.width * size.height;
y = yuv[position.y * size.width + position.x];
u = yuv[(position.y / 2) * (size.width / 2) + (position.x / 2) + size.total];
v = yuv[(position.y / 2) * (size.width / 2) + (position.x / 2) + size.total + (size.total / 4)];
I'm using OpenCV so I tried to interpret this as best I can:
y = src.data[(i*channels)+(j*step)];
u = src.data[(j%4)*step + ((i%2)*channels+1) + max];
v = src.data[(j%4)*step + ((i%2)*channels+2) + max + (max%4)];
src is the YUV subsampled matrix. Did I interpret that formula correctly?
Here is how I converted the colours back to RGB:
bgr.data[(i*channels)+(j*step)] = (1.164 * (y - 16)) + (2.018 * (u - 128)); // B
bgr.data[(i*channels+1)+(j*step)] = (1.164 * (y - 16)) - (0.813 * (v - 128)) - (0.391 * (u - 128)); // G
bgr.data[(i*channels+2)+(j*step)] = (1.164 * (y - 16)) + (1.596 * (v - 128)); // R
The problem is my image does not return to its original colours.
Here are the images for reference:
http://i.stack.imgur.com/vQkpT.jpg (Subsampled)
http://i.stack.imgur.com/Oucc5.jpg (Output)
I see that I should be converting from YUV444 to RGB now, but I don't quite understand what the clip function does in the sample I found on the wiki.
C = Y' − 16
D = U − 128
E = V − 128
R = clip(( 298 * C + 409 * E + 128) >> 8)
G = clip(( 298 * C - 100 * D - 208 * E + 128) >> 8)
B = clip(( 298 * C + 516 * D + 128) >> 8)
Does the >> mean I should shift bits?
I'd appreciate any help/comments! Thanks
Update
Tried doing the YUV444 conversion but it just made my image appear in shades of green.
// NOTE(review): these index expressions don't match the Wikipedia plane
// layout quoted earlier (y = yuv[y*width + x], u/v at half resolution after
// size.total). '(j%4)*step' and '(i%2)*channels' keep only small remainders
// of i and j, so u/v are read from a handful of fixed positions — verify
// against the real 4:2:0 layout; this would explain the green output.
y = src.data[(i*channels)+(j*step)];
u = src.data[(j%4)*step + ((i%2)*channels+1) + max];
v = src.data[(j%4)*step + ((i%2)*channels+2) + max + (max%4)];
c = y - 16;
d = u - 128;
e = v - 128;
// Integer form of the YUV->RGB matrix; the /256 is the '>> 8' of the
// reference formula. OpenCV stores pixels as BGR, hence the channel order.
bgr.data[(i*channels+2)+(j*step)] = clip((298*c + 409*e + 128)/256);
bgr.data[(i*channels+1)+(j*step)] = clip((298*c - 100*d - 208*e + 128)/256);
bgr.data[(i*channels)+(j*step)] = clip((298*c + 516*d + 128)/256);
And my clip function:
// Clamps a channel value to the representable [0, 255] range of an 8-bit
// component; the fractional part is truncated toward zero on return, exactly
// as in the original nested-ternary version.
int clip(double value)
{
    if (value > 255) {
        return 255;
    }
    if (value < 0) {
        return 0;
    }
    return value;
}
I had the same problem when decoding WebM frames to RGB. I finally found the solution after hours of searching.
Take SCALEYUV function from here: http://www.telegraphics.com.au/svn/webpformat/trunk/webpformat.h
Then to decode the RGB data from YUV, see this file:
http://www.telegraphics.com.au/svn/webpformat/trunk/decode.c
Search for "py = img->planes[0];", there are two algorithms to convert the data. I only tried the simple one (after "// then fall back to cheaper method.").
Comments in the code also refer to this page: http://www.poynton.com/notes/colour_and_gamma/ColorFAQ.html#RTFToC30
Works great for me.
You won't get back perfectly the same image since UV does compress the image.
You don't say if the result is completely wrong (ie an error) or just not perfect
R = clip(( 298 * C + 409 * E + 128) >> 8)
G = clip(( 298 * C - 100 * D - 208 * E + 128) >> 8)
B = clip(( 298 * C + 516 * D + 128) >> 8)
The >> 8 is a bit shift, equivalent to dividing by 256. This is just to allow you to do all the arithmetic in integer units rather than floating point, for speed.
Was experimenting with formulas present on wiki and found that mixed formula:
// NOTE(review): every (byte) cast here wraps modulo 256 instead of clamping,
// and byte is a signed -128..127 type, so any intermediate outside that range
// silently wraps — presumably the source of the "black points become pure
// green" artefact described below. A clamped int pipeline would avoid it.
byte c = (byte) (y - 16);
byte d = (byte) (u - 128);
byte e = (byte) (v - 128);
// Float-coefficient YUV->RGB variant from the Wikipedia article linked below.
byte r = (byte) (c + (1.370705 * (e)));
byte g = (byte) (c - (0.698001 * (d)) - (0.337633 * (e)));
byte b = (byte) (c + (1.732446 * (d)));
produces "better" errors for my images, simply makes some black points pure green (i.e. rgb = 0x00FF00) which is better for detection and correction ...
wiki source: https://en.wikipedia.org/wiki/YUV#Y.27UV420p_.28and_Y.27V12_or_YV12.29_to_RGB888_conversion
I know the following formula can be used to convert RGB images to YUV images. In the following formula, R, G, B, Y, U, V are all 8-bit unsigned integers, and intermediate values are 16-bit unsigned integers.
Y = ( ( 66 * R + 129 * G + 25 * B + 128) >> 8) + 16
U = ( ( -38 * R - 74 * G + 112 * B + 128) >> 8) + 128
V = ( ( 112 * R - 94 * G - 18 * B + 128) >> 8) + 128
But when the formula is used in OpenCL it's a different story.
1. 8-bit memory write access is an optional extension, which means some OpenCL implementations may not support it.
2. even the above extension is supported, it's deadly slow compared with 32-bit write access.
In order to get better performance, every 4 pixels will be processed at the same time, so the input is 12 8-bit integers and the output is 3 32-bit unsigned integers(the first one stands for 4 Y samples, the second one stands for 4 U samples, the last one stands for 4 V samples).
My question is how to get these 3 32-bit integers directly from the 12 8-bit integers? Is there a formula to get these 3 32-bit integers, or I just need to use the old formula to get 12 8-bit integer results(4 Y, 4 U, 4 V) and construct the 3 32-bit integers with bit-wise operation?
Even though this question was asked 2 years ago, I think some working code would help here. Regarding the initial concerns about bad performance when directly accessing 8-bit values, it's better to perform 32-bit direct access when possible.
Some time ago I've developed and used the following OpenCL kernel to convert ARGB (typical windows bitmap pixel layout) to the y-plane (full sized), u/v-half-plane (quarter sized) memory layout as input for libx264 encoding.
// Converts a 32-bit ARGB image into planar YUV 4:2:0 (full-size Y plane
// followed by quarter-size U and V planes), the layout libx264 consumes.
// Every work item packs FOUR 8-bit samples into one 32-bit word so that all
// global memory writes are 32-bit wide.
// Work-item layout along X: [0, yuvStride) handles the Y plane;
// [yuvStride, yuvStride*3/2) handles U (posY in the top half of the range)
// and V (posY in the bottom half).
__kernel void ARGB2YUV (
__global unsigned int * sourceImage,
__global unsigned int * destImage,
unsigned int srcHeight,
unsigned int srcWidth,
unsigned int yuvStride // must be srcWidth/4 since we pack 4 pixels into 1 Y-unit (with 4 y-pixels)
)
{
int i,j;
unsigned int RGBs [ 4 ];
unsigned int posSrc, RGB, Value4 = 0, Value, yuvStrideHalf, srcHeightHalf, yPlaneOffset, posOffset;
unsigned char red, green, blue;
unsigned int posX = get_global_id(0);
unsigned int posY = get_global_id(1);
if ( posX < yuvStride ) {
// Y plane - pack 4 y's within each work item
if ( posY >= srcHeight )
return;
// four horizontally adjacent ARGB pixels feed the four packed Y samples
posSrc = (posY * srcWidth) + (posX * 4);
RGBs [ 0 ] = sourceImage [ posSrc ];
RGBs [ 1 ] = sourceImage [ posSrc + 1 ];
RGBs [ 2 ] = sourceImage [ posSrc + 2 ];
RGBs [ 3 ] = sourceImage [ posSrc + 3 ];
for ( i=0; i<4; i++ ) {
RGB = RGBs [ i ];
// pixel layout: blue in bits 0-7, green in 8-15, red in 16-23
blue = RGB & 0xff; green = (RGB >> 8) & 0xff; red = (RGB >> 16) & 0xff;
// same integer luma coefficients as the formula quoted in the question
Value = ( ( 66 * red + 129 * green + 25 * blue ) >> 8 ) + 16;
// place sample i in byte i of the packed output word
Value4 |= (Value << (i * 8));
}
destImage [ (posY * yuvStride) + posX ] = Value4;
return;
}
// remaining work items handle the half-resolution chroma planes
posX -= yuvStride;
yuvStrideHalf = yuvStride >> 1;
// U plane - pack 4 u's within each work item
if ( posX >= yuvStrideHalf )
return;
srcHeightHalf = srcHeight >> 1;
if ( posY < srcHeightHalf ) {
// chroma subsampling: one source pixel per 2x2 block (every 2nd pixel on
// every 2nd row; the top-left sample is used, no averaging)
posSrc = ((posY * 2) * srcWidth) + (posX * 8);
RGBs [ 0 ] = sourceImage [ posSrc ];
RGBs [ 1 ] = sourceImage [ posSrc + 2 ];
RGBs [ 2 ] = sourceImage [ posSrc + 4 ];
RGBs [ 3 ] = sourceImage [ posSrc + 6 ];
for ( i=0; i<4; i++ ) {
RGB = RGBs [ i ];
blue = RGB & 0xff; green = (RGB >> 8) & 0xff; red = (RGB >> 16) & 0xff;
Value = ( ( -38 * red + -74 * green + 112 * blue ) >> 8 ) + 128;
Value4 |= (Value << (i * 8));
}
// U plane starts right after the full-size Y plane
yPlaneOffset = yuvStride * srcHeight;
posOffset = (posY * yuvStrideHalf) + posX;
destImage [ yPlaneOffset + posOffset ] = Value4;
return;
}
// bottom half of the Y range: same sampling, V coefficients
posY -= srcHeightHalf;
if ( posY >= srcHeightHalf )
return;
// V plane - pack 4 v's within each work item
posSrc = ((posY * 2) * srcWidth) + (posX * 8);
RGBs [ 0 ] = sourceImage [ posSrc ];
RGBs [ 1 ] = sourceImage [ posSrc + 2 ];
RGBs [ 2 ] = sourceImage [ posSrc + 4 ];
RGBs [ 3 ] = sourceImage [ posSrc + 6 ];
for ( i=0; i<4; i++ ) {
RGB = RGBs [ i ];
blue = RGB & 0xff; green = (RGB >> 8) & 0xff; red = (RGB >> 16) & 0xff;
Value = ( ( 112 * red + -94 * green + -18 * blue ) >> 8 ) + 128;
Value4 |= (Value << (i * 8));
}
// V plane starts after Y plus the quarter-size U plane (yPlaneOffset >> 2)
yPlaneOffset = yuvStride * srcHeight;
posOffset = (posY * yuvStrideHalf) + posX;
destImage [ yPlaneOffset + (yPlaneOffset >> 2) + posOffset ] = Value4;
return;
}
This code performs only global 32-bit memory access while 8-bit processing happens within each work item.
Oh.. and the proper code to invoke the kernel
// Host-side setup and launch for the ARGB2YUV kernel above.
unsigned int width = 1024;
unsigned int height = 768;
unsigned int frameSize = width * height;
const unsigned int argbSize = frameSize * 4; // ARGB pixels
const unsigned int yuvSize = frameSize + (frameSize >> 1); // Y,U,V planes
const unsigned int yuvStride = width >> 2; // since we pack 4 RGBs into "one" YYYY
// Allocates ARGB buffer
ocl_rgb_buffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, argbSize, 0, &error );
// ... error handling ...
ocl_yuv_buffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, yuvSize, 0, &error );
// ... error handling ...
// argument order matches the kernel signature: srcHeight (index 2) comes
// before srcWidth (index 3)
error = clSetKernelArg ( kernel, 0, sizeof(cl_mem), &ocl_rgb_buffer );
error |= clSetKernelArg ( kernel, 1, sizeof(cl_mem), &ocl_yuv_buffer );
error |= clSetKernelArg ( kernel, 2, sizeof(unsigned int), &height);
error |= clSetKernelArg ( kernel, 3, sizeof(unsigned int), &width);
error |= clSetKernelArg ( kernel, 4, sizeof(unsigned int), &yuvStride);
// ... error handling ...
// global X = yuvStride (Y-plane items) + yuvStride/2 (U/V items); both
// global sizes must be divisible by the 16x16 local size — see the note
// below about padding with spare work items.
const size_t local_ws[] = { 16, 16 };
const size_t global_ws[] = { yuvStride + (yuvStride >> 1), height };
error = clEnqueueNDRangeKernel ( queue, kernel, 2, NULL, global_ws, local_ws, 0, NULL, NULL );
// ... error handling ...
Note: have a look at the work item calculations. Some additional code needs to be added (e.g. using mod so as to add sufficient spare items) to make sure that work item sizes fit to local work sizes.
Like this? Use int4 unless your platform can use int3. Also you can pack 5 pixels into an int16 so you are wasting 1/16 instead of 1/4 of the memory bandwidth.
// One work item converts one RGB pixel to YUV with the integer formulas from
// the question. Fix over the posted snippet: rgb, R, G, B and yuv were used
// without declarations, so the kernel did not compile. (int3 itself only
// exists from OpenCL 1.1 onward — see the discussion below.)
__kernel void rgb2yuv( __global int3* input, __global int3* output){
    int3 rgb = input[get_global_id(0)];
    int R = rgb.x;
    int G = rgb.y;
    int B = rgb.z;
    int3 yuv;
    yuv.x = ( ( 66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
    yuv.y = ( ( -38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
    yuv.z = ( ( 112 * R - 94 * G - 18 * B + 128) >> 8) + 128;
    output[get_global_id(0)] = yuv;
}
Along with opencl specification data type int3 doesn't exists.
Page 123:
Supported values of n are 2, 4, 8, and 16...
In your kernel variables rgb, R, G, B, and yuv should be at least __private int4.
OpenCL 1.1 added support for typen where n = 3. However, I strongly recommend you don't use it. Different vendor implementations have different bugs, and it's not saving you anything.