How to sum __m256 horizontally? - sse

I would like to horizontally sum the components of a __m256 vector using AVX instructions.
In SSE I could use
_mm_hadd_ps(xmm,xmm);
_mm_hadd_ps(xmm,xmm);
to get the result at the first component of the vector, but this does not scale with the 256 bit version of the function (_mm256_hadd_ps).
What is the best way to compute the horizontal sum of a __m256 vector?

This version should be optimal for both Intel Sandy/Ivy Bridge and AMD Bulldozer, and later CPUs.
// x = ( x7, x6, x5, x4, x3, x2, x1, x0 )
float sum8(__m256 x) {
// hiQuad = ( x7, x6, x5, x4 )
const __m128 hiQuad = _mm256_extractf128_ps(x, 1);
// loQuad = ( x3, x2, x1, x0 )
const __m128 loQuad = _mm256_castps256_ps128(x);
// sumQuad = ( x3 + x7, x2 + x6, x1 + x5, x0 + x4 )
const __m128 sumQuad = _mm_add_ps(loQuad, hiQuad);
// loDual = ( -, -, x1 + x5, x0 + x4 )
const __m128 loDual = sumQuad;
// hiDual = ( -, -, x3 + x7, x2 + x6 )
const __m128 hiDual = _mm_movehl_ps(sumQuad, sumQuad);
// sumDual = ( -, -, x1 + x3 + x5 + x7, x0 + x2 + x4 + x6 )
const __m128 sumDual = _mm_add_ps(loDual, hiDual);
// lo = ( -, -, -, x0 + x2 + x4 + x6 )
const __m128 lo = sumDual;
// hi = ( -, -, -, x1 + x3 + x5 + x7 )
const __m128 hi = _mm_shuffle_ps(sumDual, sumDual, 0x1);
// sum = ( -, -, -, x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7 )
const __m128 sum = _mm_add_ss(lo, hi);
return _mm_cvtss_f32(sum);
}
haddps is not efficient on any CPU; the best you can do is one shuffle (to extract the high half) and one add, repeat until one element left. Narrowing to 128-bit as the first step benefits AMD before Zen2, and is not a bad thing anywhere.
See Fastest way to do horizontal SSE vector sum on x86 for more details about efficiency.

This can be done with the following code:
// Fragment: expects ymm and ymm2 to be __m256 variables declared by the caller.
// Swap the two 128-bit halves of ymm (imm8 = 1 selects high half -> low, low half -> high).
ymm2 = _mm256_permute2f128_ps(ymm , ymm , 1);
// Each half of ymm now holds the element-wise sum of the original low and high halves.
ymm = _mm256_add_ps(ymm, ymm2);
// Two in-lane horizontal adds reduce each half to the total of its four elements.
ymm = _mm256_hadd_ps(ymm, ymm);
ymm = _mm256_hadd_ps(ymm, ymm);
but there might be a better solution.

Related

Is there a way to check if an XYZ triplet is a valid color?

The XYZ color space encompasses all possible colors, not just those which can be generated by a particular device like a monitor. Not all XYZ triplets represent a color that is physically possible. Is there a way, given an XYZ triplet, to determine if it represents a real color?
I wanted to generate a CIE 1931 chromaticity diagram (seen below) for myself, but wasn't sure how to go about it. It's easy to, for example, take all combinations of sRGB triplets and then transform them into the xy coordinates of the chromaticity diagram and then plot them. You cannot use this same approach in the XYZ color space though since not all combinations are valid colors. So far the best I have come up with is a stochastic approach, where I generate a random spectral distribution by summing a random number of random Gaussians, then converting it to XYZ using the standard observer functions.
Having thought about it a little more I felt the obvious solution is to generate a list of xy points around the edge of the spectral locus, corresponding to pure monochromatic colors. It seems to me that this can be done by directly inputting the visible wavelengths (~380-780nm) into the CIE XYZ standard observer color matching functions. Treating these points like a convex polygon you could determine if a point is within the spectral locus using one algorithm or another. In my case, since what I really wanted to do is simply generate the chromaticity diagram, I simply input these points into a graphics library's polygon drawing routine and then for each pixel of the polygon I can transform it into sRGB.
I believe this solution is similar to the one used by the library that Kel linked in a comment. I'm not entirely sure, as I am not familiar with Python.
/**
 * Applies the 3x3 XYZ -> linear RGB matrix to one tristimulus value.
 * @param {number} X
 * @param {number} Y
 * @param {number} Z
 * @returns {number[]} [R, G, B]; components are not clamped and may fall outside 0..1.
 */
function RGBfromXYZ(X, Y, Z) {
  // One matrix row applied to (X, Y, Z).
  const applyRow = (cx, cy, cz) => cx * X + cy * Y + cz * Z
  return [
    applyRow(3.2404542, -1.5371385, -0.4985314),
    applyRow(-0.969266, 1.8760108, 0.0415560),
    applyRow(0.0556434, -0.2040259, 1.0572252),
  ]
}
/**
 * Converts luminance Y plus chromaticity (x, y) to XYZ.
 * y = 0 is not guarded: the division then yields Infinity or NaN.
 * @returns {number[]} [X, Y, Z]
 */
function XYZfromYxy(Y, x, y) {
  // X + Y + Z expressed through Y and the y chromaticity.
  const scale = Y / y
  return [scale * x, Y, scale * (1 - x - y)]
}
/**
 * Encodes one linear-light channel value with the sRGB transfer curve:
 * a linear segment up to 0.0031308 and a 1/2.4 power curve above it.
 */
function srgb_from_linear(x) {
  const LINEAR_LIMIT = 0.0031308
  return x <= LINEAR_LIMIT
    ? x * 12.92
    : 1.055 * Math.pow(x, 1/2.4) - 0.055
}
// Analytic Approximations to the CIE XYZ Color Matching Functions
// from Sloan http://jcgt.org/published/0002/02/01/paper.pdf
// Each fit is a sum of piecewise Gaussians: a lobe uses one width factor
// left of its centre and another one right of it. x is the wavelength in nm.
function xFit_1931(x) {
  const lobe = (centre, below, above) => {
    const t = (x - centre) * (x < centre ? below : above)
    return Math.exp(-0.5 * t * t)
  }
  const blueLobe = lobe(442, 0.0624, 0.0374)
  const redLobe = lobe(599.8, 0.0264, 0.0323)
  const dip = lobe(501.1, 0.0490, 0.0382)
  return 0.362 * blueLobe + 1.056 * redLobe - 0.065 * dip
}
// Two-lobe piecewise-Gaussian fit of the CIE 1931 y-bar colour matching
// function; x is the wavelength in nm.
function yFit_1931(x) {
  const lobe = (centre, below, above) => {
    const t = (x - centre) * (x < centre ? below : above)
    return Math.exp(-0.5 * t * t)
  }
  return 0.821 * lobe(568.8, 0.0213, 0.0247) + 0.286 * lobe(530.9, 0.0613, 0.0322)
}
// Two-lobe piecewise-Gaussian fit of the CIE 1931 z-bar colour matching
// function; x is the wavelength in nm.
function zFit_1931(x) {
  const lobe = (centre, below, above) => {
    const t = (x - centre) * (x < centre ? below : above)
    return Math.exp(-0.5 * t * t)
  }
  return 1.217 * lobe(437, 0.0845, 0.0278) + 0.681 * lobe(459, 0.0385, 0.0725)
}
// Create a 512x512 canvas and attach it to the page.
const canvas = document.createElement("canvas")
document.body.append(canvas)
canvas.width = canvas.height = 512
const ctx = canvas.getContext("2d")

// Sample the fitted colour matching functions once per nanometre from 440 to
// 649 and project each sample to xy chromaticity, scaled to canvas pixels.
const locus_points = []
for (let wavelength = 440; wavelength < 650; ++wavelength) {
  const X = xFit_1931(wavelength)
  const Y = yFit_1931(wavelength)
  const Z = zFit_1931(wavelength)
  locus_points.push([
    (X / (X + Y + Z)) * canvas.width,
    (Y / (X + Y + Z)) * canvas.height,
  ])
}

// Fill the closed polygon; its alpha channel then serves as the inside mask.
ctx.beginPath()
locus_points.forEach(([px, py], index) => {
  if (index === 0) {
    ctx.moveTo(px, py)
  } else {
    ctx.lineTo(px, py)
  }
})
ctx.closePath()
ctx.fill()

// Recolour every covered pixel from its own chromaticity coordinates.
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height)
for (let row = 0; row < canvas.height; ++row) {
  for (let col = 0; col < canvas.width; ++col) {
    const offset = (row * canvas.width + col) * 4
    if (imageData.data[offset + 3] > 0) {
      const [X, Y, Z] = XYZfromYxy(1, col / canvas.width, row / canvas.height)
      const [R, G, B] = RGBfromXYZ(X, Y, Z)
      // Normalise the linear RGB vector to unit length before sRGB encoding.
      const length = Math.sqrt(R**2 + G**2 + B**2)
      const encode = channel => Math.round(srgb_from_linear(channel / length) * 255)
      imageData.data[offset + 0] = encode(R)
      imageData.data[offset + 1] = encode(G)
      imageData.data[offset + 2] = encode(B)
    }
  }
}
ctx.putImageData(imageData, 0, 0)

Convert cv::Vec4f line to cv::Vec2f

I have a pair of Cartesian coordinates that represent a line in an image. I would like to convert this line to polar form and draw it over the image.
e.g
// Example segment stored as (x1, y1, x2, y2): end points (10, 20) and (60, 70).
cv::Vec4f line {10,20,60,70};
// Unpack the four components into named end-point coordinates.
float x1 = line[0];
float y1 = line[1];
float x2 = line[2];
float y2 = line[3];
I want this line to be represented in cv::Vec2f form(rho,theta).
Taking care of rho & theta with all possible slopes.
Given are the image dimensions :: w and h;
w = image.cols
h = image.rows
How can I achieve this.
N.B: We can also assume that the line can be an extended one running across the image.
// Convert every two-point line (x1, y1, x2, y2) in `lines` to (rho, theta)
// and append the result to `linv2f`.
for (size_t i = 0; i < lines.size(); i++)
{
// NOTE(review): the coordinates are stored as int; if `lines` holds float
// elements (e.g. cv::Vec4f) the fractional part is dropped here — confirm.
int x1 = lines[i][0];
int y1 = lines[i][1];
int x2 = lines[i][2];
int y2 = lines[i][3];
// Length of the coefficient vector (y1-y2, x2-x1). It is 0 when both end
// points coincide, in which case the division below divides by zero.
float d = sqrt(((y1-y2)*(y1-y2)) + ((x2-x1)*(x2-x1)) );
// Constant term of the line equation divided by d (see the derivation in the answer).
float rho = (y1*x2 - y2*x1)/d;
// Angle of the coefficient vector (y1-y2, x2-x1).
float theta = atan2(x2 - x1,y1-y2) ;
// NOTE(review): for negative rho this negates theta, whereas the answer in
// this document says to flip the sign of rho and shift theta by Pi. The two
// operations are not equivalent.
if(rho < 0){
theta *= -1;
rho *= -1;
}
linv2f.push_back(cv::Vec2f(rho,theta));
}
The above approach doesn't give me correct results: when I plot the converted lines, they do not overlap the lines drawn from their original Vec4f form.
I use this to convert vec2f to vec4f for testing :
// Turns a (rho, theta) line into two end points on the image border.
// For theta below pi/4 or above 3*pi/4 the line is intersected with the rows
// y = 0 and y = img.rows; otherwise with the columns x = 0 and x = img.cols.
// The intersections go through cv::Point, i.e. they are converted to integers.
// Returns (x1, y1, x2, y2).
cv::Vec4f cvtVec2fLine(const cv::Vec2f& data, const cv::Mat& img)
{
    const float rho = data[0];
    const float theta = data[1];
    const bool crossesRows = theta < CV_PI/4. || theta > 3. * CV_PI/4.;

    cv::Point first, second;
    if (!crossesRows) {
        // Solve x*cos + y*sin = rho for y at x = 0 and x = img.cols.
        first = cv::Point(0, rho / std::sin(theta));
        second = cv::Point(img.cols, (rho - img.cols * std::cos(theta))/std::sin(theta));
    } else {
        // Solve x*cos + y*sin = rho for x at y = 0 and y = img.rows.
        first = cv::Point(rho / std::cos(theta), 0);
        second = cv::Point( (rho - img.rows * std::sin(theta))/std::cos(theta), img.rows);
    }

    cv::Vec4f segment;
    segment[0] = first.x;
    segment[1] = first.y;
    segment[2] = second.x;
    segment[3] = second.y;
    return segment;
}
rho-theta equation has form
x * Cos(Theta) + y * Sin(Theta) - Rho = 0
We want to represent equation 'by two points' into rho-theta form (page 92 in pdf here). If we have
x * A + y * B - C = 0
and need coefficients in trigonometric form, we can divide all equation by magnitude of (A,B) coefficient vector.
D = Length(A,B) = Math.Hypot(A,B)
x * A/D + y * B/D - C/D = 0
note that (A/D)^2 + (B/D)^2 = 1 - basic trigonometric equality, so we can consider A/D and B/D as cosine and sine of some angle theta.
Your line equation is
(y-y1) * (x2-x1) - (x-x1) * (y2-y1) = 0
or
x * (y1-y2) + y * (x2-x1) - (y1 * x2 - y2 * x1) = 0
let
D = Sqrt((y1-y2)^2 + (x2-x1)^2)
so
Theta = ArcTan2(x2-x1, y1-y2)
Rho = (y1 * x2 - y2 * x1) / D
edited
If Rho is negative, change sign of Rho and shift Theta by Pi
Example:
x1=1,y1=0, x2=0,y2=1
Theta = atan2(-1,-1)=-3*Pi/4
D=Sqrt(2)
Rho=-Sqrt(2)/2 negative =>
Rho = Sqrt(2)/2
Theta = Pi/4
Back substitution - find points of intersection with axes
0 * Sqrt(2)/2 + y0 * Sqrt(2)/2 - Sqrt(2)/2 = 0
x=0 y=1
x0 * Sqrt(2)/2 + 0 * Sqrt(2)/2 - Sqrt(2)/2 = 0
x=1 y=0

Specify an origin to warpPerspective() function in OpenCV 2.x

I try to specify a different origin for the warpPerspective() function than the basic (0,0), in order to apply the transform independently of the support image size. I added a CvPoint parameter to the original code, but I can't find where to use these coordinates. I tried to use them in the computation of X0, Y0 and W0 but it didn't work, this only shift the transformed image in the resulting image. Any idea?
Here the code:
// Perspective warp of src into dst, processed block by block through remap().
// This is the question's version: the added `origin` parameter is accepted
// but not referenced anywhere in the body.
//   src         - source image
//   dst         - output image; (re)allocated to dsize with src's type
//   M0          - 3x3 CV_32F or CV_64F transform (asserted below)
//   dsize       - size of the output image
//   flags       - interpolation mode, optionally combined with WARP_INVERSE_MAP
//   borderType, borderValue - forwarded unchanged to remap()
//   origin      - unused in this version
void warpPerspective( const Mat& src, Mat& dst, const Mat& M0, Size dsize,
int flags, int borderType, const Scalar& borderValue, CvPoint origin )
{
dst.create( dsize, src.type() );
// Scratch buffers for one block: XY holds interleaved (x, y) source
// coordinates, A holds one interpolation-table index per pixel.
const int BLOCK_SZ = 32;
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
// M is the double-precision backing store of the 3x3 header _M.
double M[9];
Mat _M(3, 3, CV_64F, M);
int interpolation = flags & INTER_MAX;
// INTER_AREA is replaced by INTER_LINEAR.
if( interpolation == INTER_AREA )
interpolation = INTER_LINEAR;
CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
M0.convertTo(_M, _M.type());
// Unless the caller already passed the inverse map, invert the matrix in place.
if( !(flags & WARP_INVERSE_MAP) )
invert(_M, _M);
int x, y, x1, y1, width = dst.cols, height = dst.rows;
// Choose block dimensions so that bw0*bh0 never exceeds BLOCK_SZ*BLOCK_SZ
// (the scratch buffer capacity), clamped to the image size.
int bh0 = std::min(BLOCK_SZ/2, height);
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
for( y = 0; y < height; y += bh0 )
{
for( x = 0; x < width; x += bw0 )
{
// Actual size of this block (smaller at the right/bottom edges).
int bw = std::min( bw0, width - x);
int bh = std::min( bh0, height - y);
// Headers over the scratch buffer and over the destination block.
Mat _XY(bh, bw, CV_16SC2, XY), _A;
Mat dpart(dst, Rect(x, y, bw, bh));
for( y1 = 0; y1 < bh; y1++ )
{
short* xy = XY + y1*bw*2;
// Homogeneous source coordinates of the first pixel in this block row.
double X0 = M[0]*x + M[1]*(y + y1) + M[2];
double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
if( interpolation == INTER_NEAREST )
for( x1 = 0; x1 < bw; x1++ )
{
// Perspective divide; W == 0 maps to 0 instead of dividing.
double W = W0 + M[6]*x1;
W = W ? 1./W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
xy[x1*2] = (short)X;
xy[x1*2+1] = (short)Y;
}
else
{
short* alpha = A + y1*bw;
for( x1 = 0; x1 < bw; x1++ )
{
// Same divide, pre-scaled by INTER_TAB_SIZE so that X and Y carry
// INTER_BITS fractional bits.
double W = W0 + M[6]*x1;
W = W ? INTER_TAB_SIZE/W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
// Integer part goes to the coordinate map ...
xy[x1*2] = (short)(X >> INTER_BITS);
xy[x1*2+1] = (short)(Y >> INTER_BITS);
// ... and the two fractional parts are packed into one table index.
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
}
}
// Resample this block from src using the maps built above.
if( interpolation == INTER_NEAREST )
remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
else
{
Mat _A(bh, bw, CV_16U, A);
remap( src, dpart, _XY, _A, interpolation, borderType, borderValue );
}
}
}
}
Ok, I found it myself! You have 2 things to do:
compute the destination dimensions in source referential, and do the remap using these dimensions ;
increment the computed points coordinates.
Here is the code thus transformed:
// Perspective warp of src into dst with a caller-supplied origin, processed
// block by block through remap().
// (x, y) iterate in a frame that starts at -origin and are used for the
// matrix evaluation; (xDest, yDest) iterate from 0 and address the
// destination image.
//   src         - source image
//   dst         - output image; (re)allocated to dsize with src's type
//   M0          - 3x3 CV_32F or CV_64F transform (asserted below)
//   dsize       - size of the output image
//   flags       - interpolation mode, optionally combined with WARP_INVERSE_MAP
//   borderType, borderValue - forwarded unchanged to remap()
//   origin      - origin offset, see above
void warpPerspective( const Mat& src, Mat& dst, const Mat& M0, Size dsize,
int flags, int borderType, const Scalar& borderValue, CvPoint origin )
{
dst.create( dsize, src.type() );
// Scratch buffers for one block: XY holds interleaved (x, y) source
// coordinates, A holds one interpolation-table index per pixel.
const int BLOCK_SZ = 32;
short XY[BLOCK_SZ*BLOCK_SZ*2], A[BLOCK_SZ*BLOCK_SZ];
// M is the double-precision backing store of the 3x3 header _M.
double M[9];
Mat _M(3, 3, CV_64F, M);
int interpolation = flags & INTER_MAX;
// INTER_AREA is replaced by INTER_LINEAR.
if( interpolation == INTER_AREA )
interpolation = INTER_LINEAR;
CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 );
M0.convertTo(_M, _M.type());
// Unless the caller already passed the inverse map, invert the matrix in place.
if( !(flags & WARP_INVERSE_MAP) )
invert(_M, _M);
int x, xDest, y, yDest, x1, y1, width = dst.cols, height = dst.rows;
// Choose block dimensions so that bw0*bh0 never exceeds BLOCK_SZ*BLOCK_SZ.
int bh0 = std::min(BLOCK_SZ/2, height);
int bw0 = std::min(BLOCK_SZ*BLOCK_SZ/bh0, width);
bh0 = std::min(BLOCK_SZ*BLOCK_SZ/bw0, height);
for( y = -origin.y, yDest = 0; y < height; y += bh0, yDest += bh0 )
{
for( x = -origin.x, xDest = 0; x < width; x += bw0, xDest += bw0 )
{
// NOTE(review): the block size is clamped against the shifted counters
// (width - x, height - y) while the destination rectangle below uses
// xDest/yDest. For a non-zero origin the two differ, so the rectangle may
// not line up with dst's bounds — verify.
int bw = std::min( bw0, width - x);
int bh = std::min( bh0, height - y);
// to avoid dimensions errors
if (bw <= 0 || bh <= 0)
break;
Mat _XY(bh, bw, CV_16SC2, XY), _A;
Mat dpart(dst, Rect(xDest, yDest, bw, bh));
for( y1 = 0; y1 < bh; y1++ )
{
short* xy = XY + y1*bw*2;
// Homogeneous source coordinates of the first pixel in this block row,
// evaluated at the origin-shifted position.
double X0 = M[0]*x + M[1]*(y + y1) + M[2];
double Y0 = M[3]*x + M[4]*(y + y1) + M[5];
double W0 = M[6]*x + M[7]*(y + y1) + M[8];
if( interpolation == INTER_NEAREST )
for( x1 = 0; x1 < bw; x1++ )
{
// Perspective divide; W == 0 maps to 0 instead of dividing.
double W = W0 + M[6]*x1;
W = W ? 1./W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
// NOTE(review): unlike the interpolating branch below, origin is not
// added back to the coordinates here — confirm whether that is intended.
xy[x1*2] = (short)X;
xy[x1*2+1] = (short)Y;
}
else
{
short* alpha = A + y1*bw;
for( x1 = 0; x1 < bw; x1++ )
{
// Same divide, pre-scaled by INTER_TAB_SIZE so that X and Y carry
// INTER_BITS fractional bits.
double W = W0 + M[6]*x1;
W = W ? INTER_TAB_SIZE/W : 0;
int X = saturate_cast<int>((X0 + M[0]*x1)*W);
int Y = saturate_cast<int>((Y0 + M[3]*x1)*W);
// Integer part plus the origin offset. The cast to short binds to the
// shifted value only; the sum is converted again on assignment.
xy[x1*2] = (short)(X >> INTER_BITS) + origin.x;
xy[x1*2+1] = (short)(Y >> INTER_BITS) + origin.y;
// The two fractional parts are packed into one table index.
alpha[x1] = (short)((Y & (INTER_TAB_SIZE-1))*INTER_TAB_SIZE +
(X & (INTER_TAB_SIZE-1)));
}
}
}
// Resample this block from src using the maps built above.
if( interpolation == INTER_NEAREST )
remap( src, dpart, _XY, Mat(), interpolation, borderType, borderValue );
else
{
Mat _A(bh, bw, CV_16U, A);
remap( src, dpart, _XY, _A, interpolation, borderType, borderValue );
}
}
}
}
with this function:
// Applies a 3x3 homogeneous transform to an integer point and returns the
// de-homogenised result, with each coordinate cast to int.
// The input and output CvMat headers deliberately wrap the same three-element
// buffer, exactly as before. The divisor (third component) is not checked
// against zero.
CvPoint transformPoint(const CvPoint pointToTransform, const CvMat* matrix) {
    double coordinates[3] = {pointToTransform.x, pointToTransform.y, 1};
    CvMat originVector = cvMat(3, 1, CV_64F, coordinates);
    CvMat transformedVector = cvMat(3, 1, CV_64F, coordinates);
    cvMatMul(matrix, &originVector, &transformedVector);

    const double homogeneousX = cvmGet(&transformedVector, 0, 0);
    const double homogeneousY = cvmGet(&transformedVector, 1, 0);
    const double homogeneousW = cvmGet(&transformedVector, 2, 0);
    return cvPoint((int)(homogeneousX / homogeneousW),
                   (int)(homogeneousY / homogeneousW));
}
A much simpler and cleaner solution is to modify the perspective transformation. You can do a translation which moves the origin to the desired position, then do the perspective transformation and finally do the inverse translation.
Here is a small example program in python, which rotates an image by 45 degrees around the point(100, 100):
import cv2
import numpy as np
def translation_mat(dx, dy):
    """Return the 3x3 homogeneous matrix that translates by (dx, dy)."""
    return np.array([
        [1, 0, dx],
        [0, 1, dy],
        [0, 0, 1],
    ])
def main():
    """Show an image rotated by 45 degrees about (0, 0) and about (100, 100).

    Raises:
        IOError: if pigeon.png cannot be read.
    """
    img = cv2.imread(r"pigeon.png", cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise IOError("could not read pigeon.png")
    # a simple rotation by 45 degrees
    # Written as [[cos, -sin], [sin, cos]] so it stays a rotation for any
    # angle; the previous sin/cos placement was only valid at exactly 45 deg.
    angle = np.pi / 4
    rot = np.array([
        [np.cos(angle), -np.sin(angle), 0],
        [np.sin(angle), np.cos(angle), 0],
        [0, 0, 1],
    ])
    # Move (100, 100) to the origin, rotate, then move it back.
    t1 = translation_mat(-100, -100)
    t2 = translation_mat(100, 100)
    rot_shifted = t2.dot(rot.dot(t1))
    size = (img.shape[1], img.shape[0])
    img1 = cv2.warpPerspective(img, rot, size)
    img2 = cv2.warpPerspective(img, rot_shifted, size)
    cv2.imshow("Original image", img)
    cv2.imshow("Rotated around (0,0)", img1)
    cv2.imshow("Rotated around(100, 100)", img2)
    cv2.waitKey(0)


if __name__ == '__main__':
    main()
Note that you read the order of transformations from right to left.
rot_shifted = t2.dot(rot.dot(t1))
will apply t1 first, then rot, and then t2.
For those of you looking for this piece in Python, here's a start. I'm not 100% sure it works as I've stripped some optimizations from it. Also there is an issue with linear interpolation; I simply didn't use it, but you might want to take a closer look if you do.
import cv2
import numpy as np
def warp_perspective(src, M, (width, height), (origin_x, origin_y),
flags=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT,
borderValue=0, dst=None):
"""
Implementation in Python using base code from
http://stackoverflow.com/questions/4279008/specify-an-origin-to-warpperspective-function-in-opencv-2-x
Note there is an issue with linear interpolation.
"""
B_SIZE = 32
if dst == None:
dst = np.zeros((height, width, 3), dtype=src.dtype)
# Set interpolation mode.
interpolation = flags & cv2.INTER_MAX
if interpolation == cv2.INTER_AREA:
raise Exception('Area interpolation is not supported!')
# Prepare matrix.
M = M.astype(np.float64)
if not(flags & cv2.WARP_INVERSE_MAP):
M = cv2.invert(M)[1]
M = M.flatten()
x_dst = y_dst = 0
for y in xrange(-origin_y, height, B_SIZE):
for x in xrange(-origin_x, width, B_SIZE):
print (x, y)
# Block dimensions.
bw = min(B_SIZE, width - x_dst)
bh = min(B_SIZE, height - y_dst)
# To avoid dimension errors.
if bw <= 0 or bh <= 0:
break
# View of the destination array.
dpart = dst[y_dst:y_dst+bh, x_dst:x_dst+bw]
# Original code used view of array here, but we're using numpy array's.
XY = np.zeros((bh, bw, 2), dtype=np.int16)
A = np.zeros((bh, bw), dtype=np.uint16)
for y1 in xrange(bh):
X0 = M[0]*x + M[1]*(y + y1) + M[2]
Y0 = M[3]*x + M[4]*(y + y1) + M[5]
W0 = M[6]*x + M[7]*(y + y1) + M[8]
if interpolation == cv2.INTER_NEAREST:
for x1 in xrange(bw):
W = np.float64(W0 + M[6]*x1);
if W != 0:
W = np.float64(1.0)/W
X = np.int32((X0 + M[0]*x1)*W)
Y = np.int32((Y0 + M[3]*x1)*W)
XY[y1, x1][0] = np.int16(X)
XY[y1, x1][1] = np.int16(Y)
else:
for x1 in xrange(bw):
W = np.float64(W0 + M[6]*x1);
if W != 0:
W = cv2.INTER_TAB_SIZE/W
X = np.int32((X0 + M[0]*x1)*W)
Y = np.int32((Y0 + M[3]*x1)*W)
XY[y1, x1][0] = np.int16((X >> cv2.INTER_BITS) + origin_x)
XY[y1, x1][1] = np.int16((Y >> cv2.INTER_BITS) + origin_y)
A[y1, x1] = np.int16(((Y & (cv2.INTER_TAB_SIZE-1))*cv2.INTER_TAB_SIZE + (X & (cv2.INTER_TAB_SIZE-1))))
if interpolation == cv2.INTER_NEAREST:
cv2.remap(src, XY, None, interpolation, dst=dpart,
borderMode=borderMode, borderValue=borderValue)
else:
cv2.remap(src, XY, A, interpolation, dst=dpart,
borderMode=borderMode, borderValue=borderValue)
x_dst += B_SIZE
x_dst = 0
y_dst += B_SIZE
return dst

Drawing a line between two points

Here's what I got so far. I rewrote the code to simplify things a bit. Previous code wasn't actually the real, basic algorithm. It had fluff that I didn't need. I answered the question about pitch, and below you'll see some images of my test results.
-- First draft of the line routine. The surrounding text reports that it
-- produces overlapping, slanted and wrapped output, so the notes below only
-- describe what the code does.
--   buf            table whose `buffer` field receives the pixels
--   x1, y1, x2, y2 start and end coordinates
--   color          value written for each plotted pixel
--   pitch          row stride used to turn (column, row) into a buffer index
local function Line (buf, x1, y1, x2, y2, color, pitch)
-- identify the first pixel
-- NOTE(review): n is computed but never used.
local n = x1 + y1 * pitch
-- // difference between starting and ending points
local dx = x2 - x1;
local dy = y2 - y1;
-- Slope and initial error term; dx == 0 is not guarded before the division.
local m = dy / dx
local err = m - 1
if (dx > dy) then -- // dx is the major axis
local j = y1
local i = x1
-- The loop ends once i reaches x2, but i only advances when err >= 0,
-- while j advances on every iteration.
while i < x2 do
buf.buffer[j * pitch + i] = color
if (err >= 0) then
i = i + 1
err = err - 1
end
j = j + 1
err = err + m
end
else -- // dy is the major axis
local j = x1
local i = y1
-- Same scheme with the roles swapped: i is the row and is bounded by y2,
-- j is the column and advances on every iteration.
while i < y2 do
buf.buffer[i * pitch + j] = color
if (err >= 0) then
i = i + 1
err = err - 1
end
j = j + 1
err = err + m
end
end
end
-- Native signature for reference:
-- (visdata[2][1][576], int isBeat, int *framebuffer, int *fbout, int w, int h
-- Test render: ignores all of its arguments and draws one diagonal line
-- from (0, 0) to (24, 24) into self.buffer with a pitch of 24.
function LibAVSSuperScope:Render(visdata, isBeat, framebuffer, fbout, w, h)
  Line(self.buffer, 0, 0, 24, 24, 0xffff00, 24)
  return
end
Edit: Oh I just realized something. 0,0 is in the lower left-hand corner. So the function's sort of working, but it's overlapping and slanted.
Edit2:
Yeah, this whole thing's broken. I'm plugging numbers into Line() and getting all sort of results. Let me show you some.
Here's Line (self.buffer, 0, 0, 23, 23, 0x00ffff, 24 * 2)
And here's Line (self.buffer, 0, 1, 23, 23, 0x00ffff, 24 * 2)
Edit: Wow, doing Line (self.buffer, 0, 24, 24, 24, 0x00ffff, 24 * 2) uses way too much CPU time.
Edit: Here's another image using this algorithm. The yellow dots are starting points.
Line (self.buffer, 0, 0, 24, 24, 0xff0000, 24)
Line (self.buffer, 0, 12, 23, 23, 0x00ff00, 24)
Line (self.buffer, 12, 0, 23, 23, 0x0000ff, 24)
Edit: And yes, that blue line wraps around.
This one works.
Line (self.buffer, 0, 0, 23, 23, 0xff0000, 24 * 2)
Line (self.buffer, 0, 5, 23, 23, 0x00ff00, 24)
Line (self.buffer, 12, 0, 23, 23, 0x0000ff, 24)
--
-- Plots a line into buf.buffer by stepping one column at a time from x0 to
-- x1 and evaluating y = slope * x + intercept, rounded to the nearest row.
-- Only the starting pixel is written when x0 == x1.
--   pitch  row stride: pixel (x, y) lives at index x + y * pitch
local function Line (buf, x0, y0, x1, y1, color, pitch)
  local run = x1 - x0
  local rise = y1 - y0
  -- The starting pixel is always drawn.
  buf.buffer[x0 + y0 * pitch] = color
  if run == 0 then
    return
  end
  local slope = rise / run
  local intercept = y0 - slope * x0
  -- Walk towards x1 in whichever direction it lies.
  local step = (x1 > x0) and 1 or -1
  local x = x0
  while x ~= x1 do
    x = x + step
    local y = math.floor(slope * x + intercept + 0.5)
    buf.buffer[x + y * pitch] = color
  end
end
Here's the spiral.
The one below dances around like a music visualization, but we're just feeding it random data. I think the line quality could be better.
This is what I settled on. I just had to find valid information on that Bresenham algorithm. Thanks cs-unc for the information about various line algorithms, from simple to complex.
-- Integer error-accumulating line walk (the Bresenham variant settled on in
-- the text). Plots from (x0, y0) to (x1, y1) inclusive into self.buffer.
--   color  value written for each plotted pixel
--   pitch  row stride: pixel (x, y) lives at index y * pitch + x
-- Uses `bit.rshift` and a `floor` function from the enclosing scope.
function LibBuffer:Line4(x0, y0, x1, y1, color, pitch)
  local dx, dy = x1 - x0, y1 - y0

  -- Reduce both deltas to magnitudes and remember the walking direction.
  local stepx, stepy = 1, 1
  if dy < 0 then
    dy, stepy = -dy, -1
  end
  if dx < 0 then
    dx, stepx = -dx, -1
  end

  -- Writes the pixel at the current (x0, y0).
  local function plot()
    self.buffer[floor(y0) * pitch + floor(x0)] = color
  end

  -- The first pixel is addressed without floor().
  self.buffer[x0 + y0 * pitch] = color

  if dx > dy then
    -- x is the driving axis; y advances whenever the error term is non-negative.
    local err = dy - bit.rshift(dx, 1)
    while x0 ~= x1 do
      if err >= 0 then
        y0 = y0 + stepy
        err = err - dx
      end
      x0 = x0 + stepx
      err = err + dy
      plot()
    end
  else
    -- y is the driving axis; x advances whenever the error term is non-negative.
    local err = dx - bit.rshift(dy, 1)
    while y0 ~= y1 do
      if err >= 0 then
        x0 = x0 + stepx
        err = err - dy
      end
      y0 = y0 + stepy
      err = err + dx
      plot()
    end
  end
end
Here's what this one looks like.

Find the tangent of a point on a cubic bezier curve

For a cubic Bézier curve, with the usual four points a, b, c and d,
for a given value t,
how to most elegantly find the tangent at that point?
The tangent of a curve is simply its derivative. The parametric equation that Michal uses:
P(t) = (1 - t)^3 * P0 + 3t(1-t)^2 * P1 + 3t^2 (1-t) * P2 + t^3 * P3
should have a derivative of
dP(t) / dt = -3(1-t)^2 * P0 + 3(1-t)^2 * P1 - 6t(1-t) * P1 - 3t^2 * P2 + 6t(1-t) * P2 + 3t^2 * P3
Which, by the way, appears to be wrong in your earlier question. I believe you're using the slope for a quadratic Bezier curve there, not cubic.
From there, it should be trivial to implement a C function that performs this calculation, like Michal has already provided for the curve itself.
Here is fully tested code to copy and paste:
It draws approximately equidistant points along the curve, and it draws the tangents.
bezierInterpolation finds the points
bezierTangent finds the tangents
There are TWO VERSIONS of bezierInterpolation supplied below:
bezierInterpolation works perfectly.
altBezierInterpolation is exactly the same, BUT it is written in an expanded, clear, explanatory manner. It makes the arithmetic much easier to understand.
Use either of those two routines: the results are identical.
In both cases, use bezierTangent to find the tangents. (Note: Michal's fabulous code base here.)
A full example of how to use with drawRect: is also included.
// MBBezierView.m original BY MICHAL stackoverflow #4058979
#import "MBBezierView.h"
// Evaluates one coordinate of a cubic Bezier at parameter t.
// a and d are the end points, b and c the two control points; call once
// with the x values and once with the y values.
CGFloat bezierInterpolation(
    CGFloat t, CGFloat a, CGFloat b, CGFloat c, CGFloat d) {
    // see also below for another way to do this, that follows the
    // 'coefficients' idea, and is a little clearer
    CGFloat tSquared = t * t;
    CGFloat tCubed = tSquared * t;
    // Each term is one control value times its Bernstein weight.
    CGFloat startTerm = a + (-a * 3 + t * (3 * a - a * t)) * t;   // a * (1-t)^3
    CGFloat firstTerm = (3 * b + t * (-6 * b + b * 3 * t)) * t;   // 3b * t * (1-t)^2
    CGFloat secondTerm = (c * 3 - c * 3 * t) * tSquared;          // 3c * t^2 * (1-t)
    CGFloat endTerm = d * tCubed;                                 // d * t^3
    return startTerm + firstTerm + secondTerm + endTerm;
}
// Alternative to bezierInterpolation: expands the curve into polynomial
// coefficients first and then evaluates cubic*t^3 + quadratic*t^2 + linear*t
// + constant. The same four coefficients can be shared with the slope
// calculation if desired.
CGFloat altBezierInterpolation(
    CGFloat t, CGFloat a, CGFloat b, CGFloat c, CGFloat d)
{
    CGFloat cubic = ( d - (3.0 * c) + (3.0 * b) - a );
    CGFloat quadratic = ( (3.0 * c) - (6.0 * b) + (3.0 * a) );
    CGFloat linear = ( (3.0 * b) - (3.0 * a) );
    CGFloat constant = ( a );

    return ( cubic*t*t*t + quadratic*t*t + linear*t + constant );
}
// Derivative of one coordinate of a cubic Bezier at parameter t.
// a, b, c, d are the four control values of that coordinate (x0..x3).
// With the polynomial coefficients
//     A = x3 - 3 * x2 + 3 * x1 - x0
//     B = 3 * x2 - 6 * x1 + 3 * x0
//     C = 3 * x1 - 3 * x0
//     D = x0   (drops out of the derivative)
// the slope is 3*A*t^2 + 2*B*t + C.
// Run the routine twice, once with the x values and once with the y values;
// the "8 coefficients" sometimes mentioned are these four per axis.
CGFloat bezierTangent(CGFloat t, CGFloat a, CGFloat b, CGFloat c, CGFloat d)
{
    CGFloat cubic = ( d - (3.0 * c) + (3.0 * b) - a );
    CGFloat quadratic = ( (3.0 * c) - (6.0 * b) + (3.0 * a) );
    CGFloat linear = ( (3.0 * b) - (3.0 * a) );

    return ( ( 3.0 * cubic * t* t ) + ( 2.0 * quadratic * t ) + linear );
}
@implementation MBBezierView

// Demo drawing: strokes a cubic Bezier in red on a black background, marks
// points at t = 0, 0.05, ..., 1 with brown circles and draws a short white
// tangent indicator from each point.
- (void)drawRect:(CGRect)rect {
    CGPoint p1, p2, p3, p4;
    // p1/p4 are the end points, p2/p3 the control points.
    p1 = CGPointMake(30, rect.size.height * 0.33);
    p2 = CGPointMake(CGRectGetMidX(rect), CGRectGetMinY(rect));
    p3 = CGPointMake(CGRectGetMidX(rect), CGRectGetMaxY(rect));
    p4 = CGPointMake(-30 + CGRectGetMaxX(rect), rect.size.height * 0.66);

    [[UIColor blackColor] set];
    [[UIBezierPath bezierPathWithRect:rect] fill];

    [[UIColor redColor] setStroke];
    UIBezierPath *bezierPath = [[[UIBezierPath alloc] init] autorelease];
    [bezierPath moveToPoint:p1];
    [bezierPath addCurveToPoint:p4 controlPoint1:p2 controlPoint2:p3];
    [bezierPath stroke];

    [[UIColor brownColor] setStroke];

    // Shortens the drawn tangent indicators. The same factor is applied to
    // both axes; scaling x and y differently would skew the drawn direction.
    const CGFloat tangentScale = 0.3;

    // now mark in points along the bezier!
    // The 1.00001 bound makes sure accumulated float error does not drop t = 1.
    for (CGFloat t = 0.0; t <= 1.00001; t += 0.05) {
        // Reset per iteration: the tangent below switches the stroke to white.
        [[UIColor brownColor] setStroke];

        // either bezierInterpolation or altBezierInterpolation can be used
        // here, the resulting position is the same
        CGPoint point = CGPointMake(
            bezierInterpolation(t, p1.x, p2.x, p3.x, p4.x),
            bezierInterpolation(t, p1.y, p2.y, p3.y, p4.y));

        // just draw that point to indicate it...
        UIBezierPath *pointPath =
            [UIBezierPath bezierPathWithArcCenter:point
                radius:5 startAngle:0 endAngle:2*M_PI clockwise:YES];
        [pointPath stroke];

        // tangent (derivative) of the curve at t, per axis
        CGPoint vel = CGPointMake(
            bezierTangent(t, p1.x, p2.x, p3.x, p4.x),
            bezierTangent(t, p1.y, p2.y, p3.y, p4.y));

        // the following code simply draws an indication of the tangent
        CGPoint demo = CGPointMake( point.x + (vel.x * tangentScale),
                                    point.y + (vel.y * tangentScale) );

        [[UIColor whiteColor] setStroke];
        UIBezierPath *vp = [UIBezierPath bezierPath];
        [vp moveToPoint:point];
        [vp addLineToPoint:demo];
        [vp stroke];
    }
}

@end
to draw that class...
// Create the view with an explicit frame (origin 400,20; size 600x700).
MBBezierView *mm = [[MBBezierView alloc]
initWithFrame:CGRectMake(400,20, 600,700)];
// Request a redraw, then attach the view.
[mm setNeedsDisplay];
// assumes `self` is a UIView here, since it receives addSubview: — TODO confirm
[self addSubview:mm];
Here are the two routines to calculate approximately equidistant points, and the tangents of those, along a bezier cubic.
For clarity and reliability, these routines are written in the simplest, most explanatory, way possible.
// One coordinate of a cubic Bezier at parameter t, evaluated from the
// polynomial coefficients of the control values a, b, c, d.
CGFloat bezierPoint(CGFloat t, CGFloat a, CGFloat b, CGFloat c, CGFloat d)
{
    CGFloat cubic = ( d - (3.0 * c) + (3.0 * b) - a );
    CGFloat quadratic = ( (3.0 * c) - (6.0 * b) + (3.0 * a) );
    CGFloat linear = ( (3.0 * b) - (3.0 * a) );
    CGFloat constant = ( a );

    return ( cubic*t*t*t + quadratic*t*t + linear*t + constant );
}
// Derivative of one coordinate of a cubic Bezier at parameter t:
// 3*cubic*t^2 + 2*quadratic*t + linear. The constant coefficient (a) drops
// out of the derivative and is therefore not computed.
CGFloat bezierTangent(CGFloat t, CGFloat a, CGFloat b, CGFloat c, CGFloat d)
{
    CGFloat cubic = ( d - (3.0 * c) + (3.0 * b) - a );
    CGFloat quadratic = ( (3.0 * c) - (6.0 * b) + (3.0 * a) );
    CGFloat linear = ( (3.0 * b) - (3.0 * a) );

    return ( ( 3.0 * cubic * t* t ) + ( 2.0 * quadratic * t ) + linear );
}
The four precalculated values, C1 C2 C3 C4, are sometimes called the coefficients of the bezier. (Recall that a b c d are usually called the four control points.)
Of course, t runs from 0 to 1, for example every 0.05.
Simply call these routines once for X, and then once separately for Y.
Hope it helps someone!
Important facts:
(1) It is an absolute fact that: unfortunately, there is, definitely, NO method, provided by Apple, to extract points from a UIBezierPath. True as of 2019.
(2) Don't forget it's as easy as pie to animate something along a UIBezierPath. Google many examples.
(3) Many ask, "Can't CGPathApply be used to extract the points from a UIBezierPath?" No, CGPathApply is totally unrelated: it simply gives you a list of your "instructions in making any path" (so, "start here", "draw a straight line to this point", etc etc.) The name is confusing but CGPathApply is totally unrelated to bezier paths.
For game programmers - as @Engineer points out, you may well want the normal of the tangent; fortunately Apple has vector math built in:
https://developer.apple.com/documentation/accelerate/simd/working_with_vectors
https://developer.apple.com/documentation/simd/2896658-simd_normalize
I found it too error-prone to use the supplied equations. Too easy to miss a subtle t or misplaced bracket.
By contrast, Wikipedia provides a much clearer, cleaner, derivative IMHO:
...which implements easily in code as:
3f * oneMinusT * oneMinusT * (p1 - p0)
+ 6f * t * oneMinusT * (p2 - p1)
+ 3f * t * t * (p3 - p2)
(assuming you have vector-minus configured in your language of choice; question isn't marked as ObjC specifically, and iOS now has several langs available)
Here goes my Swift implementation.
Which I tried my best to optimize for speed, by eliminating all redundant math operations. i.e. make the minimal numbers of calls to math operations. And use the least possible number of multiplications (which are much more expensive than sums).
There are 0 multiplications to create the bezier.
Then 3 multiplications to get a point of bezier.
And 2 multiplications to get a tangent to the bezier.
/// A cubic Bézier curve stored as per-axis polynomial coefficients, so that
/// points and tangents can be evaluated with very few multiplications.
struct CubicBezier {
    private typealias Me = CubicBezier
    typealias Vector = CGVector
    typealias Point = CGPoint
    typealias Num = CGFloat
    /// Polynomial coefficients of one axis: `C·t³ + S·t² + M·t + L`.
    typealias Coeficients = (C: Num, S: Num, M: Num, L: Num)

    /// Coefficients of the x coordinate.
    let xCoeficients: Coeficients
    /// Coefficients of the y coordinate.
    let yCoeficients: Coeficients

    /// Creates the curve from its start point, two control points and end point.
    init(from start: Point, through c1: Point, andThrough c2: Point, to end: Point) {
        xCoeficients = CubicBezier.coeficientsOfCurve(
            from: start.x, through: c1.x, andThrough: c2.x, to: end.x)
        yCoeficients = CubicBezier.coeficientsOfCurve(
            from: start.y, through: c1.y, andThrough: c2.y, to: end.y)
    }

    /// Expands the four control values of one axis into polynomial coefficients.
    ///
    /// Multiples of 3 and 6 are built by repeated addition, so no
    /// multiplication is performed here.
    static func coeficientsOfCurve(from c0: Num, through c1: Num, andThrough c2: Num, to c3: Num) -> Coeficients {
        let tripleStart = c0 + c0 + c0
        let tripleFirst = c1 + c1 + c1
        let tripleSecond = c2 + c2 + c2
        let sixfoldFirst = tripleFirst + tripleFirst
        return (
            C: c3 - tripleSecond + tripleFirst - c0,
            S: tripleSecond - sixfoldFirst + tripleStart,
            M: tripleFirst - tripleStart,
            L: c0
        )
    }

    /// Evaluates the polynomial at `t` in nested (Horner) form: three multiplications.
    static func xOrYofCurveWith(coeficients coefs: Coeficients, at t: Num) -> Num {
        var value = coefs.C
        value = value * t + coefs.S
        value = value * t + coefs.M
        value = value * t + coefs.L
        return value
    }

    /// Evaluates the derivative `3C·t² + 2S·t + M` at `t`: two multiplications.
    static func xOrYofTangentToCurveWith(coeficients coefs: Coeficients, at t: Num) -> Num {
        let tripleCubic = coefs.C + coefs.C + coefs.C
        let inner = tripleCubic * t + coefs.S + coefs.S
        return inner * t + coefs.M
    }

    /// x coordinate of the curve at `t`.
    func x(at t: Num) -> Num {
        return CubicBezier.xOrYofCurveWith(coeficients: xCoeficients, at: t)
    }

    /// y coordinate of the curve at `t`.
    func y(at t: Num) -> Num {
        return CubicBezier.xOrYofCurveWith(coeficients: yCoeficients, at: t)
    }

    /// x component of the tangent at `t`.
    func dx(at t: Num) -> Num {
        return CubicBezier.xOrYofTangentToCurveWith(coeficients: xCoeficients, at: t)
    }

    /// y component of the tangent at `t`.
    func dy(at t: Num) -> Num {
        return CubicBezier.xOrYofTangentToCurveWith(coeficients: yCoeficients, at: t)
    }

    /// Point on the curve at `t`.
    func point(at t: Num) -> Point {
        return Point(x: x(at: t), y: y(at: t))
    }

    /// Tangent (derivative) vector at `t`; not normalised.
    func tangent(at t: Num) -> Vector {
        return Vector(dx: dx(at: t), dy: dy(at: t))
    }
}
Use like:
// Placeholder control points: all four are .zero here, so substitute real
// start, control and end points in actual use.
let bezier = CubicBezier.init(from: .zero, through: .zero, andThrough: .zero, to: .zero)
// Points on the curve at t = 0.2 and t = 0.7.
let point02 = bezier.point(at: 0.2)
let point07 = bezier.point(at: 0.7)
// Tangent vectors at t = 0.1 and t = 0.5.
let tangent01 = bezier.tangent(at: 0.1)
let tangent05 = bezier.tangent(at: 0.5)
I couldn't get any of this to work until I realized that for parametric equations, (dy/dt)/(dx/dt) = dy/dx

Resources