How to pass an integer constant as generic parameter - delphi

I need to pass an integer constant as a generic parameter. For example, here is a code for working with a non-standard floating-point numbers (float16) (the code is taken from the githab and slightly modified):
const
ES = 5; // exponent size (bits)
MS = 10; // mantissa size (bits)
// plus sign (1 bit)
ESMS = ES + MS;
function FloatXXToSingle(const _in: cardinal): Single;
var
t1, t2, t3: cardinal;
begin
t1 := _in and (Cardinal(1 shl ESMS) - 1); // Non-sign bits
t2 := _in and (1 shl ESMS); // Sign bit
t3 := _in and ((1 shl ES - 1) shl MS); // Exponent
t1 := t1 shl (23 - MS); // Align mantissa on MSB
t2 := t2 shl (31 - ESMS); // Shift sign bit into position
t1 := t1 + cardinal((127-(1 shl (ES-1)-1)) shl 23); // Adjust bias
if t3 = 0 then // Denormals-as-zero
t1 := 0;
t1 := t1 or t2; // Re-insert sign bit
pCardinal(#Result)^ := t1;
end;
I want to create a generic on its basis, so that I can set the number of bits for the exponent and mantissa myself.
Now I'm doing something like this:
type
TFloat<TES, TMS> = class
const ES = sizeof(TES);
const MS = sizeof(TMS);
const ESMS = ES + MS;
class function ToSingle(const _in: cardinal): Single;
end;
class function TFloat<TES, TMS>.ToSingle(const _in: cardinal): Single;
var
t1, t2, t3: cardinal;
begin
t1 := _in and (Cardinal(1 shl ESMS) - 1); // Non-sign bits
t2 := _in and (1 shl ESMS); // Sign bit
t3 := _in and ((1 shl ES - 1) shl MS); // Exponent
t1 := t1 shl (23 - MS); // Align mantissa on MSB
t2 := t2 shl (31 - ESMS); // Shift sign bit into position
t1 := t1 + cardinal((127-(1 shl (ES-1)-1)) shl 23); // Adjust bias
if t3 = 0 then // Denormals-as-zero
t1 := 0;
t1 := t1 or t2; // Re-insert sign bit
pCardinal(#Result)^ := t1;
end;
But to use this, I have to write something like this:
type
TMyFloat = TFloat<packed record a:Integer; b:Byte; end, packed record a: Extended; end>;
begin
writeln(TMyFloat.ToSingle($1234));
end.
But this method is not at all elegant. Perhaps there is a way to directly pass two numbers to the generic: the size of the exponent and the mantissa?

Related

Is Delphi's Skewness correct

In Delphi one can calculate Skewness using System.Math's function MomentSkewKurtosis().
var m1, m2, m3, m4, skew, k: Extended;
System.Math.MomentSkewKurtosis([1.1,
3.345,
12.234,
11.945,
14.235,
16.876,
20.213,
11.001,
7.098,
21.234], m1, m2, m3, m4, skew, k);
This will prints skew equal to -0.200371489809269.
Minitab prints the value -0.24
SigmaXL prints the value -0.23611
The reason is that Delphi does not not perform adjustment.
Here is my implementation which calculates adjustment:
function Skewness(const X: array of Extended; const Adjusted: Boolean): Extended;
begin
var AMean := Mean(X);
var xi_minus_mean_power_3 := 0.0;
var xi_minus_mean_power_2 := 0.0;
for var i := Low(X) to High(X) do
begin
xi_minus_mean_power_3 := xi_minus_mean_power_3 + IntPower((X[i] - AMean), 3);
xi_minus_mean_power_2 := xi_minus_mean_power_2 + IntPower((X[i] - AMean), 2);
end;
// URL : https://www.gnu.org/software/octave/doc/v4.0.1/Descriptive-Statistics.html
{ mean ((x - mean (x)).^3)
skewness (X) = ------------------------.
std (x).^3
}
var N := Length(X);
Result := xi_minus_mean_power_3 / N /
IntPower(Sqrt(1 / N * xi_minus_mean_power_2), 3);
// URL : https://www.gnu.org/software/octave/doc/v4.0.1/Descriptive-Statistics.html
{ sqrt (N*(N-1)) mean ((x - mean (x)).^3)
skewness (X, 0) = -------------- * ------------------------.
(N - 2) std (x).^3
}
if Adjusted then
Result := Result * Sqrt(N * Pred(N)) / (N - 2);
end;
The helper routine IntPower is as follows:
function IntPower(const X: Extended; const N: Integer): Extended;
/// <remarks>
/// Calculate any float to non-negative integer power. Developed by Rory Daulton and used with permission. Last modified December 1998.
/// </remarks>
function IntPow(const Base: Extended; const Exponent: LongWord): Extended;
{ Heart of Rory Daulton's IntPower: assumes valid parameters &
non-negative exponent }
{$IFDEF WIN32}
asm
fld1 // Result := 1
cmp eax, 0 // eax := Exponent
jz ##3
fld Base
jmp ##2
##1: fmul ST, ST // X := Base * Base
##2: shr eax,1
jnc ##1
fmul ST(1),ST // Result := Result * X
jnz ##1
fstp st // pop X from FPU stack
##3:
fwait
end;
{$ENDIF}
{$IFDEF WIN64}
begin
Result := Power(Base, Exponent);
end;
{$ENDIF}
begin
if N = 0 then
Result := 1
else if (X = 0) then
begin
if N < 0 then
raise EMathError.Create('Zero cannot be raised to a negative power.')
else
Result := 0
end
else if (X = 1) then
Result := 1
else if X = -1 then
begin
if Odd (N) then
Result := -1
else
Result := 1
end
else if N > 0 then
Result := IntPow (X, N)
else
begin
var P: LongWord;
if N <> Low (LongInt) then
P := Abs(N)
else
P := LongWord(High(LongInt)) + 1;
try
Result := IntPow(X, P);
except
on EMathError do
begin
Result := IntPow(1 / X, P); // try again with another method, perhaps less precise
Exit;
end;
end;
Result := 1 / Result;
end;
end;
With that function the adjusted skewness becomes the accurate -0.237611357234441 matching Matlab and Minitab.
The only explanation I found is:
https://octave.org/doc/v4.0.1/Descriptive-Statistics.html
"The adjusted skewness coefficient is obtained by replacing the sample
second and third central moments by their bias-corrected versions."
Same goes with Kurtosis:
function Kurtosis(const X: array of Extended; const Adjusted: Boolean): Extended;
begin
var AMean := Mean(X);
var xi_minus_mean_power_4 := 0.0;
var xi_minus_mean_power_2 := 0.0;
for var i := Low(X) to High(X) do
begin
xi_minus_mean_power_4 := xi_minus_mean_power_4 + IntPower((X[i] - AMean), 4);
xi_minus_mean_power_2 := xi_minus_mean_power_2 + IntPower((X[i] - AMean), 2);
end;
{ mean ((x - mean (x)).^4)
k1 = ------------------------
std (x).^4
}
var N := Length(X);
Result := xi_minus_mean_power_4 / N /
IntPower(1 / N * xi_minus_mean_power_2, 2);
{ N - 1
k0 = 3 + -------------- * ((N + 1) * k1 - 3 * (N - 1))
(N - 2)(N - 3)
}
if Adjusted then
// Mathlab, Minitab and SigmaXL do not add 3 (which is the kurtosis for normal distribution
Result := {3 + }(N - 1) / ((N - 2) * (N - 3)) * ((N + 1) * Result - 3 * (N - 1));
end;
What is the reason for such adjustments and why Delphi decided not to implement it?

Are points on max. two lines?

I have a time problem with my program. Given a set of points, it has to say whether all of those points are lying on two different lines.
I wrote code, which has points in array and removes one by one and try calculate it's vector.
But this solution is slow, because it must control all cases of lines. On input with 10,000 points it takes over 10 seconds.
Can someone please tell me if, is here better solution for this problem?
I made this code in Pascal:
uses
math;
type
TPoint = record
x, y: real;
end;
TList = array of TPoint;
function xround(value: real; places: integer): real;
var
muldiv: real;
begin
muldiv := power(10, places);
xround := round(value * muldiv) / muldiv;
end;
function samevec(A, B, C: TPoint): boolean;
var
bx, by: real; // vec A -> B
cx, cy: real; // vec A -> C
lb, lc: real; // len AB, len AC
begin
bx := B.x - A.x;
by := B.y - A.y;
cx := C.x - A.x;
cy := C.y - A.y;
lb := sqrt(bx * bx + by * by);
lc := sqrt(cx * cx + cy * cy);
// normalize
bx := xround(bx / lb, 3);
by := xround(by / lb, 3);
cx := xround(cx / lc, 3);
cy := xround(cy / lc, 3);
samevec := ((bx = cx) and (by = cy)) or ((bx = -cx) and (by = -cy));
end;
function remove(var list: TList; idx: integer): TPoint;
var
i: integer;
begin
remove.x := 0;
remove.y := 0;
if idx < length(list) then
begin
remove := list[idx];
for i := idx to length(list) - 2 do
list[i] := list[i + 1];
setlength(list, length(list) - 1);
end;
end;
var
i, j, lines: integer;
list, work: TList;
A, B: TPoint;
begin
while not eof(input) do
begin
setlength(list, length(list) + 1);
with list[length(list) - 1] do
readln(x, y);
end;
if length(list) < 3 then
begin
writeln('ne');
exit;
end;
lines := 0;
for i := 1 to length(list) - 1 do
begin
work := copy(list, 0, length(list));
lines := 1;
B := remove(work, i);
A := remove(work, 0);
for j := length(work) - 1 downto 0 do
if samevec(A, B, work[j]) then
remove(work, j);
if length(work) = 0 then
break;
lines := 2;
A := remove(work, 0);
B := remove(work, 0);
for j := length(work) - 1 downto 0 do
if samevec(A, B, work[j]) then
remove(work, j);
if length(work) = 0 then
break;
lines := 3; // or more
end;
if lines = 2 then
writeln('YES')
else
writeln('NO');
end.
Thanks, Ferko
APPENDED:
program line;
{$APPTYPE CONSOLE}
uses
math,
sysutils;
type point=record
x,y:longint;
end;
label x;
var
Points,otherPoints:array[0..200001] of point;
n,n2,i,j,k,i1,i2:longint;
function sameLine(A,B,C:point):boolean;
var
ABx,ACx,ABy,ACy,k:longint;
begin
ABx:=B.X-A.X;
ACx:=C.X-A.X;
ABy:=B.Y-A.Y;
ACy:=C.Y-A.Y;
k:=ABx*ACy-ABy*ACx;
if (k=0) then sameLine:=true
else sameLine:=false;
end;
begin
readln(n);
if (n<=4) then begin
writeln('YES');
halt;
end;
for i:=1 to n do readln(Points[i].x,Points[i].y);
for i:=1 to 5 do for j:=i+1 to 5 do for k:=j+1 to 5 do if not (sameLine(Points[i],Points[j],Points[k])) then begin
i1:=i;
i2:=j;
goto x;
end;
writeln('NO');
halt;
x:
n2:=0;
for i:=1 to n do begin
if ((i=i1) or (i=i2)) then continue;
if not sameLine(Points[i1],Points[i2],Points[i]) then begin
inc(n2,1);
otherPoints[n2]:=Points[i];
end;
end;
if (n2<=2) then begin
writeln('YES');
halt;
end;
for i:=3 to n2 do begin
if not sameLine(otherPoints[1],otherPoints[2],otherPoints[i]) then begin
writeln('NO');
halt;
end;
end;
writeln('YES');
end.
Three points A, B and C lie on the same straight line, if vectors AB and AC are collinear or anti-collinear. We can check for collinearity using cross product of vectors - it should be zero.
#LU RD already described this approach is comment, but author probably missed it.
Note that method doesn't suffer from division by zero - there is no division at all.
ABx := B.X - A.X;
ACx := C.X - A.X;
ABy := B.Y - A.Y;
ACy := C.Y - A.Y;
Cross := ABx * ACy - ABy * ACx;
// for integer coordinates
if Cross = 0 then
A,B,C are collinear
If coordinates are float, one must consider some tolerance level. Variants:
//better if available:
if Math.IsZero(Cross)
if Math.SameValue(Cross, 0)
//otherwise
if Abs(Cross) <= SomeEpsilonValue
If coordinate range is very large, numerical error might be significant, so it is worth to normalize tolerance by squared magnitude of coordinate differences:
if Math.IsZero(Cross / Max(ABx * ABx + ABy * ABy, ACx * ACx + ACy * ACy))
I guess the answer to the Q should be devided into two parts.
I. How to know that the given three points belong to the same line?
The answer to this part of the Q was given by #Lurd and then expanded by Mbo.
Let us name their solution function BelongToOneLine(Pnts: array [1..3] of TPoint): boolean; We can consider this part solved.
II. How to decrease time consumption of the algorithm or in other words: how to avoid calling BelongToOneLilne with every possible combination of points as parameters?
Here is the algorithm.
We select 5 distinct points from the task set. 5 is enough (check combination possibilities).
We find the answer to the question if there are at least three points from given five that belong to a single line.
if No - then we do not need to iterate the remaining poins - the answer is that we require more then two lines.
if Yes - (say poins Pt1, Pt2 and Pt3 belong to the same line and Pt4 and Pt5 - don't).
Then we store the points that do not belong to the line Pt1-Pt2-Pt3 from the group-of-five in a distinct array of "outsider" points (or store their indexes in the main array). It may have Length = 0 by the end of this step. This will not affect the rest of the algo.
We get the boolean result of the function BelongToOneLine([Pt1, Pt2, Pt[i]]).
if Yes - we skip the point - it belongs to the line Pt1-Pt2-Pt3.
if No - we store this point in the "outsiders" array.
We watch the length of the OutsidersArray.
if it is <= 2 then the answer to the whole Q is Yes, they do belong to 2 or less lines.
if >2 then we iterate the function BelongToOneLine([OutsiderPt1, OutsiderPt2, OutsiderPt[i]]) until High(OutsiderArray) or until when OutsiderPt[i] does not belong to OutsiderPt1-OutsiderPt2 line. All points of OutsiderArray must belong to the same line otherwise the answer to the whole Q will be negative.
Math note
Without optimization the inerations count will be n! / ((n - k)! * k!).
With the optimization it will be:
5! / ((5-3)! * 3!) + (n - 3) + P(q)outsiders * n that is about 15000 for n = 10000. Most negative count - about 20000.
And another optimization note
Replace declaration of TPoint with integer variables.
Search Results
Featured snippet from the web
For n=1: you need two lines to intersect, so the maximum number of intersections is 0. n=2: Two distinct lines will always intersect in at most one point irrespective of dimensions. ... Explanation: Each set of 2 lines can intersect at one point. Or one point is common intersection for 2 lines.

Lift UInt64 limits with strings in Delphi

I'm reaching my limit with UInt64 and I was wondering if there are functions which do simple operating options such as +/- , etc. with just strings because they can store just as much RAM as you have... (theoretically)
For example I would like to calculate
24758800785707605497982484480 + 363463464326426 and get the result as a string.
I kinda know how to solve this problems with strings using the number system 0123456789 and kinda do digit by digit and overflow the next position - which would cost a lot more power, but I wouldn't mind this issue...
I would like to have this ability to do such calculations until my RAM just blows up (which would be the real limit...)
Are there such functions which already do that?
Arbitrarily large integers are not supported at the language level in Delphi, but a bit of Googling turns up http://www.delphiforfun.org/programs/Library/big_integers.htm, which can support them as alibrary.
On super computers, its called BCD math (Binary Coded Decimals) and each half-byte of RAM represents a decimal digit [0..9] - not an efficient use of RAM, but huge computations take minimal time (i.e. about 3 mSecs to multiply 2 million digit numbers. A BCD Emulator on a fast PC takes 5 or 6 minutes.
I never need to add big numbers, but I do multiply. Actually I call this routine iteratively to compute for example, 1000000 factorial (a 5,565,709 million digit answer. Str6Product refers to how it chops up a pair of string numbers. s1 and s2 have a practical length limit of about 2^31. The function is limited by what a "string can hold". Whatever that limit is, I've never gotten there.
//==============================================================================
function Str6Product(s1: string; s2: string): string; // 6-13 5:15 PM
var
so,snxt6 : string;
z1,z3, i, j, k : Cardinal; // Cardinal is 32-bit unsigned
x1,x3,xm : Cardinal;
countr : Cardinal;
a1, a2, a3 : array of Int64;
inum, icarry : uInt64; // uInt64 is 64-bit signed
begin
s1 := '00000'+s1;
s2 := '00000'+s2;
z1 := length(s1); // set size of Cardinal arrays
z3 := z1 div 6;
x1 := length(s2); // set size of Cardinal arrays
x3 := x1 div 6;
xm := max(x3,z3);
SetLength(a1,xm+1);
SetLength(a2,xm+1);
// try to keep s1 and s2 about the
// same length for best performance
for i := 1 to xm do begin // from rt 2 lft - fill arrays
// with 4-byte integers
if i <= z3 then a1[i] := StrToInt(copy (s1, z1-i*6+1, 6));
if i <= x3 then a2[i] := StrToInt(copy (s2, x1-i*6+1, 6));
if i > z3 then a1[i] := 0;
if i > x3 then a2[i] := 0;
end;
k := max(xm-x3, xm-z3); // k prevents leading zeroes
SetLength(a3,xm+xm+1);
icarry := 0; countr := 0;
icMax := 0; inMax := 0;
for i := 1 to xm do begin // begin 33 lines of "string mult" engine
inum := 0;
for j := 1 to i do
inum := inum + (a1[i-j+1] * a2[j]);
icarry := icarry + inum;
if icMax < icarry then icMax := icarry;
if inMax < inum then inMax := inum;
inum := icarry mod 1000000;
icarry := icarry div 1000000;
countr := countr + 1;
a3[countr] := inum;
end;
if xm > 1 then begin
for i := xm downto k+1 do begin // k or 2
inum := 0;
for j := 2 to i do
inum := inum + (a1[xm+j-i] * a2[xm-j+2]);
icarry := icarry + inum;
if icMax < icarry then icMax := icarry;
if inMax < inum then inMax := inum;
inum := icarry mod 1000000;
icarry := icarry div 1000000;
countr := countr + 1;
a3[countr] := inum;
end;
end;
if icarry >= 1 then begin
countr := countr + 1;
a3[countr] := icarry;
end;
so := IntToStr(a3[countr]);
for i := countr-1 downto 1 do begin
snxt6 := IntToStr(a3[i]+1000000);
so := so+ snxt6[2]+ snxt6[3]+ snxt6[4]+ snxt6[5]+ snxt6[6]+ snxt6[7];
end;
while so[1] = '0' do // leading zeroes may exist
so := copy(so,2,length(so));
result := so;
end;
//==============================================================================
Test call:
StrText := Str6Product ('742136061320987817587158718975871','623450632948509826743508972875');
I should have added that you should be able to add large numbers using the same methodology - From right to left, fragment the strings into 16 byte chunks then convert those chunks to uInt64 variables. Add the least significant digits first and if it produces a 17th byte, carry that over to the 2nd least significant chunk, add those two PLUS any carry over etc. When otherwise done, convert each 16-byte chunk back to string and concatenate accordingly.
The conversions to and from integer to string and vice-versa is a pain, but necessary for big number arithmetic.

Delphi - Lenze Standard addressing - Code number conversion

From Lenze manual
Code number (C1, C2)
Standard addressing
The meaning of the code numbers and the assigned parameters can be obtained from
the code table (see chapter 8.2). When transmitting data, the code number are
coded as follows:
The following calculation determines the two ASCII digits from the code number
(value range: 0..6229) (value range: 48dec 127dec):
C1 = INTEGER((REMAINDER(code number/790))/10) + 48dec
C2 = REMAINDER(REMAINDER(code number/790)/10) +
INTEGER(code number/790) x 10 + 48dec
Procedure for calculating C1 and C2 from codenumber.
procedure pCodeNumberToC1C2(CodeNumber: Word; var C1, C2: Byte);
begin
C1 := Byte((CodeNumber mod 790) div 10) + 48;
C2 := ((CodeNumber mod 790) mod 10) + 48 + 10 * Byte(CodeNumber div 790);
end;
But, how to calculate it the other way without the aweful:
function fC1C2ToCodeNumber(iC1, iC2: Byte): Word;
var
C1, C2: Byte;
i: Integer;
Begin
Result := 0;
For i := 0 to 6229 Do Begin
pCodeNumberToC1C2(i, C1, C2);
if (C1 = iC1) and (C2 = iC2) Then Result := i;
End;
Result := cn;
End;
Let's
N = p * 790 + q
then
c1 = 48 + q div 10
c2 = 48 + q mod 10 + 10 * p
so
p = (c2-48) div 10
q = (c2-48) mod 10 + (c1-48) * 10
test:
var
c1, c2: Byte;
n, p, q, t: Word;
begin
for t := 0 to 6229 do begin
n := t;
pCodeNumberToC1C2(n, c1, c2);
p := (c2-48) div 10;
q := (c2-48) mod 10 + (c1-48) * 10;
n := 790*p+q;
if n <> t then
Memo1.Lines.Add('Failed at ' + IntToStr(t))
end;
Final:
function C1C2ToCodeNumber(C1, C2: Byte): Word;
begin
Result := ((C2 - 48) div 10) * 790 + ((C2 - 48) mod 10 + (C1 - 48) * 10);
end;
As an alternative to arithmetic you could consider a lookup table. At the cost of memory, this gives you better performance. The code looks like this:
const
CodeNumberTable: array [48..126, 48..127] of Word = (
.... code removed because of Sack Overflow post size limitation
);
const
MinC1 = low(CodeNumberTable);
MinC2 = high(CodeNumberTable);
MaxC1 = low(CodeNumberTable[MinC1]);
MaxC2 = high(CodeNumberTable[MinC1]);
type
EInvalidParameters = class(Exception);
function fC1C2ToCodeNumber(iC1, iC2: Byte): Word;
begin
if not InRange(iC1, MinC1, MaxC1) then
raise EInvalidParameters.CreateFmt(
'iC1 (%d) must be in the range %d to %d',
[iC1, MinC1, MaxC1]
);
if not InRange(iC2, MinC2, MaxC2) then
raise EInvalidParameters.CreateFmt(
'iC2 (%d) must be in the range %d to %d',
[iC2, MinC2, MaxC2]
);
Result := CodeNumberTable[iC1, iC2];
if Result=high(Word) then
raise EInvalidParameters.CreateFmt(
'CodeNumber not defined for iC1=%d, ic2=%d',
[iC1, iC2]
);
end;
I can supply the table via paste bin if you are interested.

Fast linear list that excludes duplicates

I have the following code:
procedure TCellBlock.GeneratePtoQ;
var
x,y: integer;
i: integer;
Change: cardinal;
begin
i:= 0;
//Walk the grid of changed (alive) cells
for x:= GridMaxX downto 1 do begin
for y:= GridMaxY downto 1 do begin
if Active[cIndexP][x, y] then begin
Active[cIndexP][x,y]:= false;
//Put active items on the stack.
ToDo[i]:= x shl 16 or y;
Inc(i);
end; {if}
end; {for y}
end; {for x}
while i > 0 do begin
Dec(i);
y:= ToDo[i] and $FFFF;
x:= ToDo[i] shr 16;
//Calculate the cell, Change = (oldval XOR newval)
Change:= Grid[x,y].GeneratePtoQ;
//Mark the cells in the grid that need to be recalculated next generation.
Active[cIndexQ][x,y]:= Active[cIndexQ][x,y] or (Change <> 0);
Active[cIndexQ][x+1,y+1]:= Active[cIndexQ][x+1,y+1] or ((Change and $cc000000) <> 0);
Active[cIndexQ][x+1,y]:= Active[cIndexQ][x+1,y] or ((Change and $ff000000) <> 0);
Active[cIndexQ][x,y+1]:= Active[cIndexQ][x,y+1] or ((Change and $cccccccc) <> 0);
end; {while}
end;
The above is a code snippet of a test program that calculates conway's game of life.
The code needs to be as fast as possible. And for this purpose I'm trying different approaches.
It walks though a grid of active cells, looks to see which cells are active and puts those
on a stack.
Next it processes the items on the stack and sees which cells have changed.
If a cell has changed it updates the changes into the grid for the next generation.
I store cells in 32bit cardinals (4 bits Y, 8 bits X) and the P (even) generations are offset 1,1 pixel relative to the Q (odd) generations, this way I only have to take 3 neighbors into account instead of 8.
Question
I want to get rid of the grid, I just want to deal with the stack.
How do I implement a stack that eliminates duplicates?
Note that it needs to be as fast as possible and I'm not above using dirty tricks to get that.
if i understood what you asked you want the stack to have no duplication values. i'm not a delphi person but if it was java i would created a hashmap/ map tree and add each value to the map and before adding it to the stack check if it's already in the hash. you can also add all the values th the hash iterate it but you will loose the order of the hash.
Personally I'd take a completely different approach. First I don't see how you don't have to take all neighbours into account just because of using a 1,1 offset and then I doubt that bitshifting tricks make the algorithm much faster (often enough it's the contrary, but then it could be mem bandwidth constrained in which case we'd win a bit)
So I'd just go for the one thing that should bring by far the largest performance gain: Making the algorithm multithreaded. In our world of Quad/Hex/Octacores worrying about a few percent performance increases while wasting 300% or more seems silly. So if we'd ignore the active grids and check all fields the algorithm would be trivial with some great scaling, especially since one could easily vectorize the algorithm, but then that's not especially work efficient so I'd try some different approaches towards multithreading an algorithm that only takes the active cells in account.
First instead of getting rid of the grid I'd double it: One src and one dest grid - that are swapped each round. No locking to access the grid necessary, don't have to worry about when updating the fields and no stale entries (important for multithreading we want to use the cache after all).
Now the simplest solution would be to use some kind of concurrent list structure (no idea about delphi libraries) for the active cells and let each thread steal from it and add new active cells to another. With a good lock-free implementation of a concurrent queue (basically whatever the replacement of this is in delphi) or something similar could be quite nice and simple. For a better performance instead of adding single nodes to the list, I'd think about adding whole chunks to the list, say in sizes of 10 or so - more work with less overhead but if we make the chunks too large we lose parallelism.
I can think of other solutions like giving every thread one list of active cells to work through (or more exactly one list for all and different offsets) but then we have to between each run gather all new entries (not much synchronization overhead but some copying) into a list - worth a try I assume.
If your goal is speed (and only speed). There is a few tricks that can speed things up a LOT. My own implementation of the Conway's Game of Life use those tricks to make it faster. Note that it is VERY expensive on memory.
Each cells are an object
Each cell object contains its X/Y coordinates
Each cell object contains a "live" counters of the number of Alive neighbors. (When a cell turns On/Off, it notify it's neighbor so they update their counters.
To make #3 works, when the next generation is calculated, cells are not turned On/Off right away. They are instead stacked into a list until all cells are calculated.
Each cell has a counter which indicate which is the last generation they changed on. That avoid calculating the same cell twice. (My alternative to the stack that eliminates duplicates)
The list of #5 is reused on the next generation, as only the neighbors of a cell that changed on the previous generation can change on the current one.
There are some of the tricks I use to speed up the generation. Some of the tricks listed here will get you a lot more than multithreading your implementation. But using both those and multithread will get the most performance possible.
As for the multithread subject, read Voo's entry.
I've been thinking about it and I think I have a solution.
some background
Here's how the data is in laid out in memory
00 A 08 B 10 18 The bits of Individual int32's are layout like this:
01 | 09 | 11 19 00 04 08 0C 10 14 18 1C // N-Mask: $33333333
02 | 0A | 12 1A 01 05 09 0D 11 15 19 1D // S-Mask: $cccccccc
03 | 0B | 13 1B 02 06 0A 0E 12 16 1A 1E // W-Mask: $000000ff
04 | 0C | 14 1C 03 07 0B 0F 13 17 1B 1F // E-Mask: $ff000000
05 | 0D | 15 1D //SE-Mask: $cc000000
06 | 0E | 16 1E //NW-Mask: $00000033
07 V 0F V 17 1F I can mask of different portions if need be.
-- Figure A: Grid -- -- Figure B: cell -- -- Table C: masks --
I haven't decided on the size of the building block, but this is the general idea.
Even generations are called P, odd generations are called Q.
They are staggered like this
+----------------+<<<<<<<< P 00 04 08 0C //I use a 64K lookup
|+---------------|+ 01 05* 09* 0D //table to lookup
|| || 02 06* 0A* 0E //the inner* 2x2 bits from
|| || 03 07 0B 0F //a 4x4 grid.
+----------------+| //I need to do 8 lookups for a 32 bit cell
+----------------+<<<<<<<< Q
- Figure D: Cells are staggered - -- Figure E: lookup --
This way when generating P -> Q, I only need to look at P itself and its S, SE, E neighbors, instead of all 8 neighbors, ditto for Q -> P. I need only look at Q itself and its N, NW and W neighbors.
Also notice that the staggering saves me time in translating the result of the lookup, because I have to do less bit shifting to put the results in place.
When I loop though a grid (Figure A) I walk though the cells (Figure B) in the order shown in figure A. Always in strictly increasing order in a P-cycle and always in decreasing order in a Q-cycle.
In fact the Q cycle works in exactly the opposite order from the P-cycle, this speeds things up by reusing the cache as much as possible.
I want to minimize using pointers as much as possible, because pointers cannot be predicted and are not accessed sequentially (they jump all over the place) So I want to use arrays, stacks and queues as much as possible.
What data do to need to keep track of
I need to keep track of only the cells that change. If a cell (that is an int32) does not change from one generation to the next I remove it from consideration.
This is what the code in the question does. It uses a grid to keep track of the changes, but I want to use a stack, not a grid; and I only want to deal with active cells I don't want to know about stable or dead cells.
Some background on the data
Notice how the cell itself is always monotonically increasing. As is its S-neighbor, as well as the E and SE-neighbor. I can use this info to cheat.
The solution
I use a stack to keep track of the cell itself and its S neighbor and a queue to keep track of its E and SE neighbor and when I'm done I merge the two.
Suppose in the Grid the following cells come out as active after I've calculated them:
00, 01, 08 and 15
I make the following two stacks:
stack A stack B
00 08 a) -A: Cell 00 itself in stack A and its E-neighbor in B
01 09 a) Cell 00's S neighbor in stack A and its SE-n'bor in B
02 0A b) -B: Cell 01 is already in the stack, we only add S/SE
08 10 c) -C: Cell 08 goes into the stack as normal
09 11 c) We'll sort out the merge later.
15 1D d) -D: Cell 15 and neighbors go on as usual.
16 1E d)
Now I push members from stack A and B onto a new stack C so that stack C has
no duplicates and it strictly increasing:
Here's the pseudo code to process the two queues:
a:= 0; b:= 0; c:=0;
while not done do begin
if stack[a] <= stack[b] then begin
stack[c]:= stack[a]; inc(a); inc(c);
if stack[a] = stack[b] then inc(b);
end
else begin
stack[c]:= stack[b]; inc(b); inc(c);
end;
end; {while}
And even better
I don't have to actually do the two stacks and the merging as two separate steps, if I make A a stack and B a queue, I can do the second step described in the pseudo code and the building of the two stacks in one pass.
Note
As a cell changes its S, E or SE border does not necessary need to change, but I can test for that using the masks in table C, and only add the cells that really need checking in the next generation to the list.
Benefits
Using this scheme, I only ever have to walk through one stack with active cells when calculating cells, so I don't waste time looking at dead or inactive cells.
I only do sequential memory accesses, maximizing cache usage.
Building the stack with new changes for the next generations only requires one extra temporary queue, which I process in strictly sequential order.
I do no sorting and the minimum of comparisons.
I don't have to keep track of the neighbors of each individual cells (int32), I only need to keep track of the neighbors (S,E,SE, N,W,NW) of the grids, this keeps the memory overhead to a minimum.
I don't need to keep track of a cells status, I only need to count dead cells (A cell is either dead, because it was dead before, or because it changed into dead. All the active cells are in my TODO stack, this saves bookkeeping time and memory space.
The algorithm runs in o(n) time where (n) is the number of active cells, it excludes dead cells, stable cells and cells that oscillate with period 2.
I only ever deal with 32 bit cardinals, which is the much faster than using int16's.
Mostly #Ken, the complete sourcecode for the test program:
Note that 99,9% of the time is spend in displaying, because I haven't done anything to
optimize that.
I've created a new SDI-main app and posted the code in that and because I'm lazy I haven't bothered to rename or repaint any controls.
Project file: sdiapp.dpr
program Sdiapp;
uses
Forms,
SDIMAIN in 'SDIMAIN.pas'; {Form1}
{$R *.RES}
begin
Application.Initialize;
Application.CreateForm(TForm1, Form1);
Application.Run;
end.
Main form: sdimain.pas
unit SDIMAIN;
interface
uses Windows, Classes, Graphics, Forms, Controls, Menus,
Dialogs, StdCtrls, Buttons, ExtCtrls, ComCtrls, ImgList, StdActns,
ActnList, ToolWin;
{--------------------------------------------
p and q are bit arrays of 16x16 bits, grouped
as in 8 int32's as follows
P00 P04 P08 P0c P10 P14 P18 P1c
P01 P05 P09 P0d P11 P15 P19 P1d
P02 P06 P0a P0e P12 P16 P1a P1e
P03 P07 P0b P0f P13 P17 P1b P1f
|
+----> The bits per int32 are grouped as follows
The int32's are grouped as follows
P0 P1
P2 P3
P4 P5
P6 P7
P and Q are staggered as follows:
+---------------------------------+ <---- P
| +-------------------------------|-+ <----Q
| | | |
| | | |
... ...
| | | |
+-|-------------------------------+ |
+---------------------------------+
Generations start counting from 0,
all even generations are stored in P.
all odd generations are stored in Q.
When generating P->Q, the S, SE and E neighbors are checked.
When generating Q->P, the N, NW and W neighbors are checked.
The westernmost P edge in a grid is stored inside that grid.
Ditto for all easternmost Q edges.
--------------------------------------------}
const
cClearQState = $fffffff0;
cClearPState = $fffff0ff;
cIndexQ = 1;
cIndexP = 0;
ChangeSelf = 0;
ChangeNW = 1;
ChangeW = 2;
ChangeN = 3;
ChangeSE = 1;
ChangeE = 2;
ChangeS = 3;
const
//A Grid is 128 x 128 pixels.
GridSizeX = 512 div 8; //should be 128/8, 1024 for testing.
GridSizeY = GridSizeX * 2; //32 totaal: 16x32x4bytes = 2048 x 2 (p+q) = 4k per block.
GridMaxX = GridSizeX - 1;
GridMaxY = GridSizeY - 1;
NumberOfCells = GridSizeX * GridSizeY;
CellSizeX = 8;
CellSizeY = 4;
CellMaxX = CellSizeX - 1;
CellMaxY = CellSizeY - 1;
type
TUnit = Cardinal;
TBytes = array[0..3] of byte;
TChange = array[0..3] of boolean;
type
TCellBlock = class;
TFlags = record
case boolean of
true: (whole: cardinal);
false: (part: array[0..3] of byte);
end;
//TActiveList = array[0..GridMaxX, 0..GridMaxY] of boolean;
//TActive = array[0..1] of TActiveList;
TToDoList = array[-1..NumberOfCells] of cardinal; //Padding on both sides.
TNewRow = TFlags;
PCell = ^TCell;
TCell = record
public
p: TUnit;
q: TUnit;
procedure SetPixel(x,y: integer; InP: Boolean = true);
function GeneratePtoQ: cardinal; inline;
function GenerateQtoP: cardinal; inline;
end;
//A grid contains pointers to an other grid, a unit or nil.
//A grid can contain grids (and nils) or units (and nils), but not both.
PGrid = ^TGrid;
TGrid = array[0..GridMaxX,0..GridMaxY] of TCell;
TCellBlock = class(TPersistent)
private
FHasCells: boolean;
FLevel: integer;
FGrid: TGrid;
ToDoP: TToDoList;
ToDoQ: TToDoList;
PCount: integer;
QCount: integer;
FParent: TCellBlock;
FMyX,FMyY: integer;
N,W,NW: TCellBlock;
S,E,SE: TCellBlock;
procedure GeneratePtoQ; virtual;
procedure GenerateQtoP; virtual;
procedure UpdateFlagsPtoQ; virtual;
procedure UpdateFlagsQtoP; virtual;
procedure Generate; virtual;
procedure Display(ACanvas: TCanvas); virtual;
procedure SetPixel(x,y: integer);
property Grid: TGrid read FGrid write FGrid;
public
constructor Create(AParent: TCellBlock);
destructor Destroy; override;
property Parent: TCellBlock read FParent;
property HasCells: boolean read FHasCells;
property Level: integer read FLevel;
property MyX: integer read FMyX;
property MyY: integer read FMyY;
end;
TCellParent = class(TCellBlock)
private
procedure GeneratePtoQ; override;
procedure GenerateQtoP; override;
//procedure Display(Startx,StartY: integer; ACanvas: TCanvas); override;
public
constructor CreateFromChild(AChild: TCellBlock; ChildX, ChildY: integer);
constructor CreateFromParent(AParent: TCellParent);
destructor Destroy; override;
end;
type
TForm1 = class(TForm)
ToolBar1: TToolBar;
ToolButton9: TToolButton;
ToolButton1: TToolButton;
ToolButton2: TToolButton;
ToolButton3: TToolButton;
ToolButton4: TToolButton;
ActionList1: TActionList;
FileNew1: TAction;
FileOpen1: TAction;
FileSave1: TAction;
FileSaveAs1: TAction;
FileExit1: TAction;
EditCut1: TEditCut;
EditCopy1: TEditCopy;
EditPaste1: TEditPaste;
HelpAbout1: TAction;
StatusBar: TStatusBar;
ImageList1: TImageList;
Image1: TImage;
Timer1: TTimer;
Label1: TLabel;
procedure FileNew1Execute(Sender: TObject);
procedure FileSave1Execute(Sender: TObject);
procedure FileExit1Execute(Sender: TObject);
procedure Timer1Timer(Sender: TObject);
procedure FileOpen1Execute(Sender: TObject);
procedure ToolButton4Click(Sender: TObject);
private
MyBlock: TCellBlock;
MyBitmap: TBitmap;
BitmapData: array[0..1024,0..(1024 div 32)] of integer;
procedure InitLookupTable;
procedure RestartScreen;
public
{ Public declarations }
end;
var
Form1: TForm1;
const
cLiveCell = $88888888;
cLiveVerticalP = $40404040;
cLiveVerticalQ = $04040404;
cLiveTop = $00000088;
cLiveBottom = $88000000;
cLivePCorner = $00000040;
cLiveQCorner = $04000000;
cUnstableCell = $22222222;
cUnstableVerticalP = $10101010;
cUnstableVerticalQ = $01010101;
cUnstableTop = $00000022;
cUnstableBottom = $22000000;
cUnstablePCorner = $00000010;
cUnstableQCorner = $01000000;
cAllDead = $00000000;
cAllLive = $ffffffff;
cLiveRow = $8;
cLive2x2 = $4;
cUnstableRow = $2;
cUnstable8x4 = $22;
cUnstable2x2 = $1;
cUnstable2x4 = $11;
cStateMask: array [0..7] of cardinal =
($fffffff0, $ffffff0f, $fffff0ff, $ffff0fff, $fff0ffff, $ff0fffff, $f0ffffff, $0fffffff);
var
LookupTable: array[0..$FFFF] of byte;
Generation: int64;
implementation
uses about, sysutils, clipbrd, Math;
{$R *.dfm}
type
bool = longbool;
procedure getCPUticks(var i : int64);
begin
asm
mov ECX,i;
RDTSC; //cpu clock in EAX,EDX
mov [ECX],EAX;
mov [ECX+4],EDX;
end;
end;
function IntToBin(AInt: integer): string;
var
i: integer;
begin
i:= SizeOf(AInt)*8;
Result:= StringOfChar('0',i);
while (i > 0) do begin
if Odd(AInt) then Result[i]:= '1';
AInt:= AInt shr 1;
Dec(i);
end; {while}
end;
constructor TCellBlock.Create(AParent: TCellBlock);
begin
inherited Create;
FParent:= AParent;
ToDoQ[-1]:= $ffffffff;
ToDoP[-1]:= $ffffffff;
end;
destructor TCellBlock.Destroy;
begin
inherited Destroy;
end;
procedure TCell.SetPixel(x: Integer; y: Integer; InP: Boolean = true);
var
Mask: cardinal;
Offset: Integer;
begin
//0,0 is the topleft pixel, no correction for p,q fase.
x:= x mod 8;
y:= y mod 4;
Offset:= x * 4 + y;
Mask:= 1 shl Offset;
if (InP) then p:= p or Mask else q:= q or Mask;
end;
procedure TCellBlock.SetPixel(x: Integer; y: Integer);
var
GridX, GridY: integer;
x1,y1: integer;
i: integer;
begin
x:= x + (GridSizeX div 2) * CellSizeX;
y:= y + (GridSizeY div 2) * CellSizeY;
if Odd(Generation) then begin
Dec(x); Dec(y);
QCount:= 0;
end
else PCount:= 0;
GridX:= x div CellSizeX;
GridY:= y div CellSizeY;
if (GridX in [0..GridMaxX]) and (GridY in [0..GridMaxY]) then begin
Grid[GridX,GridY].SetPixel(x,y);
i:= 0;
for x1:= 1 to GridMaxX-1 do begin
for y1:= 1 to GridMaxY-1 do begin
case Odd(Generation) of
false: begin
ToDoP[i]:= (x1 shl 16 or y1);
Inc(PCount);
end;
true: begin
ToDoQ[i]:= (x1 shl 16 or y1);
Inc(QCount);
end;
end; {case}
Inc(i);
end; {for y}
end; {for x}
end; {if}
end;
//GeneratePtoQ
//This procedure generates the Q data and QState-flags
//using the P-data and PState-flags.
procedure TCellBlock.Generate;
begin
if Odd(Generation) then GenerateQtoP
else GeneratePtoQ;
Inc(Generation);
end;
const
MaskS = $cccccccc;
MaskE = $ff000000;
MaskSE = $cc000000;
procedure TCellBlock.GeneratePtoQ;
var
x,y: integer;
i: integer;
Change: cardinal;
ToDoA: TToDoList;
ToDoB: TToDoList;
A, B: integer;
done: boolean;
Address: cardinal;
begin
i:= 0;
A:= 0; B:= 0;
ToDoA[-1]:= $ffffffff;
ToDoB[-1]:= $ffffffff;
while (i < PCount) do begin
y:= ToDoP[i] and $FFFF;
x:= ToDoP[i] shr 16;
Inc(i);
if (x = GridMaxX) or (y = GridMaxY) then continue; //Skip the loop.
Change:= Grid[x,y].GeneratePtoQ;
if (Change <> 0) then begin
Address:= (x shl 16 or y);
if ToDoA[A-1] <> Address then begin
ToDoA[A]:= Address; Inc(A);
end;
if (Change and MaskS) <> 0 then begin
ToDoA[A]:= Address + 1;
Inc(A);
end; {if S changed}
if ((Change and MaskE) <> 0) then begin
Address:= Address + (1 shl 16);
if ToDoB[B-1] <> Address then begin
ToDoB[B]:= Address;
Inc(B);
end;
if ((Change and MaskSE) <> 0) then begin
ToDoB[B]:= Address + 1;
Inc(B);
end; {if SE changed}
end; {if E changed}
end; {if whole cell changed}
end; {while}
ToDoA[A]:= $ffffffff;
ToDoB[B]:= $ffffffff;
ToDoB[B+1]:= $ffffffff;
a:= 0; b:= 0; QCount:= 0;
Done:= (ToDoA[a] = $ffffffff) and (ToDoB[b] = $ffffffff);
while not done do begin
if ToDoA[a] <= ToDoB[b] then begin
ToDoQ[QCount]:= ToDoA[a]; inc(a); inc(QCount);
if ToDoA[a] = ToDoB[b] then inc(b);
end
else begin
ToDoQ[QCount]:= ToDoB[b]; inc(b); inc(QCount);
end;
Done:= (ToDoA[a] = $ffffffff) and (ToDoB[b] = $ffffffff);
end; {while}
end;
const
MaskN = $33333333;
MaskW = $000000ff;
MaskNW = $00000033;
procedure TCellBlock.GenerateQtoP;
var
x,y: integer;
i: integer;
Change: cardinal;
ToDoA: TToDoList;
ToDoB: TToDoList;
A, B: integer;
done: boolean;
Address: cardinal;
begin
i:= 0;
A:= 0; B:= 0;
ToDoA[-1]:= $ffffffff;
ToDoB[-1]:= $ffffffff;
while (i < QCount) do begin
y:= ToDoQ[i] and $FFFF;
x:= ToDoQ[i] shr 16;
Inc(i);
if (x = 0) or (y = 0) then Continue; //Skip the rest of the loop.
Change:= Grid[x,y].GenerateQtoP;
if (Change <> 0) then begin
Address:= (x shl 16 or y);
if ToDoA[A-1] <> Address then begin
ToDoA[A]:= Address; Inc(A);
end;
if (Change and MaskN) <> 0 then begin
ToDoA[A]:= Address - 1;
Inc(A);
end; {if N changed}
if ((Change and MaskW) <> 0) then begin
Address:= Address - (1 shl 16);
if ToDoB[B-1] <> Address then begin
ToDoB[B]:= Address;
Inc(B);
end;
if ((Change and MaskNW) <> 0) then begin
ToDoB[B]:= Address - 1;
Inc(B);
end; {if NW changed}
end; {if W changed}
end; {if whole cell changed}
end; {while}
ToDoA[A]:= $ffffffff;
ToDoB[B]:= $ffffffff;
ToDoB[B+1]:= $ffffffff;
a:= 0; b:= 0; PCount:= 0;
Done:= (ToDoA[a] = $ffffffff) and (ToDoB[b] = $ffffffff);
while not done do begin
if ToDoA[a] <= ToDoB[b] then begin
ToDoP[PCount]:= ToDoA[a]; inc(a); inc(PCount);
if ToDoA[a] = ToDoB[b] then inc(b);
end
else begin
ToDoP[PCount]:= ToDoB[b]; inc(b); inc(PCount);
end;
Done:= (ToDoA[a] = $ffffffff) and (ToDoB[b] = $ffffffff);
end; {while}
end;
(*
procedure TCellBlock.GenerateQtoP;
var
x,y: integer;
i: integer;
Change: cardinal;
begin
i:= 0;
for x:= 0 to GridMaxX - 1 do begin
for y:= 0 to GridMaxY -1 do begin
if Active[cIndexQ][x, y] then begin
Active[cIndexQ][x, y]:= false;
ToDo[i]:= x shl 16 or y;
Inc(i);
end; {if}
end; {for y}
end; {for x}
while i > 0 do begin
Dec(i);
y:= ToDo[i] and $FFFF;
x:= ToDo[i] shr 16;
Change:= Grid[x,y].GenerateQtoP;
Active[cIndexP][x,y]:= Active[cIndexP][x,y] or (Change <> 0);
Active[cIndexP][x-1,y-1]:= Active[cIndexP][x-1,y-1] or ((Change and $00000033) <> 0);
Active[cIndexP][x-1,y]:= Active[cIndexP][x-1,y] or ((Change and $000000ff) <> 0);
Active[cIndexP][x,y-1]:= Active[cIndexP][x,y-1] or ((Change and $33333333) <> 0);
end; {while}
end; (**)
procedure TCellBlock.UpdateFlagsPtoQ;
begin
//nog in te vullen.
end;
procedure TCellBlock.UpdateFlagsQtoP;
begin
//nog in te vullen
end;
function TCell.GeneratePtoQ: cardinal;
var
NewQ: cardinal;
Change: cardinal;
const
Mask1 = $f;
Mask2 = $ff;
Mask4 = $ffff;
Row1Mask = $33333333; //0011-0011-0011-0011-0011-0011-0011-0011
Row2Mask = $cccccccc; //1100-1100-1100-1100-1100-1100-1100-1100
function MakeNewBrick(p0,p1,p2,p3: cardinal): cardinal; inline;
var
Row1, Row2: cardinal;
begin
//Generate new Brick using a 2x2 grid of bricks ordered like:
//p0 p1
//p2 p3
//First row inside P0
if (p0 <> 0) then Row1:=
LookupTable[p0 and $ffff] or
LookupTable[(p0 shr 8) and $ffff] shl 8 or
LookupTable[(p0 shr 16)] shl 16 or
LookupTable[(p0 shr 24) or (p1 and $ff) shl 8] shl 24
else Row1:= LookupTable[(p1 and $ff) shl 8] shl 24;
(**)
p0:= ((p0 and $cccccccc)) or ((p2 and $33333333));
p1:= ((p1 and $cc)) or ((p3 and $33));
if (p0 <> 0) then Row2:=
LookupTable[p0 and $ffff] or
LookupTable[(p0 shr 8) and $ffff] shl 8 or
LookupTable[(p0 shr 16)] shl 16 or
LookupTable[(p0 shr 24) or ((p1 and $ff) shl 8)] shl 24
else Row2:= LookupTable[(p1 and $ff) shl 8] shl 24;
Result:= (Row1 and Row1Mask) or (Row2 and Row2Mask);
end;
begin
NewQ:= MakeNewBrick(Self.p, PGrid(#Self)^[1,0].p, PGrid(#Self)^[0,1].p, PGrid(#Self)^[1,1].p);
Result:= NewQ xor q;
q:= NewQ;
end;
function TCell.GenerateQtoP: cardinal;
var
Offset: integer;
NewP: cardinal;
Change: cardinal;
const
Row1Mask = $33333333; //0011-0011-0011-0011-0011-0011-0011-0011
Row2Mask = $cccccccc; //1100-1100-1100-1100-1100-1100-1100-1100
function MakeNewBrick(q0,q1,q2,q3: cardinal): cardinal; inline;
var
Row1, Row2: cardinal;
begin
//Generate new Brick using a 2x2 grid of bricks ordered like:
//q3 q2
//q1 q0
if (q0 <> 0) then Row1:=
LookupTable[(q0 shr 16)] shl 26 or
LookupTable[(q0 shr 8 ) and $ffff] shl 18 or
LookupTable[(q0 ) and $ffff] shl 10 or
LookupTable[((q0 and $ff) shl 8) or (q1 shr 24)] shl 2
else Row1:= LookupTable[(q1 shr 24)] shl 2;
(*
q0:= ((q0 and $33333333) shl 2) or ((q2 and $cccccccc) shr 2);
q1:= ((q1 and $33000000) shl 2) or ((q3 and $cc000000) shr 2);
if (q0 <> 0) then Row2:=
LookupTable[(q0 shr 16) and $ffff] shl 24 or
LookupTable[(q0 shr 8) and $ffff] shl 16 or
LookupTable[(q0 ) and $ffff] shl 8 or
LookupTable[((q0 and $ff) shl 8) or (q1 shr 24)]
else Row2:= LookupTable[(q1 shr 24)];
(**)
q0:= ((q0 and $33333333)) or ((q2 and $cccccccc));
q1:= ((q1 and $33000000)) or ((q3 and $cc000000));
if (q0 <> 0) then Row2:=
LookupTable[(q0 shr 16)] shl 22 or
LookupTable[(q0 shr 8) and $ffff] shl 14 or
LookupTable[(q0 ) and $ffff] shl 6 or
LookupTable[((q0 and $ff) shl 8) or (q1 shr 24)] shr 2
else Row2:= LookupTable[(q1 shr 24)] shr 2;
Result:= (Row1 and Row2Mask) or (Row2 and Row1Mask);
end;
begin
Offset:= -1;
NewP:= MakeNewBrick(Self.q, PGrid(#Self)^[Offset,0].q, PGrid(#Self)^[0,Offset].q, PGrid(#Self)^[Offset, Offset].q);
Result:= NewP xor P;
P:= NewP;
end;
procedure TCellBlock.Display(ACanvas: TCanvas);
var
GridX,GridY: integer;
//Offset: integer;
procedure DisplayCell(ACell: TCell);
var
x,y,x1,y1: integer;
Row, Mask: integer;
DoPixel: boolean;
Offset: integer;
DrawOffset: integer;
InP: boolean;
begin
DrawOffset:= (Generation and 1);
InP:= not(Odd(Generation));
for y:= 0 to CellMaxY do begin
for x:= 0 to CellMaxX do begin
//if (x = 0) or (y = 0) then ACanvas.Pixels[GridX*16+x+Offset,GridY*16+y+Offset]:= clBtnFace;
//0,0 is the topleft pixel, no correction for p,q fase.
x1:= x mod 8;
y1:= y mod 4;
Offset:= x1 * 4 + y1;
Mask:= 1 shl Offset;
if (InP) then DoPixel:= (ACell.p and Mask) <> 0
else DoPixel:= (ACell.q and Mask) <> 0;
if DoPixel then ACanvas.Pixels[GridX*CellSizeX+x+DrawOffset, GridY*CellSizeY+y+DrawOffset]:= clBlack;
end; {for x}
end; {for y}
end; (**)
begin
ACanvas.Rectangle(-1,-1,1000,1000);
FillChar(Form1.BitmapData, SizeOf(Form1.BitmapData), #0);
for GridY:= 0 to GridMaxY do begin
for GridX:= 0 to GridMaxX do begin
if Int64(Grid[GridX, GridY]) <> 0 then begin
DisplayCell(Grid[GridX,GridY]);
end;
end;
end;
end;
//--------------------------------------
//A Parent is every layer above the ground level
//the tree grows from the bottom up.
//A new parent is placed on top of the last one and
//always has one and only one child to start with, from there
//the tree grows down again.
constructor TCellParent.CreateFromChild(AChild: TCellBlock; ChildX: Integer; ChildY: Integer);
begin
inherited Create(nil);
end;
constructor TCellParent.CreateFromParent(AParent: TCellParent);
begin
inherited Create(AParent);
end;
destructor TCellParent.Destroy;
begin
inherited Destroy;
end;
procedure TCellParent.GeneratePtoQ;
begin
end;
procedure TCellParent.GenerateQtoP;
begin
end;
//The bitmap for the lookup table is as follows:
// 0 2 4 6
// +----+
// 1 |3 5| 7
// 8 |A C| E
// +----+
// 9 B D F
// The inner 2x2 cells are looked up.
// so 0241358AC make up bit 3 etc.
procedure TForm1.InitLookupTable;
const
//Masks for normal order.
MaskNW = $0757; //0000-0111-0101-0111
MaskSW = $0EAE; //0000-1110-1010-1110
MaskNE = $7570; //0111-0101-0111-0000
MaskSE = $EAE0; //1110-1010-1110-0000
//Bitlocations for normal order
BitNW = $0020; //0000-0000-0010-0000
BitSW = $0040; //0000-0000-0100-0000
BitNE = $0200; //0000-0020-0000-0000
BitSE = $0400; //0000-0100-0000-0000
//Lookup table also has a shifted order. here the bottom half of the N word
//and the top half of the south word combine.
//Like so:
// 2 6 A E
// 3 7 B F
// 0 4 8 C
// 1 5 9 D
//Mask for split order.
Mask2NW = $0D5D; // 0000-1101-0101-1101
Mask2SW = $0BAB; // 0000-1011-1010-1011
Mask2NE = $D5D0; // 1101-0101-1101-0000
Mask2SE = $BAB0; // 1011-1010-1011-0000
//Bitlocations for split order
Bit2NW = $0080; // 0000-0000-1000-0000
Bit2SW = $0010; // 0000-0000-0001-0000
Bit2NE = $0800; // 0000-1000-0000-0000
Bit2SE = $0100; // 0000-0001-0000-0000
ResultNW = $01;
ResultSW = $02;
ResultNE = $10;
ResultSE = $20;
Result2NW = $04;
Result2SW = $08;
Result2NE = $40;
Result2SE = $80;
var
i: integer;
iNW, iNE, iSW, iSE: cardinal;
Count: integer;
ResultByte: byte;
function GetCount(a: integer): integer;
var
c: integer;
begin
Result:= 0;
for c:= 0 to 15 do begin
if Odd(a shr c) then Inc(Result);
end; {for c}
end; {GetCount}
begin
//Fill the normal lookup.
for i:= 0 to $ffff do begin
ResultByte:= 0;
iNW:= i and MaskNW;
Count:= GetCount(iNW);
case Count of //count excluding bit itself
3: ResultByte:= ResultNW;
2: if ((i and BitNW) <> 0) then ResultByte:= ResultNW;
end;
iSW:= i and MaskSW;
Count:= GetCount(iSW);
case Count of
3: ResultByte:= ResultByte or ResultSW;
2: if ((i and BitSW) <> 0) then ResultByte:= ResultByte or ResultSW;
end;
iNE:= i and MaskNE;
Count:= GetCount(iNE);
case Count of
3: ResultByte:= ResultByte or ResultNE;
2: if ((i and BitNE) <> 0) then ResultByte:= ResultByte or ResultNE;
end;
iSE:= i and MaskSE;
Count:= GetCount(iSE);
case Count of
3: ResultByte:= ResultByte or ResultSE;
2: if ((i and BitSE) <> 0) then ResultByte:= ResultByte or ResultSE;
end;
LookupTable[i]:= ResultByte;
end; {for i}
//Fill the shifted lookup.
for i:= 0 to $ffff do begin
ResultByte:= 0;
iNW:= i and Mask2NW;
Count:= GetCount(iNW);
case Count of //count excluding bit itself
3: ResultByte:= Result2NW;
2: if ((i and Bit2NW) <> 0) then ResultByte:= Result2NW;
end;
iSW:= i and Mask2SW;
Count:= GetCount(iSW);
case Count of
3: ResultByte:= ResultByte or Result2SW;
2: if ((i and Bit2SW) <> 0) then ResultByte:= ResultByte or Result2SW;
end;
iNE:= i and Mask2NE;
Count:= GetCount(iNE);
case Count of
3: ResultByte:= ResultByte or Result2NE;
2: if ((i and Bit2NE) <> 0) then ResultByte:= ResultByte or Result2NE;
end;
iSE:= i and Mask2SE;
Count:= GetCount(iSE);
case Count of
3: ResultByte:= ResultByte or Result2SE;
2: if ((i and Bit2SE) <> 0) then ResultByte:= ResultByte or Result2SE;
end;
LookupTable[i]:= LookupTable[i] or ResultByte;
end; {for i} (**)
end;
procedure TForm1.RestartScreen;
begin
MyBlock.Free;
MyBlock:= TCellBlock.Create(nil);
//MyBlock.SetPixel(5,7);
//MyBlock.SetPixel(6,7);
//MyBlock.SetPixel(7,7);
//MyBlock.SetPixel(7,6);
//MyBlock.SetPixel(6,5);
MyBlock.SetPixel(10,0);
MyBlock.SetPixel(11,0);
MyBlock.SetPixel(9,1);
MyBlock.SetPixel(10,1);
MyBlock.SetPixel(10,2);
end;
procedure TForm1.Timer1Timer(Sender: TObject);
begin
if Assigned(MyBlock) then begin
MyBlock.Generate;
MyBlock.Display(Image1.Canvas);
end;
end;
procedure TForm1.ToolButton4Click(Sender: TObject);
begin
if Assigned(MyBlock) then begin
MyBlock.Generate;
MyBlock.Display(Image1.Canvas);
end;
end;
procedure TForm1.FileNew1Execute(Sender: TObject);
begin
InitLookupTable;
FillChar(BitmapData, SizeOf(BitmapData), #0);
MyBitmap:= TBitmap.Create;
MyBitmap.SetSize(1024,1024);
MyBitmap.PixelFormat:= pf1bit;
MyBitmap.Monochrome:= true;
//MyBitmap.Handle:= CreateBitmap(1000,1000,1,2,nil);
Generation:= 0;
RestartScreen;
MyBlock.Display(Image1.Canvas);
//if (Sender = FileNew1) then Timer1.Enabled:= not(Timer1.Enabled);
end;
procedure TForm1.FileOpen1Execute(Sender: TObject);
var
i,a: integer;
start, eind: int64;
Diff: double;
LowDiff: double;
begin
LowDiff:= MaxInt;
for a:= 0 to 10 do begin
FileNew1Execute(Sender);
GetCPUTicks(start);
for i:= 0 to 1000 do begin
MyBlock.Generate;
end;
GetCPUTicks(eind);
//Label1.Caption:= IntToStr(Eind - Start);
Diff:= Eind - start;
LowDiff:= Min(Diff, LowDiff);
Label1.Caption:= Format('%10.0n',[lowdiff]) + ' CPU cycles per 1,000 generations';
Clipboard.AsText:= Label1.Caption;
end; {for a}
MyBlock.Display(Image1.Canvas);
end;
procedure TForm1.FileSave1Execute(Sender: TObject);
begin
Timer1.Enabled:= not(Timer1.Enabled);
end;
procedure TForm1.FileExit1Execute(Sender: TObject);
begin
Close;
end;
initialization
Generation:= 0;
end.
Stackoverflow does not allow me to post the form file due to a size limit, but I hope you can manage without.

Resources