Fast find a DWORD position in array of byte delphi - delphi-2010

finding fastest way to find dword in array of byte
I have this dword for example. I implemented a method but its slow because I convert byte array to hex string and search the dword as string, the conversion takes up most of the time!!
need to find pos of this dword --> 01 49 08 EF 48 C0 C6 91
var
myarray:array of byte;
p:integer;
begin
p:= pos('014908EF48C0C691',array2hex(myarray));
end;
i need to find the position of the dword fast search in byte array with out converting !

A DWORD is 4 bytes, so 01 49 08 EF 48 C0 C6 91 is too large to be a single DWORD. It is either 2 DWORD values, or it is a (U)Int64.
But either way, converting the array to a string is definitely the wrong way to go. Just search the raw array data as-is instead, eg:
const
bytesToFind: array[0..7] of Byte = ($01, $49, $08, $EF, $48, $C0, $C6, $91);
var
myarray: array of byte;
I, FoundAtIndex, Len: integer;
begin
myarray := ...;
Len := Length(myarray);
FoundAtIndex := -1;
for I := 0 to Len-8 do
begin
if (myarray[I] = $01) and
((I+8) <= Len) and
CompareMem(#myarray[I], #bytesToFind, 8) then
begin
FoundAtIndex := I;
Break;
end;
end;
if FoundAtIndex <> -1 then
begin
// use FoundAtIndex as needed...
end else
begin
// not found...
end;
end;

Related

How to call NtOpenFile?

I'm trying to call NtOpenFile, and it's failing with error:
STATUS_OBJECT_PATH_SYNTAX_BAD = NTSTATUS($C000003B);
Object Path Component was not a directory object.
The basic gist is:
//The file we'll test with
filename: UnicodeString := 'C:\Windows\Explorer.exe'; //23 characters
//Convert the filename to counted UNICODE_STRING
cs: UNICODE_STRING;
cs.Length := Length(filename) * sizeof(WideChar); //46 bytes
cs.MaximumLength := cs.Length + 2; //48 bytes
cs.Buffer := PWideChar(Filename); //"C:\Windows\Explorer.exe"
//Define the OBJECT_ATTRIBUTES
oa: OBJECT_ATTRIBUTES := Default(OBJECT_ATTRIBUTES);
oa.Length := sizeof(OBJECT_ATTRIBUTES); //24 bytes
oa.Attributes := OBJ_CASE_INSENSITIVE;
oa.ObjectName := #cs; //UNICODE_STRING
//Open the file (by Object Attributes) and get a file handle
hFile: THandle;
iosb: IO_STATUS_BLOCK;
status: NTSTATUS := NtOpenFile(#hFile, FILE_READ_ATTRIBUTES, #oa, #iosb, FILE_SHARE_READ, 0);
What am I doing wrong?
Basic gist (C#-style psuedocode)
//The file we'll test with
UnicodeString filename = "C:\Windows\Explorer.exe"; //23 characters
//Convert the filename to counted UNICODE_STRING
UNICODE_STRING cs;
cs.Length = Length(filename) * sizeof(WideChar); //46 bytes
cs.MaximumLength = cs.Length + 2; //48 bytes
cs.Buffer = Filename; //"C:\Windows\Explorer.exe"
//Define the OBJECT_ATTRIBUTES
OBJECT_ATTRIBUTES oa = Default(OBJECT_ATTRIBUTES);
oa.Length = sizeof(OBJECT_ATTRIBUTES); //24 bytes
oa.Attributes = OBJ_CASE_INSENSITIVE;
oa.ObjectName = cs; //UNICODE_STRING
//Open the file (by Object Attributes) and get a file handle
THandle hFile;
IO_STATUS_BLOCK iosb;
NTSTATUS status = NtOpenFile(out hFile, FILE_READ_ATTRIBUTES, ref oa, out iosb, FILE_SHARE_READ, 0);
Other styles of filenames
Filename
Result
Description
"C:\Windows\Explorer.exe"
STATUS_OBJECT_PATH_SYNTAX_BAD
Object Path Component was not a directory object.
"\global??\C:\Windows\Explorer.exe"
0xC0000033
Object Name invalid
"\??\C:\Windows\Explorer.exe"
0xC0000033
Object Name invalid
OBJECT_ATTRIBUTES raw memory dump
Because there can be issues of memory layout, alignment, padding, and missing members, lets dump the raw oa memory:
18 00 00 00 ;Length. 0x00000018 = 24 bytes (sizeof)
00 00 00 00 ;RootDirectory. 0x00000000 = NULL
28 FF 19 00 ;ObjectName. PUNICODE_STRING 0x0019FF28
40 00 00 00 ;Attributes. (0x00000040 = OBJ_CASE_INSENSITIVE)
00 00 00 00 ;SecurityDescriptor 0x0000000 = NULL
00 00 00 00 ;SecurityQualityOfService 0x00000000 = NULL
**0x0019FF28:**
3C FF 19 00 ;PUNICODE_STRING 0x0019FF3C
**0x0019FF3C**
36 00 ; String length in bytes 0x0036
38 00 ; Buffer size in bytes 0x0038
E8 B6 4E 00 ; PWideChar 0x004EB6E8 ==> "C:\Windows\Explorer.exe"
Which, now I see my problem: PUNICODE_STRING -> PUNICODE_STRING.
CMRE
program NtOpenFileDemo;
{$APPTYPE CONSOLE}
{$R *.res}
{$ALIGN 8}
{$MINENUMSIZE 4}
uses
SysUtils, Windows, ComObj;
type
NTSTATUS = Cardinal;
const
STATUS_SUCCESS = 0;
type
UNICODE_STRING = packed record
Length: Word; // Length of the string, in bytes (not including the null terminator)
MaximumLength: Word; // Size of the buffer, in bytes
Buffer: PWideChar; //really a PWideChar
end;
PUNICODE_STRING = ^UNICODE_STRING;
type
IO_STATUS_BLOCK = packed record
Status: NTSTATUS;
Information: Pointer;
end;
PIO_STATUS_BLOCK = ^IO_STATUS_BLOCK;
OBJECT_ATTRIBUTES = packed record
Length: Cardinal;
RootDirectory: THandle;
ObjectName: PUNICODE_STRING;
Attributes: Cardinal;
SecurityDescriptor: Pointer;
SecurityQualityOfService: Pointer;
end;
POBJECT_ATTRIBUTES = ^OBJECT_ATTRIBUTES;
const
// Define share access rights to files and directories
FILE_SHARE_READ = $00000001;
FILE_SHARE_WRITE = $00000002;
FILE_SHARE_DELETE = $00000004;
FILE_SHARE_VALID_FLAGS = $00000007;
// Valid values for the Attributes field
const
OBJ_INHERIT = $00000002;
OBJ_PERMANENT = $00000010;
OBJ_EXCLUSIVE = $00000020;
OBJ_CASE_INSENSITIVE = $00000040;
OBJ_OPENIF = $00000080;
OBJ_OPENLINK = $00000100;
OBJ_KERNEL_HANDLE = $00000200;
OBJ_VALID_ATTRIBUTES = $000003F2;
function NtOpenFile(FileHandle: PHandle; DesiredAccess: ACCESS_MASK; ObjectAttributes: POBJECT_ATTRIBUTES;
IoStatusBlock: PIO_STATUS_BLOCK; ShareAccess: DWORD; OpenOptions: DWORD): NTSTATUS; stdcall; external 'ntdll.dll';
function NtClose(Handle: THandle): NTSTATUS; stdcall; external 'ntdll.dll';
function FormatNTStatusMessage(const NTStatusMessage: NTSTATUS): string;
var
Buffer: PChar;
Len: Integer;
hMod: HMODULE;
function MAKELANGID(p, s: WORD): WORD;
begin
Result := WORD(s shl 10) or p;
end;
begin
{
KB259693: How to translate NTSTATUS error codes to message strings
Let the OS initialize the Buffer variable. Need to LocalFree it afterward.
}
hMod := SafeLoadLibrary('ntdll.dll');
Len := FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER or
FORMAT_MESSAGE_FROM_SYSTEM or
// FORMAT_MESSAGE_IGNORE_INSERTS or
// FORMAT_MESSAGE_ARGUMENT_ARRAY or
FORMAT_MESSAGE_FROM_HMODULE,
Pointer(hMod),
NTStatusMessage,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
#Buffer, 0, nil);
try
//Remove the undesired line breaks and '.' char
while (Len > 0) and (CharInSet(Buffer[Len - 1], [#0..#32, '.'])) do Dec(Len);
//Convert to Delphi string
SetString(Result, Buffer, Len);
finally
//Free the OS allocated memory block
LocalFree(HLOCAL(Buffer));
end;
FreeLibrary(hMod);
end;
procedure TestCase;
var
filename: UnicodeString;
cs: PUNICODE_STRING;
hFile: THandle;
oa: OBJECT_ATTRIBUTES;
iosb: IO_STATUS_BLOCK;
status: NTSTATUS;
begin
filename := 'C:\Windows\Explorer.exe'; //23 characters
{
Convert the filename to an "Object Attributes" structure
OBJECT_ATTRIBUTES.Length <-- 24
OBJECT_ATTRIBUTES.Attributes <-- OBJ_CASE_INSENSITIVE
OBJECT_ATTRIBUTES.ObjectName.Length <-- 46
OBJECT_ATTRIBUTES.ObjectName.MaximumLength <-- 48
OBJECT_ATTRIBUTES.ObjectName.Buffer <-- "C:\Windows\Explorer.exe"
}
cs.Length := Length(Filename) * sizeof(WideChar);
cs.MaximumLength := cs.Length + 2; //the null terminator
cs.Buffer := PWideChar(Filename);
oa := Default(OBJECT_ATTRIBUTES);
oa.Length := sizeof(OBJECT_ATTRIBUTES);
oa.Attributes := OBJ_CASE_INSENSITIVE;
oa.ObjectName := #cs;
//Open the file (by Object Attributes) and get a file handle
status := NtOpenFile(#hFile, FILE_READ_ATTRIBUTES, #oa, #iosb, FILE_SHARE_READ, 0);
if status <> STATUS_SUCCESS then
begin
WriteLn('Error opening file "'+Filename+'": '+FormatNTStatusMessage(status)+' (0x'+IntToHex(status, 8)+')');
Exit;
end;
try
WriteLn('Successfully opened file');
finally
NtClose(hFile);
end;
end;
begin
try
TestCase;
WriteLn('Press enter to close...');
ReadLn;
except
on E: Exception do
Writeln(E.ClassName, ': ', E.Message);
end;
end.
Bonus Reading
MSDN: RtlInitUnicodeString
MSDN: NtOpenFile function
Technet: How to open a file from a kernel mode device driver and how to read from or write to the file archive
MSDN: InitializeObjectAttributes macro
MSDN: OBJECT_ATTRIBUTES structure
Google Project Zero: The Definitive Guide on Win32 to NT Path Conversionarchive
Found it.
Two things:
I didn't know "NT Paths" are different from "DOS Paths"
"C:\Windows\Notepad.exe" → "\??\C:\Windows\Notepad.exe"
To OBJECT_ATTRIBUTES.ObjectName I was assigning #PUNICODE_STRING, rather than #UNICODE_STRING
Use Case - Processes using file
I was using NtOpenFile in order to get the Process IDs that are using a file.
The short version is:
call NtOpenFile to open the file you're interested in
call NtQueryInformationFile with the FileProcessIdsUsingFileInformation constant
iterate each returned Process ID (pid)
call OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid) to get the process name
function GetProcessIDsUsingFile(Filename: UnicodeString; out ProcessIDs: array of DWORD): string;
var
hFile: THandle;
oa: OBJECT_ATTRIBUTES;
iosb: TIOStatusBlock;
status: NTSTATUS;
fi: FILE_PROCESS_IDS_USING_FILE_INFORMATION;
i: Integer;
pid: DWORD;
hProcess: THandle;
s: string;
dw: DWORD;
cs: UNICODE_STRING;
ntFilename: UnicodeString;
const
PROCESS_QUERY_LIMITED_INFORMATION = $1000;
begin
Result := '';
(*
Decription
Get list of processes that are using a file.
Sample usage
GetProcessIDsUsingFile('C:\Windows\Notepad.exe', {out}pids);
Returns
Result: a text description of the processes using the file
"PID:12345 (Explorer.exe)"
ProcessIDs: an array of process IDs (PIDs)
[12345]
*)
ntFilename := Filename;
{
Convert the "DOS path" into an "NT Path". NT Paths are not the same as DOS paths.
DOS Path: C:\Windows\Explorer.exe
NT Path: \??\C:\Windows\Explorer.exe
The short version is:
\DosDevices is a symbolic link for \??
\?? is a special value understood by the object manager's parser to mean the global \DosDevices directory
\GLOBAL?? is a folder that is home to global device.
And in the end all 3 (eventually) end up in the same place.
But you want to use "??" rather than "\GLOBAL??" because the former understands per-user (i.e. local) drive mappings.
Bonus Reading:
- Google Project Zero: The Definitive Guide on Win32 to NT Path Conversion (Google Project Zero)
https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
- Google Project Zero: Windows Drivers are True’ly Tricky
https://googleprojectzero.blogspot.com/2015/10/windows-drivers-are-truely-tricky.html
- Nynaeve: The kernel object namespace and Win32, part 3
http://www.nynaeve.net/?p=92
Apparently you can also use some undocumented functions (exported by name) to convert a DOS path to an NT path; there are like 7 variations
- RtlGetFullPathName_U
- RtlDosPathNameToRelativeNtPathName_U
- RtlDosPathNameToNtPathName_U_WithStatus
But i'll just prefix it with "\??\"
}
if Copy(ntFilename, 1, 4) <> '\??\' then
ntFilename := '\??\'+Filename;
{
Convert the filename to an "Object Attributes" structure
OBJECT_ATTRIBUTES.Length <-- 24
OBJECT_ATTRIBUTES.Attributes <-- OBJ_CASE_INSENSITIVE
OBJECT_ATTRIBUTES.ObjectName.Length <-- 46
OBJECT_ATTRIBUTES.ObjectName.MaximumLength <-- 48
OBJECT_ATTRIBUTES.ObjectName.Buffer <-- "C:\Windows\Explorer.exe"
}
cs.Length := Length(ntFilename) * sizeof(WideChar);
cs.MaximumLength := cs.Length + 2; //the null terminator
cs.Buffer := PWideChar(ntFilename);
oa := Default(OBJECT_ATTRIBUTES);
oa.Length := sizeof(OBJECT_ATTRIBUTES);
oa.Attributes := OBJ_CASE_INSENSITIVE;
oa.ObjectName := #cs;
//Open the file (by Object Attributes) and get a file handle
status := NtOpenFile(#hFile, FILE_READ_ATTRIBUTES, #oa, #iosb, FILE_SHARE_READ, 0);
if status <> STATUS_SUCCESS then
raise EOleSysError.Create('Error opening file "'+Filename+'": '+FormatNTStatusMessage(status)+' (0x'+IntToHex(status, 8)+')', status, 0);
try
//Query for information about the file (by handle)
status := NtQueryInformationFile(hFile, #iosb, #fi, sizeof(fi), FileProcessIdsUsingFileInformation);
if status <> STATUS_SUCCESS then
raise EOleSysError.Create('Error querying file "'+Filename+'" information: '+FormatNTStatusMessage(status), status, 0);
for i := 0 to fi.NumberOfProcessIdsInList-1 do
begin
pid := fi.ProcessIdList[i];
if Result <> '' then
Result := Result+#13#10;
Result := Result+'PID: '+IntToStr(pid);
hProcess := OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid);
if hProcess = 0 then
RaiseLastOSError;
try
SetLength(s, 32767);
dw := GetModuleFileNameEx(hProcess, 0, PChar(s), Length(s));
if dw <= 0 then
RaiseLastOSError;
SetLength(s, dw); //returns characters (not including null terminator)
Result := Result+' ('+s+')';
finally
CloseHandle(hProcess);
end;
end;
finally
NtClose(hFile);
end;
end;

Combine two Bytes to WideChar

Is it possible to combine two Bytes to WideChar and if yes, then how?
For example, letter "ē" in binary is 00010011 = 19 and 00000001 = 1, or 275 together.
var
WChar: WideChar;
begin
WChar := WideChar(275); // Result is "ē"
var
B1, B2: Byte;
WChar: WideChar;
begin
B1 := 19;
B2 := 1;
WChar := CombineBytesToWideChar(B1, B2); // ???
How do I get WideChar from two bytes in Delphi?
WChar := WideChar(MakeWord(B1, B2));
You should just be able to create a type and cast:
type
DoubleByte = packed record
B1: Byte;
B2: Byte;
end;
var
DB: DoubleByte;
WC: WideChar;
begin
DB.B1 := 19;
DB.B2 := 1;
WC = WideChar(DB);
end;
Failing a cast you can use Move() instead and simply copy the memory.

how to improve the code (Delphi) for loading and searching in a dictionary?

I'm a Delphi programmer.
I have made a program who uses dictionaries with words and expressions (loaded in program as "array of string").
It uses a search algorithm based on their "checksum" (I hope this is the correct word).
A string is transformed in integer based on this:
var
FHashSize: Integer; //stores the value of GetHashSize
HashTable, HashTableNoCase: array[Byte] of Longword;
HashTableInit: Boolean = False;
const
AnsiLowCaseLookup: array[AnsiChar] of AnsiChar = (
#$00, #$01, #$02, #$03, #$04, #$05, #$06, #$07,
#$08, #$09, #$0A, #$0B, #$0C, #$0D, #$0E, #$0F,
#$10, #$11, #$12, #$13, #$14, #$15, #$16, #$17,
#$18, #$19, #$1A, #$1B, #$1C, #$1D, #$1E, #$1F,
#$20, #$21, #$22, #$23, #$24, #$25, #$26, #$27,
#$28, #$29, #$2A, #$2B, #$2C, #$2D, #$2E, #$2F,
#$30, #$31, #$32, #$33, #$34, #$35, #$36, #$37,
#$38, #$39, #$3A, #$3B, #$3C, #$3D, #$3E, #$3F,
#$40, #$61, #$62, #$63, #$64, #$65, #$66, #$67,
#$68, #$69, #$6A, #$6B, #$6C, #$6D, #$6E, #$6F,
#$70, #$71, #$72, #$73, #$74, #$75, #$76, #$77,
#$78, #$79, #$7A, #$5B, #$5C, #$5D, #$5E, #$5F,
#$60, #$61, #$62, #$63, #$64, #$65, #$66, #$67,
#$68, #$69, #$6A, #$6B, #$6C, #$6D, #$6E, #$6F,
#$70, #$71, #$72, #$73, #$74, #$75, #$76, #$77,
#$78, #$79, #$7A, #$7B, #$7C, #$7D, #$7E, #$7F,
#$80, #$81, #$82, #$83, #$84, #$85, #$86, #$87,
#$88, #$89, #$8A, #$8B, #$8C, #$8D, #$8E, #$8F,
#$90, #$91, #$92, #$93, #$94, #$95, #$96, #$97,
#$98, #$99, #$9A, #$9B, #$9C, #$9D, #$9E, #$9F,
#$A0, #$A1, #$A2, #$A3, #$A4, #$A5, #$A6, #$A7,
#$A8, #$A9, #$AA, #$AB, #$AC, #$AD, #$AE, #$AF,
#$B0, #$B1, #$B2, #$B3, #$B4, #$B5, #$B6, #$B7,
#$B8, #$B9, #$BA, #$BB, #$BC, #$BD, #$BE, #$BF,
#$C0, #$C1, #$C2, #$C3, #$C4, #$C5, #$C6, #$C7,
#$C8, #$C9, #$CA, #$CB, #$CC, #$CD, #$CE, #$CF,
#$D0, #$D1, #$D2, #$D3, #$D4, #$D5, #$D6, #$D7,
#$D8, #$D9, #$DA, #$DB, #$DC, #$DD, #$DE, #$DF,
#$E0, #$E1, #$E2, #$E3, #$E4, #$E5, #$E6, #$E7,
#$E8, #$E9, #$EA, #$EB, #$EC, #$ED, #$EE, #$EF,
#$F0, #$F1, #$F2, #$F3, #$F4, #$F5, #$F6, #$F7,
#$F8, #$F9, #$FA, #$FB, #$FC, #$FD, #$FE, #$FF);
implementation
function GetHashSize(const Count: Integer): Integer;
begin
if Count < 65 then
Result := 256
else
Result := Round(IntPower(16, Ceil(Log10(Count div 4) / Log10(16))));
end;
function Hash(const Hash: LongWord; const Buf; const BufSize: Integer): LongWord;
var P: PByte;
I: Integer;
begin
P := #Buf;
Result := Hash;
for I := 1 to BufSize do
begin
Result := HashTable[Byte(Result) xor P^] xor (Result shr 8);
Inc(P);
end;
end;
function HashStrBuf(const StrBuf: Pointer; const StrLength: Integer; const Slots: LongWord): LongWord;
var P: PChar;
I, J: Integer;
begin
if not HashTableInit then
InitHashTable;
P := StrBuf;
if StrLength <= 48 then // Hash all characters for short strings
Result := Hash($FFFFFFFF, P^, StrLength)
else
begin
// Hash first 16 bytes
Result := Hash($FFFFFFFF, P^, 16);
// Hash last 16 bytes
Inc(P, StrLength - 16);
Result := Hash(Result, P^, 16);
// Hash 16 bytes sampled from rest of string
I := (StrLength - 48) div 16;
P := StrBuf;
Inc(P, 16);
for J := 1 to 16 do
begin
Result := HashTable[Byte(Result) xor Byte(P^)] xor (Result shr 8);
Inc(P, I + 1);
end;
end;
// Mod into slots
if Slots <> 0 then
Result := Result mod Slots;
end;
procedure InitHashTable;
var I, J: Byte;
R: LongWord;
begin
for I := $00 to $FF do
begin
R := I;
for J := 8 downto 1 do
if R and 1 <> 0 then
R := (R shr 1) xor $EDB88320
else
R := R shr 1;
HashTable[I] := R;
end;
Move(HashTable, HashTableNoCase, Sizeof(HashTable));
for I := Ord('A') to Ord('Z') do
HashTableNoCase[I] := HashTableNoCase[I or 32];
HashTableInit := True;
end;
The result of the HashStrBuf is "and (FHashSize - 1)" and is used as index in an "array of array of Integer" (of FHashSize size) to store the index of the string from that "array of string".
This way, when searches for a string, it's transformed in "checksum" and then the code searches in the "branch" with this index comparing this string with the strings from dictionary who have the same "checksum".
Ideally each string from dictionary should have unique checksum. But in the "real world" about 2/3 share the same "checksum" with other words. Because of that the search is not that fast.
In these dictionaries strings are composed of this characters: ['a'..'z',#224..#246,#248..#254,#154,#156..#159,#179,#186,#191,#190,#185,'0'..'9', '''']
Is there any way to improve the "hashing" so the strings would have more unique "checksums"?
Oh, one way is to increase the size of that "array of array of Integer" (FHashSize) but it cannot be increased too much because it takes a lot of Ram.
Another thing: these dictionaries are stored on HDD only as words/expressions (not the "checksums"). Their "checksum" is generated at program startup. But it takes a lot of seconds to do that...
Is there any way to speed up the startup of the program? Maybe by improving the "hashing" function, maybe by storing the "checksums" on HDD and loading them from there...
Any input would be appreciated...
PS: here is the code to search:
function TDictionary.LocateKey(const Key: AnsiString): Integer;
var i, j, l, H: Integer;
P, Q: PChar;
begin
Result := -1;
l := Length(Key);
H := HashStrBuf(#Key[1], l, 0) and (FHashSize - 1);
P := #Key[1];
for i := 0 to High(FHash[H]) do //FHash is that "array of array of integer"
begin
if l <> FKeys.ItemSize[FHash[H][i]] then //FKeys.ItemSize is an byte array with the lengths of strings from dictionary
Continue;
Q := FKeys.Pointer(FHash[H][i]); //pointer to string in dictionary
for j := 0 to l - 1 do
if (P + j)^ <> (Q + j)^ then
Break;
if j = l then
begin
Result := FHash[H][i];
Exit;
end;
end;
end;
Don't reinvent the wheel!
IMHO your hashing is far from efficient, and your collision algorithm can be improved.
Take a look for instance at the IniFiles unit, and the THashedStringList.
It's a bit old, but a good start for a string list using hashes.
There are a lot of good Delphi implementation of such, like in SuperObject and a lot of other code...
Take a look at our SynBigTable unit, which can handle arrays of data in memory or in file very fast, with full indexed searches. Or our latest TDynArray wrapper around any dynamic array of data, to implement TList-like methods to it, including fast binary search. I'm quite sure it could be faster than your hand-tuned code using hashing, if you use an ordered index then fast binary search.
Post-Scriptum:
About pure hashing speed of a string content, take a look at this function - rename RawByteString into AnsiString, PPtrInt into PPointer, and PtrInt into Integer for Delphi 7:
function Hash32(const Text: RawByteString): cardinal;
function SubHash(P: PCardinalArray): cardinal;
{$ifdef HASINLINE}inline;{$endif}
var s1,s2: cardinal;
i, L: PtrInt;
const Mask: array[0..3] of cardinal = (0,$ff,$ffff,$ffffff);
begin
if P<>nil then begin
L := PPtrInt(PtrInt(P)-4)^; // fast lenght(Text)
s1 := 0;
s2 := 0;
for i := 1 to L shr 4 do begin // 16 bytes (4 DWORD) by loop - aligned read
inc(s1,P^[0]);
inc(s2,s1);
inc(s1,P^[1]);
inc(s2,s1);
inc(s1,P^[2]);
inc(s2,s1);
inc(s1,P^[3]);
inc(s2,s1);
inc(PtrUInt(P),16);
end;
for i := 1 to (L shr 2)and 3 do begin // 4 bytes (DWORD) by loop
inc(s1,P^[0]);
inc(s2,s1);
inc(PtrUInt(P),4);
end;
inc(s1,P^[0] and Mask[L and 3]); // remaining 0..3 bytes
inc(s2,s1);
result := s1 xor (s2 shl 16);
end else
result := 0;
end;
begin // use a sub function for better code generation under Delphi
result := SubHash(pointer(Text));
end;
There is even a pure asm version, even faster, in our SynCommons.pas unit. I don't know any faster hashing function around (it's faster than crc32/adler32/IniFiles.hash...). It's based on adler32, but use DWORD aligned reading and summing for even better speed. This could be improved with SSE asm, of course, but here is a fast pure Delphi hash function.
Then don't forget to use "multiplication"/"binary and operation" for hash resolution, just like in IniFiles. It will reduce the number of iteration to your list of hashs.
But since you didn't provide the search source code, we are not able to know what could be improved here.
If you are using Delphi 7, consider using Julian Bucknall's lovely Delphi data types code, EzDsl (Easy Data Structures Library).
Now you don't have to reinvent the wheel as another wise person has also said.
You can download ezdsl, a version that I have made work with both Delphi 7, and recent unicode delphi versions, here.
In particular the unit name EHash contains a hash table implementation, which has various hashing algorithms plug-inable, or you can write your own plugin function that just does the hashing function of your choice.
As a word to the wise, if you are using a Unicode Delphi version; I would be careful about hashing your unicode strings with a code library like this, without checking how its hashing algorithms perform on your system. The OP here is using Delphi 7, so Unicode is not a factor for the original question.
I think you'll find a database (without checksums) a lot quicker. Maybe try sqlite which will give you a single file database. There are many Delphi Libraries available.

Delphi - Convert byte array to string

How do I convert a byte array to a string (base 256) in Delphi?
Use the built-in SetString command. It sets the string to the required length and copies the bytes. There's no need for the array to be null-terminated. In fact, if the array has zero--valued bytes in it, they'll correctly appear within the string; they won't terminate the string.
SetString(AnsiStr, PAnsiChar(#ByteArray[0]), LengthOfByteArray);
If you have a UnicodeString, then you'll need to halve the length parameter since it measures characters, not bytes:
SetString(UnicodeStr, PWideChar(#ByteArray[0]), LengthOfByteArray div 2);
See also, Converting TMemoryStream to String in Delphi 2009.
I'm not sure what do you mean by Base256. If you want to get hex representation of data, use this:
function bintostr(const bin: array of byte): string;
const HexSymbols = '0123456789ABCDEF';
var i: integer;
begin
SetLength(Result, 2*Length(bin));
for i := 0 to Length(bin)-1 do begin
Result[1 + 2*i + 0] := HexSymbols[1 + bin[i] shr 4];
Result[1 + 2*i + 1] := HexSymbols[1 + bin[i] and $0F];
end;
end;
If you want to just render the data as a string (this doesn't change the content!), where for each byte of data you'd get a single ASCII symbol with that code, do
function bintoAscii(const bin: array of byte): AnsiString;
var i: integer;
begin
SetLength(Result, Length(bin));
for i := 0 to Length(bin)-1 do
Result[1+i] := AnsiChar(bin[i]);
end;
var
LString : string;
LBytes : TArray<byte>;
begin
LBytes := TArray<byte>.Create($01, $02, $03);
LString := TEncoding.ANSI.GetString(ABytes);
end;
Being GetString() the reverse operation of GetBytes().
I think there is another nice way to convert byte arrays in strings - an Indy function called BytesToString contained in IdGlobal. It also allows you to specify StartIndex, Length and TEncoding for your string. I've used it several times and I find it very useful.
function bintostr_r(const bin: array of byte): string;
var i,j:integer;
res:string ;
begin
res:='';
for i:=0 to length(bin)-1 do
begin
for j:=1 to 8 do
res:=Inttostr( ((bin[i] shr (j - 1)) and ((1 shl 1) - 1)) ) +res ;
end;
result:=res;
end;
procedure TForm1.FormCreate(Sender: TObject);
var OrigStat: array [1..6] of byte;
res:integer;
begin
OrigStat[1]:=253; // 11111101
OrigStat[2]:=252;
OrigStat[3]:=251;
OrigStat[4]:=250;
OrigStat[5]:=249;
OrigStat[6]:=248;
Edit9.text:=bintostr_r(OrigStat);
end;
result => 111110001111100111111010111110111111110011111101

how to convert byte array to its hex representation in Delphi

I have TBytes variable with a value [0,0,15,15]. How can I convert it to "00FF" ?
I dont want to use loops, bcoz this logic to be used in time intensive function.
(I tried using BinToHex, but I could not get it working with string variable.)
Thanks & Regards,
Pavan.
// Swapping is necessary because x86 is little-endian.
function Swap32(value: Integer): Integer;
asm
bswap eax
end;
function FourBytesToHex(const bytes: TBytes): string;
var
IntBytes: PInteger;
FullResult: string;
begin
Assert(Length(bytes) = SizeOf(IntBytes^));
IntBytes := PInteger(bytes);
FullResult := IntToHex(Swap32(IntBytes^), 8);
Result := FullResult[2] + FullResult[4] + FullResult[6] + FullResult[8];
end;
If that last line looks a little strange, it's because you requested a four-byte array be turned into a four-character string, whereas in the general case, eight hexadecimal digits are required to represent a four-byte value. I'm simply assumed that your byte values are all below 16, so only one hexadecimal digit is needed. If your example was a typo, then simply replace the last two lines with this one:
Result := IntToHex(Swap32(IntBytes^), 8);
By the way, your requirement forbidding loops will not be met. IntToHex uses a loop internally.
function ByteToHex(InByte:byte):shortstring;
const Digits:array[0..15] of char='0123456789ABCDEF';
begin
result:=digits[InByte shr 4]+digits[InByte and $0F];
end;
Example :
MyHex := ByteTohex($FF);
the result
MyHex is "FF".
MyHex := ByteTohex(255);
the result
MyHex is "FF".
MyHex := ByteTohex($55);
the result
MyHex is "55".
This one is quite fast and works with any array size.. It's like BinToHex, but instead of expecting 0..255 byte values, it only uses the low nibble.
procedure BinToSingleHex(Buffer, Text: PAnsiChar; BufSize: Integer);
const
Convert: array[0..15] of AnsiChar = '0123456789ABCDEF';
var
I: Integer;
begin
for I := 0 to BufSize - 1 do
begin
Text[0] := Convert[Byte(Buffer[I]) and $F];
Inc(Text);
end;
end;
Assembler that does the same:
procedure BinToSingleHex(Buffer, Text: PAnsiChar; BufSize: Integer);assembler;
asm
PUSH ESI
PUSH EDI
MOV ESI,EAX
MOV EDI,EDX
MOV EDX,0
JMP ##1
##0: DB '0123456789ABCDEF'
##1: LODSB
AND DL,AL
AND DL,0FH
MOV AL,##0.Byte[EDX]
STOSB
DEC ECX
JNE ##1
POP EDI
POP ESI
end;
usage:
type THexDigit=0..15;
const ArSize=16;
var Ar:array[0..Pred(ArSize)] of THexDigit=(0,1,2,3,4,5,6,7,8,9,8,7,6,5,4,3);
S:Array[0..Pred(ArSize)] of AnsiChar;
BinToSingleHex(#Ar,S,Length(Ar));
WriteLn(S);
Bit late to the party but why not a simple lookup table?
const
HexChars : Array[0..15] of Char = ('0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F');
Assuming TBytes values of 0..15
Function (ABytea: TBytes): string
begin
Result := HexChars[ABytea[0]];
Result := Result + HexChars[ABytea[1]];
Result := Result + HexChars[ABytea[2]];
Result := Result + HexChars[ABytea[3]];
end;
of course neater with a loop :) and needs modifying for byte values above 15:
begin
Result := HexChars[ABytea[0] shr 4];
Result := Result + HexChars[ABytea[0] and $0F];
Result := Result + HexChars[ABytea[1] shr 4];
Result := Result + HexChars[ABytea[1] and $0F];
Result := Result + HexChars[ABytea[2] shr 4];
Result := Result + HexChars[ABytea[2] and $0F];
Result := Result + HexChars[ABytea[3] shr 4];
Result := Result + HexChars[ABytea[3] and $0F];
end;
Still neater with a loop especially if TBytes gets larger
I had the same problem. My solution using System.SysUtils.TByteHelper.ToHexString (with loop)
function ToHexString(const MinDigits: Integer): string; overload; inline;
Example code:
procedure TForm1.Button2Click(Sender: TObject);
begin
var text:string;
var w:integer:=0;
var bytearray: Tarray<byte>:= [$DE, $AD, $BE, $EF];
repeat
text:= text+ pbyte(#bytearray[w])^.ToHexString(2);
inc(w);
until w >= high(bytearray);
end;
bytearray := $DE $AD $BE $EF
text := DEADBEEF

Resources