We often replace non-desirable characters in a file with another "good" character.
The interface is:
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
To replace all non-desirables with a space we might call,
cleanfileASCII2(original.txt, 32 , cleaned.txt)
The problem is that this takes a rather long time. Is there
a better way to do it than shown?
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename:
  string);
{ Copies vfilename to voutfilename one byte at a time, replacing every
  "bad" byte with vgood.

  A byte is good when it is LF (10), CR (13) or in the range 32..127.
  vfilename    - file to read.
  vgood        - ordinal of the replacement byte, or -1 to drop bad bytes.
  voutfilename - file to create/overwrite.

  The files are declared as "file of AnsiChar" so that one record is always
  one byte, whatever the size of Char is in the compiler being used.
  The per-byte Read/Write is kept on purpose: it is simple but slow. }
var
  F1, F2: file of AnsiChar;
  Ch: AnsiChar;
begin
  // original
  AssignFile(F1, vfilename);
  Reset(F1);
  try
    // output file
    AssignFile(F2, voutfilename);
    Rewrite(F2);
    try
      while not Eof(F1) do
      begin
        Read(F1, Ch);
        if Ch in [#10, #13, #32..#127] then
          Write(F2, Ch)                 // good char: copy unchanged
        else if vgood <> -1 then
        begin
          Ch := AnsiChar(vgood);        // bad char: substitute
          Write(F2, Ch);
        end;
        // bad char with vgood = -1: write nothing, i.e. drop it
      end;
    finally
      CloseFile(F2);                    // closed even if Read/Write raises
    end;
  finally
    CloseFile(F1);
  end;
end;
The problem has to do with how you're treating the buffer. Memory transfers are the most expensive part of any operation. In this case, you're looking at the file byte by byte. By changing to a blockread or buffered read, you will realize an enormous increase in speed. Note that the correct buffer size varies based on where you are reading from. For a networked file, you will find extremely large buffers may be less efficient due to the packet size TCP/IP imposes. Even this has become a bit murky with large packets from gigE but, as always, the best result is to benchmark it.
I converted from standard reads to a file stream just for convenience. You could easily do the same thing with a blockread. In this case, I took a 15MB file and ran it through your routine. It took 131,478ms to perform the operation on a local file. With the 1024 buffer, it took 258ms.
procedure cleanfileASCII3(vfilename: string; vgood: integer; voutfilename:string);
{ Stream-based cleaner: copies vfilename to voutfilename in 1024-byte blocks,
  replacing every byte that is not LF, CR or 32..127 with vgood.

  vgood = -1 means "drop bad bytes"; the block is compacted in place so the
  output then contains only the good bytes.
  Does nothing when vfilename does not exist. }
const
  bufsize = 1023;
var
  inFS, outFS: TFileStream;
  buffer: array[0..bufsize] of byte;
  readSize, keepSize, i: integer;
  b: byte;
begin
  if not FileExists(vFileName) then exit;
  inFS := TFileStream.Create(vFileName, fmOpenRead or fmShareDenyWrite);
  try
    outFS := TFileStream.Create(vOutFileName, fmCreate);
    try
      repeat
        readSize := inFS.Read(buffer, SizeOf(buffer));
        keepSize := 0;                       // number of bytes to emit from this block
        for i := 0 to readSize - 1 do
        begin
          b := buffer[i];
          if not (b in [10, 13, 32..127]) then
          begin
            if vgood = -1 then
              Continue;                      // drop the bad byte
            b := Byte(vgood);                // substitute the bad byte
          end;
          buffer[keepSize] := b;             // keepSize <= i, so this never overtakes the read index
          Inc(keepSize);
        end;
        if keepSize > 0 then
          outFS.WriteBuffer(buffer, keepSize);
      until readSize = 0;                    // Read returns 0 at end of stream
    finally
      outFS.Free;
    end;
  finally
    inFS.Free;
  end;
end;
Several improvements:
Buffer the data, read 2k or 16k or similar sized blocks
Use a lookup table
here's a stab, that is untested (no compiler in front of me right now):
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
{ Lookup-table cleaner: copies vfilename to voutfilename in 2 KB blocks.

  Every byte that is not LF, CR or 32..127 is replaced with vgood, or dropped
  when vgood = -1. The table and buffer are arrays of Byte so that element
  size is one byte regardless of SizeOf(Char); BlockRead reports its count in
  bytes because the files are opened with record size 1. }
var
  f1, f2: File;
  table: array[Byte] of Byte;
  buffer: array[0..2047] of Byte;
  index, inBuffer, outCount: Integer;
  b: Byte;
begin
  // Build the translation table once: good bytes map to themselves,
  // everything else maps to the replacement byte.
  for index := 0 to 255 do
    if index in [10, 13, 32..127] then
      table[index] := index
    else
      table[index] := Byte(vgood);           // not used when vgood = -1

  AssignFile(f1, vfilename);
  Reset(f1, 1);
  try
    AssignFile(f2, voutfilename);
    Rewrite(f2, 1);
    try
      while not Eof(f1) do
      begin
        BlockRead(f1, buffer, SizeOf(buffer), inBuffer);
        if vgood <> -1 then
        begin
          for index := 0 to inBuffer - 1 do
            buffer[index] := table[buffer[index]];
          outCount := inBuffer;
        end
        else
        begin
          // Drop mode: compact the good bytes to the front of the buffer.
          outCount := 0;
          for index := 0 to inBuffer - 1 do
          begin
            b := buffer[index];
            if b in [10, 13, 32..127] then
            begin
              buffer[outCount] := b;
              Inc(outCount);
            end;
          end;
        end;
        if outCount > 0 then
          BlockWrite(f2, buffer, outCount);
      end;
    finally
      CloseFile(f2);
    end;
  finally
    CloseFile(f1);
  end;
end;
You could buffer your input and output so you read a chunk of characters (even the whole file, if it's not too big) into an array, then process the array, then write the entire array to the output file.
In most of these cases, the disk IO is the bottleneck, and if you can do fewer large reads instead of many small reads, it will be faster.
Buffering is the correct way to do that. I modified your code to see the difference:
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename:
  string);
{ Buffered cleaner: copies vfilename to voutfilename in 2 KB blocks using
  BlockRead/BlockWrite on untyped files with record size 1.

  Bytes other than LF, CR and 32..127 are replaced with vgood, or dropped when
  vgood = -1. Buf is an array of Byte so that the NumRead byte count returned
  by BlockRead is also a valid element count for the loop. }
var
  F1, F2: file;
  NumRead, NumKept, i: Integer;
  Buf: array[1..2048] of Byte;
  b: Byte;
begin
  AssignFile(F1, vfilename);
  Reset(F1, 1); // Record size = 1
  try
    AssignFile(F2, voutfilename);
    Rewrite(F2, 1); // Record size = 1
    try
      repeat
        BlockRead(F1, Buf, SizeOf(Buf), NumRead);
        NumKept := 0;
        for i := 1 to NumRead do
        begin
          b := Buf[i];
          if not (b in [10, 13, 32..127]) then
          begin
            // bad char
            if vgood = -1 then
              Continue;               // drop it
            b := Byte(vgood);         // replace it
          end;
          Inc(NumKept);
          Buf[NumKept] := b;          // NumKept <= i: compaction never overtakes the reader
        end;
        if NumKept > 0 then
          BlockWrite(F2, Buf, NumKept); // without a result argument a short write raises EInOutError
      until NumRead = 0;
    finally
      CloseFile(F2);
    end;
  finally
    CloseFile(F1);
  end;
end;
I did it this way, ensuring that the file I/O is done all in one go before the processing. The code could do with updating for unicode but it copes with nasty text characters such as nulls and gives you a TStrings capability.
Bri
procedure TextStringToStringsAA( AStrings : TStrings; const AStr: Ansistring);
// Splits AStr into lines and stores them in AStrings (replacing its content).
// A line ends at #13, #10 or #13#10. Embedded #0 characters are ordinary
// data and do not stop the scan.
var
  Len, Cur, LineStart: Integer;
begin
  Len := Length(AStr);
  AStrings.BeginUpdate;
  try
    AStrings.Clear;
    Cur := 1;
    while Cur <= Len do
    begin
      // Advance to the next line break (or the end of the text).
      LineStart := Cur;
      while (Cur <= Len) and not CharInSet(AStr[Cur], [#10, #13]) do
        Inc(Cur);
      AStrings.Add(string(Copy(AStr, LineStart, Cur - LineStart)));
      // Consume one CR, then one LF, if present.
      if (Cur <= Len) and (AStr[Cur] = #13) then
        Inc(Cur);
      if (Cur <= Len) and (AStr[Cur] = #10) then
        Inc(Cur);
    end;
  finally
    AStrings.EndUpdate;
  end;
end;
procedure TextStreamToStrings( AStream : TStream; AStrings : TStrings );
// An alternative to AStrings.LoadFromStream.
// Reads everything from the current stream position to the end into one
// AnsiString and splits it into lines with TextStringToStringsAA.
// Nulls (#0) which might be in the file e.g. from corruption in log files
// do not terminate the reading process.
var
  Size, Got : Integer;
  S : Ansistring;
begin
  AStrings.BeginUpdate;
  try
    // Make a big string with all of the text
    Size := AStream.Size - AStream.Position;
    SetLength( S, Size );
    if Size > 0 then
    begin
      Got := AStream.Read(Pointer(S)^, Size);
      // A stream may deliver fewer bytes than requested; keep only what was
      // actually read so no uninitialised bytes are parsed as text.
      if Got < Size then
        SetLength( S, Got );
    end;
    // Parse it
    TextStringToStringsAA( AStrings, S );
  finally
    AStrings.EndUpdate;
  end;
end;
procedure LoadStringsFromFile( AStrings : TStrings; const AFileName : string );
// Fills AStrings with the lines of the text file AFileName.
// The file is opened read-only without denying other processes access.
// Nulls (#0) in the file do not terminate the reading process.
var
  Source : TFileStream;
begin
  AStrings.BeginUpdate;
  try
    Source := TFileStream.Create( AFileName, fmOpenRead or fmShareDenyNone );
    try
      Source.Seek( 0, soFromBeginning );
      TextStreamToStrings( Source, AStrings );
    finally
      Source.Free;
    end;
  finally
    AStrings.EndUpdate;
  end;
end;
Don't try to optimize without knowing where the time is spent.
You should use the Sampling Profiler (delphitools.info) to find out where the bottleneck is. It's easy to use.
Precompute the vgood chr conversion, before the loop.
Also, You don't need some conversions: Ord() and Chr(). Use always the 'Ch' variable.
if not (ch in [#10, #13, #32..#127]) then
Probably the easiest method would be:
make another file (temporary)
copy all content of basic file to the temp. file (line after line)
detect when it reads chars or words you want to replace and stop copying
enter your edit (to the temp. file)
continue and finish copying basic to temp file
rewrite (delete content of) basic file
copy lines from temp file to basic file
DONE!
vote this post +1 if it helped please
Related
I need to read the last line in some very large textfiles (to get the timestamp from the data). TStringlist would be a simple approach but it returns an out of memory error. I'm trying to use seek and blockread, but the characters in the buffer are all nonsense. Is this something to do with unicode?
// Attempts to return the last line of FileName by walking backwards from the
// end of the file, collecting characters until a #13 is seen, and then
// reversing the collected text.
// NOTE(review): this is the routine the question reports as returning
// nonsense characters; the comments below point at the likely causes.
Function TForm1.ReadLastLine2(FileName: String): String;
var
FileHandle: File;
s,line: string;
ok: 0..1;
// Buf elements are Char; where Char is two bytes wide each element holds two
// file bytes, so Buf[1] is not "one byte of the file" for an ANSI text file.
Buf: array[1..8] of Char;
// k is unsigned: if no #13 is found before the start of the file, k-1 wraps
// around instead of going negative and the loop never terminates cleanly.
k: longword;
i,ReadCount: integer;
begin
AssignFile (FileHandle,FileName);
// NOTE(review): no record size is given, so FileSize/Seek/BlockRead below
// count in records rather than bytes - confirm Reset(FileHandle,1) was meant.
Reset (FileHandle); // or for binary files: Reset (FileHandle,1);
ok := 0;
k := FileSize (FileHandle);
Seek (FileHandle, k-1);
s := '';
while ok<>1 do begin
// Requests SizeOf(Buf)-1 records per iteration although only Buf[1] is used.
BlockRead (FileHandle, buf, SizeOf(Buf)-1, ReadCount); //BlockRead ( var FileHandle : File; var Buffer; RecordCount : Integer {; var RecordsRead : Integer} ) ;
if ord (buf[1]) <>13 then //Arg to integer
s := s + buf[1]
else
ok := ok + 1;
// Step one position towards the start of the file and read again.
k := k-1;
seek (FileHandle,k);
end;
CloseFile (FileHandle);
// Reverse the order in the line read
setlength (line,length(s));
for i:=1 to length(s) do
line[length(s) - i+1 ] := s[i];
Result := Line;
end;
Based on www.delphipages.com/forum/showthread.php?t=102965
The testfile is a simple CSV I created in excel ( this is not the 100MB I ultimately need to read).
a,b,c,d,e,f,g,h,i,j,blank
A,B,C,D,E,F,G,H,I,J,blank
1,2,3,4,5,6,7,8,9,0,blank
Mary,had,a,little,lamb,His,fleece,was,white,as,snow
And,everywhere,that,Mary,went,The,lamb,was,sure,to,go
You really have to read the file in LARGE chunks from the tail to the head.
Since the file is so large that it does not fit in memory, reading it line by line from start to end would be very slow. With ReadLn it would be twice as slow.
You also have to be prepared for the last line either ending with an EOL or not.
Personally I would also account for three possible EOL sequences:
CR/LF aka #13#10=^M^J - DOS/Windows style
CR without LF - just #13=^M - Classic MacOS file
LF without CR - just #10=^J - UNIX style, including MacOS version 10
If you are sure your CSV files would only ever be generated by native Windows programs it would be safe to assume full CR/LF be used. But if there can be other Java programs, non-Windows platforms, mobile programs - I would be less sure. Of course pure CR without LF would be the least probable case of them all.
uses System.SysUtils, System.Types, System.Classes, System.IOUtils, System.Math;
type FileChar = AnsiChar; FileString = AnsiString; // for non-Unicode files
// type FileChar = WideChar; FileString = UnicodeString;// for UTF16 and UCS-2 files
const FileCharSize = SizeOf(FileChar);
// somewhere later in the code add: Assert(FileCharSize = SizeOf(FileString[1]);
function ReadLastLine(const FileName: String): FileString; overload; forward;
const PageSize = 4*1024;
// the minimal read atom of most modern HDD and the memory allocation atom of Win32
// since the chances your file would have lines longer than 4Kb are very small - I would not increase it to several atoms.
// Returns the last line of an already-split file. If the final element is
// empty (the "ghost" line produced by a terminating EOL) the element before it
// is returned instead. An empty array yields ''.
function ReadLastLine(const Lines: TStringDynArray): FileString; overload;
var
  Last: Integer;
begin
  Result := '';
  Last := High(Lines);
  if Last >= Low(Lines) then
  begin
    Result := Lines[Last];
    // Fall back to the previous line only when there is one.
    if (Result = '') and (Last > Low(Lines)) then
      Result := Lines[Last - 1];
  end;
end;
// scan for EOLs in not-yet-scanned part
// Scans Buffer (the tail of the file) backwards for the EOL that precedes the
// last line.
//   Buffer    - the last Length(Buffer) characters of the file.
//   OldRead   - how many trailing characters were already scanned by a
//               previous call (0 on the first call); they are not re-scanned.
//   LastChunk - True when Buffer starts at the beginning of the file.
//   Line      - receives the last line, without its terminating CR/LF.
// Returns True when Line was determined, False when more of the file is needed.
function FindLastLine(buffer: TArray<FileChar>; const OldRead : Integer;
  const LastChunk: Boolean; out Line: FileString): boolean;
var
  i, first, tailCRLF, count: integer;
  c: FileChar;
begin
  Result := False;
  Line := '';
  if Length(Buffer) = 0 then exit;

  // Count the characters of a trailing EOL (LF, CR or CR+LF) so they are
  // neither returned nor mistaken for the separator before the last line.
  i := High(Buffer);
  tailCRLF := 0;
  if Buffer[i] = ^J then begin // LF - single, or after CR
    Dec(i);
    Inc(tailCRLF);
  end;
  if (i >= Low(Buffer)) and (Buffer[i] = ^M) then begin // CR, alone or before LF
    Inc(tailCRLF);
  end;

  first := High(Buffer) - Max(OldRead, tailCRLF);
  if first - Low(Buffer) < 0 then exit; // no new data to read - results would be like before
  // the CR/LF pair could be sliced between new and previous buffer - so start a bit earlier
  if OldRead > 0 then Inc(first);

  for i := first downto Low(Buffer) do begin
    c := Buffer[i];
    if (c = ^J) or (c = ^M) then begin // found EOL
      count := High(Buffer) - tailCRLF - i;
      if count > 0 then
        SetString( Line, PAnsiChar(@Buffer[i+1]), count ); // copy the raw characters after the EOL
      exit(True);
    end;
  end;

  // No separating EOL in the buffer (except maybe the trailing one): either
  // the caller must supply more of the file, or - if this already is the whole
  // file - the entire content minus the trailing EOL is the last line.
  if LastChunk then begin
    count := Length(Buffer) - tailCRLF;
    if count > 0 then
      SetString( Line, PAnsiChar(@Buffer[ Low(Buffer) ]), count );
    Result := true;
  end;
end;
// Returns the last line of the file FileName.
// Files of at most PageSize bytes are read whole and split into lines; larger
// files are read from the tail in a buffer that doubles in size until
// FindLastLine locates the last line or the whole file has been loaded.
function ReadLastLine(const FileName: String): FileString; overload;
var Buffer, tmp: TArray<FileChar>;
// dynamic arrays - eases memory management and protect from stack corruption
FS: TFileStream; FSize, NewPos: Int64;
OldRead, NewLen : Integer; EndOfFile: boolean;
begin
Result := '';
FS := TFile.OpenRead(FileName);
try
FSize := FS.Size;
if FSize <= PageSize then begin // small file, we can be lazy!
FreeAndNil(FS); // free the handle and avoid double-free in finally
Result := ReadLastLine( TFile.ReadAllLines( FileName, TEncoding.ANSI ));
// or TEncoding.UTF16
// warning - TFIle is not share-aware, if the file is being written to by another app
exit;
end;
SetLength( Buffer, PageSize div FileCharSize);
// OldRead = number of trailing buffer elements already filled and scanned.
OldRead := 0;
repeat
// Position the stream so that the buffer covers the last Length(Buffer)
// characters of the file.
NewPos := FSize - Length(Buffer)*FileCharSize;
EndOfFile := NewPos <= 0;
if NewPos < 0 then NewPos := 0;
FS.Position := NewPos;
// Only the front part is read; the last OldRead elements were moved in from
// the previous, smaller buffer at the end of the preceding iteration.
FS.ReadBuffer( Buffer[Low(Buffer)], (Length(Buffer) - OldRead)*FileCharSize);
if FindLastLine(Buffer, OldRead, EndOfFile, Result) then
exit; // done !
tmp := Buffer; Buffer := nil; // flip-flop: preparing to broaden our mouth
OldRead := Length(tmp); // need not to re-scan the tail again and again when expanding our scanning range
// Double the window, but never beyond the size of the file.
NewLen := Min( 2*Length(tmp), FSize div FileCharSize );
SetLength(Buffer, NewLen); // this may trigger EOutOfMemory...
// Keep the data already read at the end of the enlarged buffer.
Move( tmp[Low(tmp)], Buffer[High(Buffer)-OldRead+1], OldRead*FileCharSize);
tmp := nil; // free old buffer
until EndOfFile;
finally
// FS is nil here on the small-file path; Free on a nil reference is a no-op.
FS.Free;
end;
end;
PS. Note one extra special case - if you use Unicode chars (two-byte ones) and are given an odd-length file (3 bytes, 5 bytes, etc.) - you would never be able to scan the starting single byte (half a WideChar). Maybe you should add an extra guard there, like Assert( 0 = FS.Size mod FileCharSize)
PPS. As a rule of thumb you better keep those functions out of the form class, - because WHY mixing them? In general you should separate concerns into small blocks. Reading file has nothing with user interaction - so should better be offloaded to an extra UNIT. Then you would be able to use functions from that unit in one form or 10 forms, in main thread or in multi-threaded application. Like LEGO parts - they give you flexibility by being small and separate.
PPPS. Another approach here would be using memory-mapped files. Google for MMF implementations for Delphi and articles about benefits and problems with MMF approach. Personally I think rewriting the code above to use MMF would greatly simplify it, removing several "special cases" and the troublesome and memory copying flip-flop. OTOH it would demand you to be very strict with pointers arithmetic.
https://en.wikipedia.org/wiki/Memory-mapped_file
https://msdn.microsoft.com/en-us/library/ms810613.aspx
http://torry.net/quicksearchd.php?String=memory+map&Title=No
Your char type is two byte, so that buffer is 16 byte. Then with blockread you read sizeof(buffer)-1 byte into it, and check the first 2 byte char if it is equal to #13.
The sizeof(buffer)-1 is dodgy (where does that -1 come from?), and the rest is valid, but only if your input file is utf16.
Also your read 8 (or 16) characters each time, but compare only one and then do a seek again. That is not very logical either.
If your encoding is not utf16, I suggest you change the type of a buffer element to ansichar and remove the -1
In response to kopiks' suggestion, I figured out how to do it with TFileStream. It works OK with the simple test file, though there may be some further tweaks when I use it on a variety of CSV files. Also, I don't make any claims that this is the most efficient method.
// Lets the user pick a file, reports the estimated number of rows and appends
// the file's last line to ListBox1.
procedure TForm1.Button6Click(Sender: TObject);
var
  LastLine : String;
  TotalBytes, RowEstimate : Integer;
begin
  if not OpenDialog1.Execute then
    Exit;
  LastLine := ReadLastLineOfTextFile(OpenDialog1.FileName, TotalBytes, RowEstimate);
  ShowMessage('approximately ' + IntToStr(RowEstimate) + ' Rows');
  ListBox1.Items.Add(LastLine);
end;
// Returns the last line of the ANSI text file FileName.
//   StreamSize    - receives the size of the file in bytes.
//   ApproxNumRows - receives StreamSize divided by the length of the last
//                   line (including one EOL character); 0 when the file has
//                   no usable last line.
// Only the final MAXLINELENGTH bytes are inspected, so a longer last line is
// returned truncated to its tail. Trailing #0/#10/#13 bytes are ignored and
// the returned text contains no line-break characters.
Function TForm1.ReadLastLineOfTextFile(const FileName: String; var StreamSize, ApproxNumRows : Integer): String;
const
  MAXLINELENGTH = 256;
var
  Stream: TFileStream;
  Buffer: array [0..MAXLINELENGTH - 1] of AnsiChar;
  BlockSize, First, Last: Integer;
  Line: AnsiString;
begin
  Result := '';
  ApproxNumRows := 0;
  Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    StreamSize := Stream.Size;
    if StreamSize < MAXLINELENGTH then
      BlockSize := StreamSize
    else
      BlockSize := MAXLINELENGTH;
    if BlockSize <= 0 then
      Exit;                                   // empty file: nothing to read
    // Fetch the tail of the file with a single read.
    Stream.Position := Stream.Size - BlockSize;
    Stream.ReadBuffer(Buffer, BlockSize);
  finally
    Stream.Free;
  end;

  // Skip the terminator(s) at the very end of the file.
  Last := BlockSize - 1;
  while (Last >= 0) and (Buffer[Last] in [#0, #10, #13]) do
    Dec(Last);
  // Walk back to the line break that precedes the last line (or to -1).
  First := Last;
  while (First >= 0) and not (Buffer[First] in [#10, #13]) do
    Dec(First);

  if Last > First then
  begin
    SetString(Line, PAnsiChar(@Buffer[First + 1]), Last - First);
    Result := string(Line);
    ApproxNumRows := Round(StreamSize / (Last - First + 1));
  end;
  ShowMessage(Result);                        // diagnostic popup retained from the original routine
end;
Just thought of a new solution.
Again, there could be better ones, but this one is the best i thought of.
// Returns the last line of the text file textFilePath, or '' when the file
// contains no lines. The whole file is loaded into memory, so this is only
// suitable for files that comfortably fit in RAM.
function GetLastLine(textFilePath: string): string;
var
  list: tstringlist;
begin
  Result := '';
  list := tstringlist.Create;
  try
    list.LoadFromFile(textFilePath);
    // Guard against an empty file: indexing list[-1] would raise EStringListError.
    if list.Count > 0 then
      Result := list[list.Count - 1];
  finally
    list.free;
  end;
end;
How can I concat an array of strings with Move. I tried this but I just cannot figure how to get Move operation working correctly.
// Question code: tries to append every element of A to S using Move.
// NOTE(review): this is the non-working version the question asks about.
program Project2;
{$POINTERMATH ON}
procedure Concat(var S: String; const A: Array of String);
var
I, J: Integer;
Len: Integer;
begin
// First pass: total number of characters to append.
Len := 0;
for I := 0 to High(A) do
Len := Len + Length(A[I]);
// S now has its final length; the old content still occupies the front.
SetLength(S, Length(S) + Len);
// Second pass: every element is copied to the same destination, S[High(S)],
// i.e. the last character of the already-enlarged string. No write position
// is advanced, so each copy starts at the final character and extends past
// the end of S.
for I := 0 to High(A) do
Move(PWideChar(A[I])[0], S[High(S)], Length(A[I]) * SizeOf(WideChar));
end;
var
S: String;
begin
S := 'test';
Concat(S, ['test', 'test2', 'test3']);
end.
I'd write this function like so:
// Appends all strings in Source, in order, to Dest with a single
// reallocation. Empty strings (in Dest or Source) are handled without any
// special-casing because the strings are accessed through raw pointers and a
// zero-length Move never dereferences its arguments.
procedure Concat(var Dest: string; const Source: array of string);
var
  Idx, OldLen, AddedLen, PieceLen: Integer;
  WritePos: PChar;
begin
  OldLen := Length(Dest);
  AddedLen := 0;
  for Idx := Low(Source) to High(Source) do
    AddedLen := AddedLen + Length(Source[Idx]);

  SetLength(Dest, OldLen + AddedLen);

  // Pointer(Dest) yields the raw data address (nil for an empty string)
  // without the empty-string substitution that PChar(Dest) performs.
  WritePos := PChar(Pointer(Dest));
  Inc(WritePos, OldLen);

  Idx := Low(Source);
  while Idx <= High(Source) do
  begin
    PieceLen := Length(Source[Idx]);
    Move(Pointer(Source[Idx])^, WritePos^, PieceLen * SizeOf(Char));
    Inc(WritePos, PieceLen);
    Inc(Idx);
  end;
end;
It's fairly self-explanatory. The complications are caused by empty strings. Any attempt to index characters of an empty string will lead to exceptions when range checking is enabled.
To handle that complication, you can add if tests for the case where one of the strings involved in the Move call is empty. I prefer a different approach. I'd rather cast the string variable to be a pointer. That bypasses range checking but also allows the if statement to be omitted.
Move(Pointer(Source[i])^, DestPtr^, SourceLen*SizeOf(Char));
One might wonder what happens if Source[i] is empty. In that case Pointer(Source[i]) is nil and you might expect an access violation. In fact, there is no error because the length of the move as specified by the third argument is zero, and the nil pointer is never actually de-referenced.
The other line of note is here:
DestPtr := PChar(Pointer(Dest)) + OriginalDestLen;
We use PChar(Pointer(Dest)) rather than PChar(Dest). The latter invokes code to check whether or not Dest is empty, and if so yields a pointer to a single null-terminator. We want to avoid executing that code, and obtain the address held in Dest directly, even if it is nil.
In the second loop you forget that S already has the right size to get filled with all the elements so you have to use another variable to know the destination parameter of Move
// Appends every string of A to S. The total length is computed first so S is
// resized once; WriteAt then tracks the 1-based index in S where the next
// element has to be copied.
procedure Concat(var S: String; const A: Array of String);
var
  Idx, Extra, PieceLen, WriteAt: Integer;
begin
  Extra := 0;
  for Idx := Low(A) to High(A) do
    Extra := Extra + Length(A[Idx]);

  WriteAt := Length(S) + 1;          // first free position after the old content
  SetLength(S, Length(S) + Extra);

  for Idx := Low(A) to High(A) do
  begin
    PieceLen := Length(A[Idx]);
    if PieceLen = 0 then
      Continue;                      // indexing [1] of an empty string is not allowed
    Move(A[Idx][1], S[WriteAt], PieceLen * SizeOf(Char));
    Inc(WriteAt, PieceLen);
  end;
end;
Casting the source parameter to PWideChar is totally superfluous since the Move function use a kind of old generic syntax that allows to pass everything you want (const Parameter without type).
I am trying to remotely read a binary (REG_BINARY) registry value, but I get nothing but junk back. Any ideas what is wrong with this code? I'm using Delphi 2010:
// Reads the REG_BINARY value AValue under AKey (root ARootKey) from the
// registry of the remote machine sMachine.
// Returns True and stores the data in sResult when the value exists, is of
// type rdBinary, is non-empty and was read completely; returns False
// otherwise.
// NOTE(review): this is the question's code, reported as returning junk.
function GetBinaryRegistryData(ARootKey: HKEY; AKey, AValue, sMachine: string; var sResult: string): boolean;
var
MyReg: TRegistry;
RegDataType: TRegDataType;
DataSize, Len: integer;
sBinData: string;
bResult: Boolean;
begin
bResult := False;
MyReg := TRegistry.Create(KEY_QUERY_VALUE);
try
MyReg.RootKey := ARootKey;
if MyReg.RegistryConnect('\\' + sMachine) then
begin
if MyReg.KeyExists(AKey) then
begin
if MyReg.OpenKeyReadOnly(AKey) then
begin
try
RegDataType := MyReg.GetDataType(AValue);
if RegDataType = rdBinary then
begin
DataSize := MyReg.GetDataSize(AValue);
if DataSize > 0 then
begin
// DataSize is a byte count, but SetLength on a string allocates DataSize
// characters. With a two-byte Char (the question states Delphi 2010) the
// buffer is twice as large as needed and ReadBinaryData packs two data
// bytes into each character of the first half.
SetLength(sBinData, DataSize);
Len := MyReg.ReadBinaryData(AValue, PChar(sBinData)^, DataSize);
if Len <> DataSize then
raise Exception.Create(SysErrorMessage(ERROR_CANTREAD))
else
begin
sResult := sBinData;
bResult := True;
end;
end;
end;
except
// Any exception, including the one raised above for a short read, is
// swallowed here; the caller only sees Result = False.
MyReg.CloseKey;
end;
MyReg.CloseKey;
end;
end;
end;
finally
MyReg.Free;
end;
Result := bResult;
end;
And I call it like this:
// Example call: reads DigitalProductId from a remote machine and prints the
// returned string as-is. The Boolean result of the call is not checked.
GetBinaryRegistryData(
HKEY_LOCAL_MACHINE,
'\SOFTWARE\Microsoft\Windows NT\CurrentVersion',
'DigitalProductId', '192.168.100.105',
sProductId
);
WriteLn(sProductId);
The result I receive from the WriteLn on the console is:
ñ ♥ ???????????6Z ????1 ???????☺ ???♦ ??3 ? ??? ?
??
Assuming that you are already connected remotely, try using the GetDataAsString function
to read binary data from the registry.
sResult := MyReg.GetDataAsString(AValue);
You're using Delphi 2010, so all your characters are two bytes wide. When you set the length of your result string, you're allocating twice the amount of space you need. Then you call ReadBinaryData, and it fills half your buffer. There are two bytes of data in each character. Look at each byte separately, and you'll probably find that your data looks less garbage-like.
Don't use strings for storing arbitrary data. Use strings for storing text. To store arbitrary blobs of data, use TBytes, which is an array of bytes.
I'm using Delphi7 (non-unicode VCL), I need to store lots of WideStrings inside a TFileStream. I can't use TStringStream as the (wide)strings are mixed with binary data, the format is projected to speed up loading and writing the data ... However I believe that current way I'm loading/writing the strings might be a bottleneck of my code ...
currently I'm writing length of a string, then writing it char by char ...
while loading, first I'm loading the length, then loading char by char ...
So, what is the fastest way to save and load WideString to TFileStream?
Thanks in advance
Rather than read and write one character at a time, read and write them all at once:
// Writes ws to stream as a LongInt character count followed by the raw
// UTF-16 characters (no terminator). An empty string is written as a count
// of 0 with no character data. Raises EWriteError if the stream cannot take
// all the bytes.
procedure WriteWideString(const ws: WideString; stream: TStream);
var
  nChars: LongInt;
begin
  nChars := Length(ws);
  stream.WriteBuffer(nChars, SizeOf(nChars));
  if nChars > 0 then
    stream.WriteBuffer(ws[1], nChars * SizeOf(ws[1]));
end;
// Reads a string stored as a LongInt character count followed by that many
// UTF-16 characters. Raises EReadError if the stream ends early.
function ReadWideString(stream: TStream): WideString;
var
  CharCount: LongInt;
begin
  stream.ReadBuffer(CharCount, SizeOf(CharCount));
  SetLength(Result, CharCount);
  if CharCount <= 0 then
    Exit;
  // Read all characters in one call, straight into the string's storage.
  stream.ReadBuffer(Pointer(Result)^, CharCount * SizeOf(WideChar));
end;
Now, technically, since WideString is a Windows BSTR, it can contain an odd number of bytes. The Length function reads the number of bytes and divides by two, so it's possible (although not likely) that the code above will cut off the last byte. You could use this code instead:
// Writes ws to stream as a LongInt byte count followed by the raw bytes of
// the underlying BSTR, so that a BSTR with an odd byte length is stored
// without losing its last byte.
procedure WriteWideString(const ws: WideString; stream: TStream);
var
  ByteCount: LongInt;
  Raw: Pointer;
begin
  Raw := Pointer(ws);                       // nil for an empty string
  ByteCount := SysStringByteLen(Raw);
  stream.WriteBuffer(ByteCount, SizeOf(ByteCount));
  if ByteCount <= 0 then
    Exit;
  stream.WriteBuffer(Raw^, ByteCount);
end;
// Reads a string stored as a LongInt byte count followed by that many raw
// BSTR bytes. The BSTR is allocated at its exact byte length and handed to
// Result directly, so odd byte counts and embedded #0 characters survive and
// no intermediate buffer or extra copy is needed.
// Raises EReadError if the stream ends early.
function ReadWideString(stream: TStream): WideString;
var
  nBytes: LongInt;
  bstr: PWideChar;
begin
  Result := '';                                 // guarantees Pointer(Result) = nil below
  stream.ReadBuffer(nBytes, SizeOf(nBytes));
  if nBytes <= 0 then
    Exit;
  bstr := SysAllocStringByteLen(nil, nBytes);   // nil source: allocate without copying
  if bstr = nil then
    OutOfMemoryError;
  // Transfer ownership of the BSTR to Result. A plain "Result := bstr" would
  // make a second copy (stopping at the first #0) and leak this allocation.
  Pointer(Result) := bstr;
  stream.ReadBuffer(bstr^, nBytes);
end;
Inspired by Mghie's answer, have replaced my Read and Write calls with ReadBuffer and WriteBuffer. The latter will raise exceptions if they are unable to read or write the requested number of bytes.
There is nothing special about wide strings, to read and write them as fast as possible you need to read and write as much as possible in one go:
// Demo: writes a WideString to 'test.bin' as <Integer length><characters> in
// two WriteBuffer calls, rewinds, reads it back the same way and asserts that
// the round trip preserved the text.
procedure TForm1.Button1Click(Sender: TObject);
var
  Data: TStream;
  Original, Restored: WideString;
  CharCount: integer;
begin
  Original := 'foo bar baz';
  Data := TFileStream.Create('test.bin', fmCreate);
  try
    // write WideString: length first, then all characters at once
    CharCount := Length(Original);
    Data.WriteBuffer(CharCount, SizeOf(integer));
    if CharCount > 0 then
      Data.WriteBuffer(Original[1], CharCount * SizeOf(WideChar));

    Data.Seek(0, soFromBeginning);

    // read back WideString
    Data.ReadBuffer(CharCount, SizeOf(integer));
    if CharCount <= 0 then
      Restored := ''
    else
    begin
      SetLength(Restored, CharCount);
      Data.ReadBuffer(Restored[1], CharCount * SizeOf(WideChar));
    end;

    Assert(Original = Restored);
  finally
    Data.Free;
  end;
end;
WideStrings contain a 'string' of WideChar's, which use 2 bytes each. If you want to store the UTF-16 (which WideStrings use internally) strings in a file, and be able to use this file in other programs like notepad, you need to write a byte order mark first: #$FEFF.
If you know this, writing can look like this:
// Writes all characters of WideString1 in one call; the count passed to Write is in bytes.
Stream1.Write(WideString1[1],Length(WideString1)*2); //2=SizeOf(WideChar)
reading can look like this:
// Skip the byte order mark, then read the rest of the stream as UTF-16 text.
Stream1.Read(WideChar1,2);//assert returned 2 and WideChar1=#$FEFF
SetLength(WideString1,(Stream1.Size div 2)-1); // characters remaining after the BOM
if Length(WideString1) > 0 then
  Stream1.Read(WideString1[1],Length(WideString1)*2); // Read takes a byte count: 2 bytes per WideChar
You can also use TFastFileStream for reading the data or strings, I pasted the unit at http://pastebin.com/m6ecdc8c2 and a sample below:
program Project36;
{$APPTYPE CONSOLE}
uses
SysUtils, Classes,
FastStream in 'FastStream.pas';
const
WideNull: WideChar = #0;
// Writes Data to Stream as: a Word character count, the characters
// themselves, and a terminating null WideChar.
// NOTE(review): the count is stored in a Word, so strings longer than 65535
// characters cannot be represented - confirm callers never exceed that.
procedure WriteWideStringToStream(Stream: TFileStream; var Data: WideString);
var
  CharCount: Word;
begin
  CharCount := Length(Data);
  // length prefix
  Stream.Write(CharCount, SizeOf(CharCount));
  // character data, skipped for an empty string
  if CharCount <> 0 then
    Stream.Write(Pointer(Data)^, CharCount * SizeOf(WideChar));
  // null termination
  Stream.Write(WideNull, SizeOf(WideNull));
end;
// Creates 'test.bin' containing two length-prefixed, null-terminated
// WideStrings for ReadTestFile to read back.
procedure CreateTestFile;
var
  Output: TFileStream;
  Sample: WideString;
begin
  Output := TFileStream.Create('test.bin', fmCreate);
  try
    Sample := 'Hello World!';
    WriteWideStringToStream(Output, Sample);

    Sample := 'Speed is Delphi!';
    WriteWideStringToStream(Output, Sample);
  finally
    Output.Free;
  end;
end;
// Reads one string written by WriteWideStringToStream. The text is taken
// directly from the stream's memory at the current position as a
// null-terminated PWideChar; the stored length is used only to advance the
// position past the characters and the terminator.
// NOTE(review): the address is computed through a Cardinal cast, which
// assumes 32-bit pointers - confirm before building for 64-bit.
function ReadWideStringFromStream(Stream: TFastFileStream): WideString;
var
  CharCount: Word;
  TextStart: PWideChar;
begin
  // length prefix
  Stream.Read(CharCount, SizeOf(CharCount));
  // the characters start right after the prefix
  TextStart := PWideChar(Cardinal(Stream.Memory) + Stream.Position);
  Result := TextStart;
  // step over the characters and the null terminator
  Stream.Position := Stream.Position + (CharCount * SizeOf(WideChar)) + SizeOf(WideNull);
end;
// Opens 'test.bin' and prints the two WideStrings stored in it, one per line.
procedure ReadTestFile;
var
  Input: TFastFileStream;
  Line: WideString;
  Pass: Integer;
begin
  Input := TFastFileStream.Create('test.bin');
  try
    Input.Position := 0;
    // The file holds exactly two strings.
    for Pass := 1 to 2 do
    begin
      Line := ReadWideStringFromStream(Input);
      WriteLn(Line);
    end;
  finally
    Input.Free;
  end;
end;
// Program entry point: write the sample file, read it back and print it,
// then wait for Enter so the console window stays open.
begin
CreateTestFile;
ReadTestFile;
ReadLn;
end.
The code needs to be compatible with D2007 and D2009.
My Answer: Thanks to everyone who answered, I've gone with:
// Returns the local computer name as reported by the Windows API
// GetComputerName, or '' when the call fails.
function ComputerName : String;
var
  NameBuf: array[0..255] of char;
  NameLen: dword;
begin
  Result := '';
  NameLen := 256;                 // capacity of NameBuf in characters
  if not GetComputerName(NameBuf, NameLen) then
    Exit;
  Result := NameBuf;              // copies up to the null terminator
end;
The Windows API GetComputerName should work. It is defined in windows.pas.
Another approach, which works well is to get the computer name via the environment variable. The advantage of this approach (or disadvantage depending on your software) is that you can trick the program into running as a different machine easily.
Result := GetEnvironmentVariable('COMPUTERNAME');
The computer name environment variable is set by the system. To "override" the behavior, you can create a batch file that calls your program, setting the environment variable prior to the call (each command interpreter gets its own "copy" of the environment, and changes are local to that session or any children launched from that session).
GetComputerName from the Windows API is the way to go. Here's a wrapper for it.
// Wrapper around the Windows API GetComputerName.
// Returns the local computer name, or '' when the API call fails.
function GetLocalComputerName : string;
var
  c1 : dword;
  arrCh : array [0..MAX_PATH] of char;
begin
  c1 := MAX_PATH;                       // buffer capacity passed to the API, in characters
  // The Boolean result must be checked: on failure the buffer content is
  // undefined and c1 is not a valid length.
  if GetComputerName(arrCh, c1) then
    SetString(Result, arrCh, c1)        // on success c1 = characters copied, excluding the terminator
  else
    Result := '';
end;
What about this :
// Returns the local computer name via Windows.GetComputerName, or '' when
// the API call fails.
function GetComputerName: string;
var
  buffer: array[0..MAX_COMPUTERNAME_LENGTH + 1] of Char;
  Size: Cardinal;
begin
  Size := MAX_COMPUTERNAME_LENGTH + 1;  // capacity passed to the API, in characters
  // Qualified with the unit name because this function shadows the API name.
  if Windows.GetComputerName(@buffer[0], Size) then
    SetString(Result, buffer, Size)     // Size = characters copied, excluding the terminator
  else
    Result := '';
end;
From http://exampledelphi.com/delphi.php/tips-and-tricks/delphi-how-to-get-computer-name/
If you want more than just the host name, you need GetComputerNameEx. Since there are many wrong implementations around (MAX_COMPUTERNAME_LENGTH is not enough, and 1024 is bad), here is mine:
uses Winapi.Windows;
// Returns the requested form of the local computer name using
// GetComputerNameEx. The required buffer size is queried first, so names of
// any length are supported. Raises EOSError if the API fails for a reason
// other than "buffer too small".
function GetLocalComputerName(
  NameType: TComputerNameFormat = ComputerNameDnsHostname): string;
var
  len: DWORD;
begin
  Result := '';
  len := 0;
  // Size probe: expected to fail with ERROR_MORE_DATA and set len to the
  // required size, including the terminating null.
  if GetComputerNameEx(NameType, nil, len) then
    Exit;
  if GetLastError <> ERROR_MORE_DATA then
    RaiseLastOSError;
  if len = 0 then
    Exit;                               // avoids DWORD underflow on an unexpected zero size
  SetLength(Result, len);
  if not GetComputerNameEx(NameType, PChar(Result), len) then
    RaiseLastOSError;
  // On success len is the number of characters copied, excluding the
  // terminator, so this trims any trailing #0 from the result.
  SetLength(Result, len);
end;
Valid values for the NameType parameter are:
ComputerNameDnsHostname, ComputerNameDnsDomain, ComputerNameDnsFullyQualified
ComputerNamePhysicalDnsHostname, ComputerNamePhysicalDnsDomain, ComputerNamePhysicalDnsFullyQualified
ComputerNameNetBIOS, ComputerNamePhysicalNetBIOS
I use this,
// Returns the local host name obtained from Winsock's gethostname, or ''
// when Winsock cannot be initialised or the call fails.
function GetLocalPCName: String;
var
  Buffer: array [0..255] of AnsiChar;   // 256 bytes, enough for any host name gethostname returns
  GInitData: TWSADATA;
begin
  Result := '';
  if WSAStartup($101, GInitData) <> 0 then
    Exit;                               // Winsock unavailable; WSACleanup must not be called
  try
    FillChar(Buffer, SizeOf(Buffer), 0);
    if GetHostName(Buffer, SizeOf(Buffer)) = 0 then
      Result := string(AnsiString(PAnsiChar(@Buffer[0])));
  finally
    WSACleanup;                         // balances the successful WSAStartup
  end;
end;
Bye
This code works great, except when the computer is in a simple Workgroup: calling GetLocalComputerName(ComputerNameDnsFullyQualified) then returns the computer name with a #0 (null) char at the end, which breaks the processing of any other characters sent after it to a Memo component used as a log.
Just fix this issue by checking for a null at the end.
// WideString variant: returns the requested form of the local computer name
// using GetComputerNameEx, without any trailing #0 characters.
// Raises EOSError if the API fails for a reason other than "buffer too small".
function GetLocalComputerName(
  NameType: TComputerNameFormat = ComputerNameDnsHostname): WideString;
var
  len: DWORD;
begin
  Result := '';
  len := 0;
  // Size probe: expected to fail with ERROR_MORE_DATA and set len to the
  // required size, including the terminating null.
  if GetComputerNameEx(NameType, nil, len) then
    Exit;
  if GetLastError <> ERROR_MORE_DATA then
    RaiseLastOSError;
  if len = 0 then
    Exit;                               // avoids DWORD underflow on an unexpected zero size
  SetLength(Result, len);
  if not GetComputerNameEx(NameType, PWideChar(Result), len)
    then RaiseLastOSError;
  // On success len is the number of characters copied, excluding the terminator.
  SetLength(Result, len);
  // Defensive: drop any null that is still at the end of the name.
  while (Length(Result) > 0) and (Result[Length(Result)] = #0) do
    SetLength(Result, Length(Result) - 1);
end;