I need to read the last line of some very large text files (to get the timestamp from the data). TStringList would be a simple approach, but it raises an out-of-memory error. I'm trying to use Seek and BlockRead, but the characters in the buffer are all nonsense. Is this something to do with Unicode?
Function TForm1.ReadLastLine2(FileName: String): String;
var
  FileHandle: File;
  s, line: string;
  ok: 0..1;
  Buf: array[1..8] of Char;
  k: longword;
  i, ReadCount: integer;
begin
  AssignFile(FileHandle, FileName);
  Reset(FileHandle); // or for binary files: Reset(FileHandle, 1);
  ok := 0;
  k := FileSize(FileHandle);
  Seek(FileHandle, k - 1);
  s := '';
  while ok <> 1 do begin
    BlockRead(FileHandle, buf, SizeOf(Buf) - 1, ReadCount);
    // BlockRead(var FileHandle: File; var Buffer; RecordCount: Integer {; var RecordsRead: Integer});
    if ord(buf[1]) <> 13 then // Arg to integer
      s := s + buf[1]
    else
      ok := ok + 1;
    k := k - 1;
    seek(FileHandle, k);
  end;
  CloseFile(FileHandle);
  // Reverse the order in the line read
  setlength(line, length(s));
  for i := 1 to length(s) do
    line[length(s) - i + 1] := s[i];
  Result := Line;
end;
Based on www.delphipages.com/forum/showthread.php?t=102965
The test file is a simple CSV I created in Excel (this is not the 100 MB file I ultimately need to read).
a,b,c,d,e,f,g,h,i,j,blank
A,B,C,D,E,F,G,H,I,J,blank
1,2,3,4,5,6,7,8,9,0,blank
Mary,had,a,little,lamb,His,fleece,was,white,as,snow
And,everywhere,that,Mary,went,The,lamb,was,sure,to,go
You really have to read the file in LARGE chunks from the tail to the head.
Since the file is too large to fit in memory, reading it line by line from start to end would be very slow; with ReadLn, twice as slow.
You also have to be prepared for the fact that the last line may or may not end with an EOL.
Personally I would also account for three possible EOL sequences:
CR/LF aka #13#10=^M^J - DOS/Windows style
CR without LF - just #13=^M - Classic MacOS file
LF without CR - just #10=^J - UNIX style, including MacOS version 10
If you are sure your CSV files would only ever be generated by native Windows programs, it would be safe to assume a full CR/LF is used. But if there can be Java programs, non-Windows platforms, or mobile programs involved, I would be less sure. Of course, a pure CR without LF is the least probable case of them all.
uses System.IOUtils, System.Math, System.Classes;
type FileChar = AnsiChar; FileString = AnsiString; // for non-Unicode files
// type FileChar = WideChar; FileString = UnicodeString;// for UTF16 and UCS-2 files
const FileCharSize = SizeOf(FileChar);
// somewhere later in the code add: Assert(FileCharSize = SizeOf(FileString[1]));
function ReadLastLine(const FileName: String): FileString; overload; forward;
const PageSize = 4*1024;
// the minimal read atom of most modern HDD and the memory allocation atom of Win32
// since the chances your file would have lines longer than 4Kb are very small - I would not increase it to several atoms.
function ReadLastLine(const Lines: TStringDynArray): FileString; overload;
var
  i: integer;
begin
  Result := '';
  i := High(Lines);
  if i < Low(Lines) then exit; // empty array - empty file
  Result := Lines[i];
  if Result > '' then exit; // we got the line
  Dec(i); // skip the empty ghost line, in case the last line was CRLF-terminated
  if i < Low(Lines) then exit; // that ghost was the only line in the empty file
  Result := Lines[i];
end;
// scan for EOLs in not-yet-scanned part
function FindLastLine(buffer: TArray<FileChar>; const OldRead: Integer;
  const LastChunk: Boolean; out Line: FileString): boolean;
var
  i, tailCRLF: integer;
  c: FileChar;
begin
  Result := False;
  if Length(Buffer) = 0 then exit;
  i := High(Buffer);
  tailCRLF := 0; // test for trailing CR/LF
  if Buffer[i] = ^J then begin // LF - single, or after CR
    Dec(i);
    Inc(tailCRLF);
  end;
  if (i >= Low(Buffer)) and (Buffer[i] = ^M) then // CR, alone or before LF
    Inc(tailCRLF);
  i := High(Buffer) - Max(OldRead, tailCRLF);
  if i - Low(Buffer) < 0 then exit; // no new data to scan - results would be like before
  if OldRead > 0 then Inc(i); // the CR/LF pair could be sliced between the new and the previous buffer - so start a bit earlier
  for i := i downto Low(Buffer) do begin
    c := Buffer[i];
    if (c = ^J) or (c = ^M) then begin // found EOL
      SetString(Line, @Buffer[i + 1], High(Buffer) - tailCRLF - i);
      exit(True);
    end;
  end;
  // we did not find a non-terminating EOL in the buffer (except maybe the trailing one),
  // so we should ask for more file content, if there is any left,
  // or take the entire file (without the trailing EOL, if any)
  if LastChunk then begin
    SetString(Line, @Buffer[Low(Buffer)], Length(Buffer) - tailCRLF);
    Result := true;
  end;
end;
function ReadLastLine(const FileName: String): FileString; overload;
var
  Buffer, tmp: TArray<FileChar>;
  // dynamic arrays - they ease memory management and protect from stack corruption
  FS: TFileStream;
  FSize, NewPos: Int64;
  OldRead, NewLen: Integer;
  EndOfFile: boolean;
begin
  Result := '';
  FS := TFile.OpenRead(FileName);
  try
    FSize := FS.Size;
    if FSize <= PageSize then begin // small file, we can be lazy!
      FreeAndNil(FS); // free the handle and avoid double-free in finally
      Result := ReadLastLine(TFile.ReadAllLines(FileName, TEncoding.ANSI));
      // or TEncoding.UTF16
      // warning - TFile is not share-aware, if the file is being written to by another app
      exit;
    end;
    SetLength(Buffer, PageSize div FileCharSize);
    OldRead := 0;
    repeat
      NewPos := FSize - Length(Buffer) * FileCharSize;
      EndOfFile := NewPos <= 0;
      if NewPos < 0 then NewPos := 0;
      FS.Position := NewPos;
      FS.ReadBuffer(Buffer[Low(Buffer)], (Length(Buffer) - OldRead) * FileCharSize);
      if FindLastLine(Buffer, OldRead, EndOfFile, Result) then
        exit; // done!
      tmp := Buffer; Buffer := nil; // flip-flop: preparing to broaden our mouth
      OldRead := Length(tmp); // no need to re-scan the tail again and again when expanding our scanning range
      NewLen := Min(2 * Length(tmp), FSize div FileCharSize);
      SetLength(Buffer, NewLen); // this may trigger EOutOfMemory...
      Move(tmp[Low(tmp)], Buffer[High(Buffer) - OldRead + 1], OldRead * FileCharSize);
      tmp := nil; // free the old buffer
    until EndOfFile;
  finally
    FS.Free;
  end;
end;
PS. Note one extra special case: if you use Unicode chars (two-byte ones) and are given an odd-length file (3 bytes, 5 bytes, etc.), you would never be able to scan the leading single byte (half a widechar). Maybe you should add an extra guard there, like Assert(0 = FS.Size mod FileCharSize).
PPS. As a rule of thumb, you had better keep those functions out of the form class - because why mix them? In general you should separate concerns into small blocks. Reading a file has nothing to do with user interaction, so it is better offloaded to an extra UNIT. Then you would be able to use the functions from that unit in one form or ten forms, in the main thread or in a multi-threaded application. Like LEGO parts - they give you flexibility by being small and separate.
PPPS. Another approach here would be using memory-mapped files. Google for MMF implementations for Delphi and for articles about the benefits and problems of the MMF approach. Personally, I think rewriting the code above to use MMF would greatly simplify it, removing several "special cases" and the troublesome memory-copying flip-flop. OTOH it would demand that you be very strict with pointer arithmetic (see the sketch after these links).
https://en.wikipedia.org/wiki/Memory-mapped_file
https://msdn.microsoft.com/en-us/library/ms810613.aspx
http://torry.net/quicksearchd.php?String=memory+map&Title=No
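For a flavor of what the MMF variant could look like, here is a minimal untested sketch using the raw Win32 API. It assumes an ANSI file smaller than 2 GB; the function name and the error handling are mine, not a tested implementation:
uses Windows, SysUtils;

function MMFReadLastLine(const FileName: string): AnsiString;
var
  hFile, hMap: THandle;
  Size: DWORD;
  P: PAnsiChar;
  i, tail: Integer;
begin
  Result := '';
  hFile := CreateFile(PChar(FileName), GENERIC_READ, FILE_SHARE_READ, nil,
    OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
  if hFile = INVALID_HANDLE_VALUE then RaiseLastOSError;
  try
    Size := GetFileSize(hFile, nil); // sketch only: assumes < 2 GB
    if Size = 0 then exit;
    hMap := CreateFileMapping(hFile, nil, PAGE_READONLY, 0, 0, nil);
    if hMap = 0 then RaiseLastOSError;
    try
      P := MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
      if P = nil then RaiseLastOSError;
      try
        i := Size - 1;
        if (i >= 0) and (P[i] = #10) then Dec(i); // skip trailing LF (or the LF of CR/LF)
        if (i >= 0) and (P[i] = #13) then Dec(i); // skip trailing CR
        tail := i;
        while (i >= 0) and not (P[i] in [#10, #13]) do
          Dec(i); // walk back to the previous EOL
        SetString(Result, P + i + 1, tail - i);
      finally
        UnmapViewOfFile(P);
      end;
    finally
      CloseHandle(hMap);
    end;
  finally
    CloseHandle(hFile);
  end;
end;
Note how the mapping removes the flip-flop entirely: the whole file is addressable, so finding the last line is a single backward scan over a pointer.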
Your char type is two bytes, so that buffer is 16 bytes. Then with BlockRead you read SizeOf(buffer)-1 bytes into it, and check whether the first two-byte char is equal to #13.
The SizeOf(buffer)-1 is dodgy (where does that -1 come from?), and the rest is valid, but only if your input file is UTF-16.
Also, you read 8 (or 16) characters each time, but compare only one and then do a Seek again. That is not very logical either.
If your encoding is not UTF-16, I suggest you change the type of the buffer elements to AnsiChar and remove the -1.
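For illustration, a minimal sketch of the suggested fix (ReadLastLineAnsi is a hypothetical name; it assumes a single-byte-encoded file). The file is opened untyped with a record size of 1, one byte is read per step, and prepending avoids the reversal loop:
function ReadLastLineAnsi(const FileName: string): string;
var
  F: file;
  Buf: AnsiChar;
  Pos: Integer;
  s: AnsiString;
begin
  AssignFile(F, FileName);
  Reset(F, 1);               // record size = 1 byte
  try
    Pos := FileSize(F) - 1;
    s := '';
    while Pos >= 0 do
    begin
      Seek(F, Pos);
      BlockRead(F, Buf, 1);  // read exactly one byte
      if Buf in [#13, #10] then
      begin
        if s <> '' then Break;  // hit the EOL before the last line
        // else: still skipping the file's trailing EOL characters
      end
      else
        s := Buf + s;           // prepend, so no reversal needed
      Dec(Pos);
    end;
    Result := string(s);
  finally
    CloseFile(F);
  end;
end;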
In response to kopik's suggestion, I figured out how to do it with TFileStream. It works OK with the simple test file, though there may be some further tweaks when I use it on a variety of CSV files. Also, I don't make any claims that this is the most efficient method.
procedure TForm1.Button6Click(Sender: TObject);
var
  StreamSize, ApproxNumRows: Integer;
  TempStr: String;
begin
  if OpenDialog1.Execute then begin
    TempStr := ReadLastLineOfTextFile(OpenDialog1.FileName, StreamSize, ApproxNumRows);
    // TempStr := ReadFileStream('c:\temp\CSVTestFile.csv');
    ShowMessage('approximately ' + IntToStr(ApproxNumRows) + ' Rows');
    ListBox1.Items.Add(TempStr);
  end;
end;
Function TForm1.ReadLastLineOfTextFile(const FileName: String; var StreamSize, ApproxNumRows: Integer): String;
const
  MAXLINELENGTH = 256;
var
  Stream: TFileStream;
  BlockSize, CharCount: integer;
  Hash13Found: Boolean;
  Buffer: array[0..MAXLINELENGTH] of AnsiChar;
begin
  Hash13Found := False;
  Result := '';
  Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    StreamSize := Stream.Size;
    if StreamSize < MAXLINELENGTH then
      BlockSize := StreamSize
    else
      BlockSize := MAXLINELENGTH;
    // for CharCount := 0 to Length(Buffer)-1 do begin
    //   Buffer[CharCount] := #0; // zeroing the buffer can aid diagnostics
    // end;
    CharCount := 0;
    repeat
      Stream.Seek(-(CharCount + 3), soFromEnd); // +3 misses out the #0,#10,#13 at the end of the file
      Stream.Read(Buffer[CharCount], 1);
      Result := String(Buffer[CharCount]) + Result;
      if Buffer[CharCount] = #13 then
        Hash13Found := True;
      Inc(CharCount);
    until Hash13Found or (CharCount = BlockSize);
    // ShowMessage(Result); // debugging leftover; the function already returns Result
    ApproxNumRows := Round(StreamSize / CharCount);
  finally
    Stream.Free; // the original version leaked the stream handle
  end;
end;
Just thought of a new solution.
Again, there may be better ones, but this is the best I have thought of.
function GetLastLine(textFilePath: string): string;
var
  list: TStringList;
begin
  Result := '';
  list := TStringList.Create;
  try
    list.LoadFromFile(textFilePath);
    if list.Count > 0 then // guard against an empty file
      Result := list[list.Count - 1];
  finally
    list.Free;
  end;
end;
I want to read byte by byte with a TStream (streamIn), or any stream, from a cache file.
I don't know streams well; actually, I want to read the cache file, but byte by byte.
When this "read" function is called from another class, it must read the next byte in the cache file. But TStream has no "ReadByte" method.
function TStringInputStream.read: Integer;
begin
  if not back then
    lastByte := streamIn.ReadByte -> ???
  back := false;
  Result := lastByte;
end;
function TStringInputStream.readLine: String;
var
  c: Integer;
  ns: Boolean;
  endd: Boolean;
  res: String;
begin
  ns := True;
  endd := False;
  while not endd do
  begin
    c := read;
    if (c = -1) and ns then
    begin
      Result := '';
      Exit;
    end;
    ns := false;
    if (c = LF) or (c = -1) then
      endd := true
    else if c = CR then
    begin
      if read <> LF then
        rewind;
      endd := true;
    end
    else
      res := IntToStr(c);
  end;
  Result := res;
end;
Use ReadBuffer to read a single byte.
var
  B: Byte;
....
Stream.ReadBuffer(B, SizeOf(B));
Or if you use XE3 or later you can use ReadData.
Stream.ReadData(B);
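For older Delphi versions without ReadByte or ReadData, a small helper along the questioner's lines can be sketched (TryReadByte is an illustrative name; it returns -1 at end of stream, matching the -1 convention in readLine above):
function TryReadByte(Stream: TStream): Integer;
var
  B: Byte;
begin
  if Stream.Read(B, SizeOf(B)) = SizeOf(B) then
    Result := B
  else
    Result := -1; // end of stream
end;
Unlike ReadBuffer, plain Read does not raise an exception on a short read, which is what makes the end-of-stream test possible here.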
I have this function that should crypt bytes from a resource file, but it just crashes my app:
function crypt(src: Pointer; len: DWORD): DWORD;
var
  B: TByteArray absolute src;
  index: DWORD;
begin
  for index := 0 to len - 1 do
  begin
    B[index] := B[index] xor 5; // just to test if it's working
  end;
  result := 1;
end;
I am using it like this:
hFind := FindResource(...);
size := SizeOfResource(HInstance, hFind);
hRes :=LoadResource(HInstance, hFind);
bytes :=LockResource(hRes);
crypt(bytes, size);
If I don't call the crypt function, the program works. What am I doing wrong?
You've got two problems with that code. The first is with the byte array: its elements do not contain your resource data, but random data starting with the address held in your pointer 'src' (the 'absolute' directive overlays the array on the pointer variable itself). Use a pointer to a TByteArray like this:
var
  B: PByteArray absolute src;
  index: DWORD;
begin
  for index := 0 to len - 1 do
  begin
    B^[index] := B^[index] xor 5; // just to test if it's working
  end;
  ..
The second is that you'll still get an AV for trying to modify a read-only memory segment. Depending on what you are trying to do, you can use VirtualProtect on 'bytes' before calling 'crypt', or copy the memory to a byte array and modify it there, or use BeginUpdateResource/UpdateResource/EndUpdateResource if you're trying to modify the resource itself.
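As a minimal sketch of the copy-to-a-byte-array option (it assumes 'bytes: Pointer' and 'size: DWORD' from the snippet above, and that size > 0; this is one valid fix, not the only one):
var
  tmp: array of Byte;
begin
  SetLength(tmp, size);
  Move(bytes^, tmp[0], size); // copy the read-only resource into writable heap memory
  crypt(@tmp[0], size);       // safe: the xor now runs on the copy
  // work with tmp from here on, instead of the locked resource pointer
end;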
Code like this is easiest to write with a plain byte pointer, like this:
function crypt(src: Pointer; len: DWORD): DWORD;
var
  B: ^Byte;
  index: DWORD;
begin
  B := src;
  for index := 0 to len - 1 do
  begin
    B^ := B^ xor 5; // just to test if it's working
    inc(B);
  end;
  result := 1;
end;
Naturally you do need to respect the issue of read-only memory that Sertac highlighted. I'm just adding this code to illustrate what I believe to be the canonical way to walk a buffer that arrives as a void pointer.
I'm using Delphi 7 (non-Unicode VCL), and I need to store lots of WideStrings inside a TFileStream. I can't use TStringStream as the (wide) strings are mixed with binary data; the format is designed to speed up loading and writing the data. However, I believe the way I'm currently loading and writing the strings might be a bottleneck in my code.
Currently I'm writing the length of a string, then writing it char by char.
While loading, first I load the length, then I load the string char by char.
So, what is the fastest way to save and load a WideString to a TFileStream?
Thanks in advance
Rather than read and write one character at a time, read and write them all at once:
procedure WriteWideString(const ws: WideString; stream: TStream);
var
  nChars: LongInt;
begin
  nChars := Length(ws);
  stream.WriteBuffer(nChars, SizeOf(nChars));
  if nChars > 0 then
    stream.WriteBuffer(ws[1], nChars * SizeOf(ws[1]));
end;
function ReadWideString(stream: TStream): WideString;
var
  nChars: LongInt;
begin
  stream.ReadBuffer(nChars, SizeOf(nChars));
  SetLength(Result, nChars);
  if nChars > 0 then
    stream.ReadBuffer(Result[1], nChars * SizeOf(Result[1]));
end;
Now, technically, since WideString is a Windows BSTR, it can contain an odd number of bytes. The Length function reads the number of bytes and divides by two, so it's possible (although not likely) that the code above will cut off the last byte. You could use this code instead:
procedure WriteWideString(const ws: WideString; stream: TStream);
var
  nBytes: LongInt;
begin
  nBytes := SysStringByteLen(Pointer(ws));
  stream.WriteBuffer(nBytes, SizeOf(nBytes));
  if nBytes > 0 then
    stream.WriteBuffer(Pointer(ws)^, nBytes);
end;
function ReadWideString(stream: TStream): WideString;
var
  nBytes: LongInt;
  buffer: PAnsiChar;
begin
  stream.ReadBuffer(nBytes, SizeOf(nBytes));
  if nBytes > 0 then begin
    GetMem(buffer, nBytes);
    try
      stream.ReadBuffer(buffer^, nBytes);
      Result := SysAllocStringByteLen(buffer, nBytes)
    finally
      FreeMem(buffer);
    end;
  end else
    Result := '';
end;
Inspired by mghie's answer, I have replaced my Read and Write calls with ReadBuffer and WriteBuffer. The latter raise exceptions if they are unable to read or write the requested number of bytes.
There is nothing special about wide strings; to read and write them as fast as possible, you need to read and write as much as possible in one go:
procedure TForm1.Button1Click(Sender: TObject);
var
  Str: TStream;
  W, W2: WideString;
  L: integer;
begin
  W := 'foo bar baz';
  Str := TFileStream.Create('test.bin', fmCreate);
  try
    // write WideString
    L := Length(W);
    Str.WriteBuffer(L, SizeOf(integer));
    if L > 0 then
      Str.WriteBuffer(W[1], L * SizeOf(WideChar));
    Str.Seek(0, soFromBeginning);
    // read back WideString
    Str.ReadBuffer(L, SizeOf(integer));
    if L > 0 then begin
      SetLength(W2, L);
      Str.ReadBuffer(W2[1], L * SizeOf(WideChar));
    end else
      W2 := '';
    Assert(W = W2);
  finally
    Str.Free;
  end;
end;
WideStrings contain a 'string' of WideChars, which use 2 bytes each. If you want to store the UTF-16 strings (which WideStrings use internally) in a file, and be able to use this file in other programs like Notepad, you need to write a byte order mark first: #$FEFF.
If you know this, writing can look like this:
WideChar1 := #$FEFF;
Stream1.Write(WideChar1, 2); // write the byte order mark first
Stream1.Write(WideString1[1], Length(WideString1) * 2); // 2 = SizeOf(WideChar)
reading can look like this:
Stream1.Read(WideChar1, 2); // assert it returned 2 and WideChar1 = #$FEFF
SetLength(WideString1, (Stream1.Size div 2) - 1);
Stream1.Read(WideString1[1], Stream1.Size - 2); // Read counts bytes, not WideChars
You can also use TFastFileStream for reading the data or strings; I pasted the unit at http://pastebin.com/m6ecdc8c2 and a sample below:
program Project36;
{$APPTYPE CONSOLE}
uses
  SysUtils, Classes,
  FastStream in 'FastStream.pas';
const
  WideNull: WideChar = #0;

procedure WriteWideStringToStream(Stream: TFileStream; var Data: WideString);
var
  len: Word;
begin
  len := Length(Data);
  // Write WideString length
  Stream.Write(len, SizeOf(len));
  if (len > 0) then
  begin
    // Write WideString
    Stream.Write(Data[1], len * SizeOf(WideChar));
  end;
  // Write null termination
  Stream.Write(WideNull, SizeOf(WideNull));
end;

procedure CreateTestFile;
var
  Stream: TFileStream;
  MyString: WideString;
begin
  Stream := TFileStream.Create('test.bin', fmCreate);
  try
    MyString := 'Hello World!';
    WriteWideStringToStream(Stream, MyString);
    MyString := 'Speed is Delphi!';
    WriteWideStringToStream(Stream, MyString);
  finally
    Stream.Free;
  end;
end;

function ReadWideStringFromStream(Stream: TFastFileStream): WideString;
var
  len: Word;
begin
  // Read length of WideString
  Stream.Read(len, SizeOf(len));
  // Read WideString
  Result := PWideChar(Cardinal(Stream.Memory) + Stream.Position);
  // Update position and skip null termination
  Stream.Position := Stream.Position + (len * SizeOf(WideChar)) + SizeOf(WideNull);
end;

procedure ReadTestFile;
var
  Stream: TFastFileStream;
  my_wide_string: WideString;
begin
  Stream := TFastFileStream.Create('test.bin');
  try
    Stream.Position := 0;
    // Read WideString
    my_wide_string := ReadWideStringFromStream(Stream);
    WriteLn(my_wide_string);
    // Read another WideString
    my_wide_string := ReadWideStringFromStream(Stream);
    WriteLn(my_wide_string);
  finally
    Stream.Free;
  end;
end;

begin
  CreateTestFile;
  ReadTestFile;
  ReadLn;
end.
We often replace non-desirable characters in a file with another "good" character.
The interface is:
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
To replace all non-desirables with a space we might call:
cleanfileASCII2('original.txt', 32, 'cleaned.txt');
The problem is that this takes a rather long time. Is there a better way to do it than the one shown?
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
var
  F1, F2: file of char;
  Ch: Char;
  tempfilename: string;
  i, n, dex: integer;
begin
  // original
  AssignFile(F1, vfilename);
  Reset(F1);
  // output file
  AssignFile(F2, voutfilename);
  Rewrite(F2);
  while not Eof(F1) do
  begin
    Read(F1, Ch);
    n := ord(ch);
    if ((n < 32) or (n > 127)) and (not (n in [10, 13])) then
    begin // bad char
      if vgood <> -1 then
      begin
        ch := chr(vgood);
        Write(F2, Ch);
      end
    end
    else // good char
      Write(F2, Ch);
  end;
  CloseFile(F2);
  CloseFile(F1);
end;
The problem has to do with how you're treating the buffer. Memory transfers are the most expensive part of any operation. In this case, you're looking at the file byte by byte. By changing to a blockread or buffered read, you will realize an enormous increase in speed. Note that the correct buffer size varies based on where you are reading from. For a networked file, you will find extremely large buffers may be less efficient due to the packet size TCP/IP imposes. Even this has become a bit murky with large packets from gigE but, as always, the best result is to benchmark it.
I converted from standard reads to a file stream just for convenience; you could easily do the same thing with a BlockRead. In this case, I took a 15 MB file and ran it through your routine. It took 131,478 ms to perform the operation on a local file. With the 1024-byte buffer, it took 258 ms.
procedure cleanfileASCII3(vfilename: string; vgood: integer; voutfilename: string);
const
  bufsize = 1023;
var
  inFS, outFS: TFileStream;
  buffer: array[0..bufsize] of byte;
  readSize: integer;
  i, n: integer; // n was missing from the original declarations
begin
  if not FileExists(vFileName) then exit;
  inFS := TFileStream.Create(vFileName, fmOpenRead);
  outFS := TFileStream.Create(vOutFileName, fmCreate);
  try
    inFS.Position := 0;
    while not (inFS.Position >= inFS.Size) do
    begin
      readSize := inFS.Read(buffer, sizeof(buffer));
      for i := 0 to readSize - 1 do
      begin
        n := buffer[i];
        if ((n < 32) or (n > 127)) and (not (n in [10, 13])) and (vgood <> -1) then
          buffer[i] := vgood;
      end;
      outFS.Write(buffer, readSize);
    end;
  finally
    inFS.Free;
    outFS.Free;
  end;
end;
Several improvements:
Buffer the data: read 2 KB or 16 KB or similarly sized blocks.
Use a lookup table.
Here's a stab at it, which is untested (no compiler in front of me right now):
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
var
  f1, f2: File;
  table: array[Char] of Char;
  index, inBuffer: Integer;
  buffer: array[0..2047] of Char;
  c: Char;
begin
  for c := #0 to #31 do
    table[c] := ' ';
  for c := #32 to #127 do
    table[c] := c;
  for c := #128 to #255 do
    table[c] := ' ';
  table[#10] := #10; // exception to spaces <32
  table[#13] := #13; // exception to spaces <32
  AssignFile(F1, vfilename);
  Reset(F1, 1);
  AssignFile(F2, voutfilename);
  Rewrite(F2, 1);
  while not Eof(F1) do
  begin
    BlockRead(f1, buffer, SizeOf(buffer), inBuffer);
    for index := 0 to inBuffer - 1 do
      buffer[index] := table[buffer[index]];
    BlockWrite(f2, buffer, inBuffer);
  end;
  Close(f2);
  Close(f1);
end;
You could buffer your input and output so you read a chunk of characters (even the whole file, if it's not too big) into an array, then process the array, then write the entire array to the output file.
In most of these cases, disk I/O is the bottleneck, and doing fewer, larger reads instead of many small ones will be faster.
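A minimal sketch of that whole-file idea (CleanWholeFile is an illustrative name; it assumes the file fits in memory and a single-byte encoding):
uses Classes, SysUtils;

procedure CleanWholeFile(const InName, OutName: string; Good: AnsiChar);
var
  FS: TFileStream;
  S: AnsiString;
  i: Integer;
begin
  // read the entire file in one go
  FS := TFileStream.Create(InName, fmOpenRead or fmShareDenyWrite);
  try
    SetLength(S, FS.Size);
    if Length(S) > 0 then
      FS.ReadBuffer(S[1], Length(S));
  finally
    FS.Free;
  end;
  // process the array in memory
  for i := 1 to Length(S) do
    if not (S[i] in [#10, #13, #32..#127]) then
      S[i] := Good;
  // write the entire result in one go
  FS := TFileStream.Create(OutName, fmCreate);
  try
    if Length(S) > 0 then
      FS.WriteBuffer(S[1], Length(S));
  finally
    FS.Free;
  end;
end;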
Buffering is the correct way to do that. I modified your code to see the difference:
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
var
  F1, F2: file;
  NumRead, NumWritten: Integer;
  Buf: array[1..2048] of Char;
  Ch: Char;
  i, n: integer;
begin
  AssignFile(F1, vfilename);
  Reset(F1, 1); // Record size = 1
  AssignFile(F2, voutfilename);
  Rewrite(F2, 1); // Record size = 1
  repeat
    BlockRead(F1, Buf, SizeOf(Buf), NumRead);
    for i := 1 to NumRead do
    begin
      Ch := Buf[i];
      n := ord(ch);
      if ((n < 32) or (n > 127)) and (not (n in [10, 13])) then
      begin // bad char
        if vgood <> -1 then
        begin
          ch := chr(vgood);
          Buf[i] := Ch;
        end
        // else // good char
        //   Write(F2, Ch);
      end;
    end;
    BlockWrite(F2, Buf, NumRead, NumWritten);
  until (NumRead = 0) or (NumWritten <> NumRead);
  CloseFile(F1);
  CloseFile(F2);
end;
I did it this way, ensuring that the file I/O is done all in one go before the processing. The code could do with updating for Unicode, but it copes with nasty text characters such as nulls and gives you TStrings capability.
procedure TextStringToStringsAA(AStrings: TStrings; const AStr: AnsiString);
// A better routine than the stream 'SetTextStr'.
// Nulls (#0), which might be in the file e.g. from corruption in log files,
// do not terminate the reading process.
var
  P, Start, VeryEnd: PAnsiChar;
  S: AnsiString;
begin
  AStrings.BeginUpdate;
  try
    AStrings.Clear;
    P := PAnsiChar(AStr);
    VeryEnd := P + Length(AStr);
    if P <> nil then
      while P < VeryEnd do
      begin
        Start := P;
        while (P < VeryEnd) and not CharInSet(P^, [#10, #13]) do
          Inc(P);
        SetString(S, Start, P - Start);
        AStrings.Add(string(S));
        if P^ = #13 then Inc(P);
        if P^ = #10 then Inc(P);
      end;
  finally
    AStrings.EndUpdate;
  end;
end;
procedure TextStreamToStrings(AStream: TStream; AStrings: TStrings);
// An alternative to AStrings.LoadFromStream.
// Nulls (#0), which might be in the file e.g. from corruption in log files,
// do not terminate the reading process.
var
  Size: Integer;
  S: AnsiString;
begin
  AStrings.BeginUpdate;
  try
    // Make a big string with all of the text
    Size := AStream.Size - AStream.Position;
    SetString(S, nil, Size);
    AStream.Read(Pointer(S)^, Size);
    // Parse it
    TextStringToStringsAA(AStrings, S);
  finally
    AStrings.EndUpdate;
  end;
end;
procedure LoadStringsFromFile(AStrings: TStrings; const AFileName: string);
// Loads the strings from a text file.
// Nulls (#0), which might be in the file e.g. from corruption in log files,
// do not terminate the reading process.
var
  ST: TFileStream;
begin
  ST := TFileStream.Create(AFileName, fmOpenRead + fmShareDenyNone);
  // No attempt is made to prevent other applications from reading from or writing to the file.
  try
    ST.Position := 0;
    AStrings.BeginUpdate;
    try
      TextStreamToStrings(ST, AStrings);
    finally
      AStrings.EndUpdate;
    end;
  finally
    ST.Free;
  end;
end;
Don't try to optimize without knowing where the time goes.
You should use the Sampling Profiler (delphitools.info) to find the bottleneck. It's easy to use.
Precompute the Chr(vgood) conversion before the loop.
Also, you don't need the Ord() and Chr() conversions: just use the 'Ch' variable throughout:
if not (ch in [#10, #13, #32..#127]) then
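Applied to the loop in question, the two suggestions combine to something like this sketch (GoodCh is an illustrative name, and the vgood = -1 skip case is ignored for brevity):
GoodCh := Chr(vgood); // precomputed once, before the loop
while not Eof(F1) do
begin
  Read(F1, Ch);
  if not (Ch in [#10, #13, #32..#127]) then
    Ch := GoodCh; // no Ord()/Chr() per character
  Write(F2, Ch);
end;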
Probably the easiest method would be (see the sketch after this list):
make another (temporary) file
copy the content of the basic file to the temp file (line after line)
detect when it reads the chars or words you want to replace, and pause copying
write your edit (to the temp file)
continue and finish copying the basic file to the temp file
rewrite (delete the content of) the basic file
copy the lines from the temp file back to the basic file
DONE!
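A line-by-line sketch of that flow (uses SysUtils; the names are illustrative, and the replacement rule is a placeholder for whatever edit is needed):
procedure ReplaceViaTempFile(const FileName, Bad, Good: string);
var
  Src, Dst: TextFile;
  TmpName, Line: string;
begin
  TmpName := FileName + '.tmp';
  AssignFile(Src, FileName);
  Reset(Src);
  AssignFile(Dst, TmpName);
  Rewrite(Dst);
  try
    while not Eof(Src) do
    begin
      ReadLn(Src, Line);
      // detect and replace the unwanted text while copying
      WriteLn(Dst, StringReplace(Line, Bad, Good, [rfReplaceAll]));
    end;
  finally
    CloseFile(Src);
    CloseFile(Dst);
  end;
  // swap the edited copy in place of the original
  DeleteFile(FileName);
  RenameFile(TmpName, FileName);
end;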