How to read last line in a text file using Delphi - delphi

I need to read the last line in some very large textfiles (to get the timestamp from the data). TStringlist would be a simple approach but it returns an out of memory error. I'm trying to use seek and blockread, but the characters in the buffer are all nonsense. Is this something to do with unicode?
Function TForm1.ReadLastLine2(FileName: String): String;
var
FileHandle: File;
s,line: string;
ok: 0..1;
Buf: array[1..8] of Char;
k: longword;
i,ReadCount: integer;
begin
AssignFile (FileHandle,FileName);
Reset (FileHandle); // or for binary files: Reset (FileHandle,1);
ok := 0;
k := FileSize (FileHandle);
Seek (FileHandle, k-1);
s := '';
while ok<>1 do begin
BlockRead (FileHandle, buf, SizeOf(Buf)-1, ReadCount); //BlockRead ( var FileHandle : File; var Buffer; RecordCount : Integer {; var RecordsRead : Integer} ) ;
if ord (buf[1]) <>13 then //Arg to integer
s := s + buf[1]
else
ok := ok + 1;
k := k-1;
seek (FileHandle,k);
end;
CloseFile (FileHandle);
// Reverse the order in the line read
setlength (line,length(s));
for i:=1 to length(s) do
line[length(s) - i+1 ] := s[i];
Result := Line;
end;
Based on www.delphipages.com/forum/showthread.php?t=102965
The testfile is a simple CSV I created in excel ( this is not the 100MB I ultimately need to read).
a,b,c,d,e,f,g,h,i,j,blank
A,B,C,D,E,F,G,H,I,J,blank
1,2,3,4,5,6,7,8,9,0,blank
Mary,had,a,little,lamb,His,fleece,was,white,as,snow
And,everywhere,that,Mary,went,The,lamb,was,sure,to,go

You really have to read the file in LARGE chunks from the tail to the head.
Since it is so large it does not fit the memory - then reading it line by line from start to end would be very slow. With ReadLn - twice slow.
You also has to be ready that the last line might end with EOL or may not.
Personally I would also account for three possible EOL sequences:
CR/LF aka #13#10=^M^J - DOS/Windows style
CR without LF - just #13=^M - Classic MacOS file
LF without CR - just #10=^J - UNIX style, including MacOS version 10
If you are sure your CSV files would only ever be generated by native Windows programs it would be safe to assume full CR/LF be used. But if there can be other Java programs, non-Windows platforms, mobile programs - I would be less sure. Of course pure CR without LF would be the least probable case of them all.
uses System.IOUtils, System.Math, System.Classes;
type FileChar = AnsiChar; FileString = AnsiString; // for non-Unicode files
// type FileChar = WideChar; FileString = UnicodeString;// for UTF16 and UCS-2 files
const FileCharSize = SizeOf(FileChar);
// somewhere later in the code add: Assert(FileCharSize = SizeOf(FileString[1]);
function ReadLastLine(const FileName: String): FileString; overload; forward;
const PageSize = 4*1024;
// the minimal read atom of most modern HDD and the memory allocation atom of Win32
// since the chances your file would have lines longer than 4Kb are very small - I would not increase it to several atoms.
function ReadLastLine(const Lines: TStringDynArray): FileString; overload;
var i: integer;
begin
Result := '';
i := High(Lines);
if i < Low(Lines) then exit; // empty array - empty file
Result := Lines[i];
if Result > '' then exit; // we got the line
Dec(i); // skip the empty ghost line, in case last line was CRLF-terminated
if i < Low(Lines) then exit; // that ghost was the only line in the empty file
Result := Lines[i];
end;
// scan for EOLs in not-yet-scanned part
function FindLastLine(buffer: TArray<FileChar>; const OldRead : Integer;
const LastChunk: Boolean; out Line: FileString): boolean;
var i, tailCRLF: integer; c: FileChar;
begin
Result := False;
if Length(Buffer) = 0 then exit;
i := High(Buffer);
tailCRLF := 0; // test for trailing CR/LF
if Buffer[i] = ^J then begin // LF - single, or after CR
Dec(i);
Inc(tailCRLF);
end;
if (i >= Low(Buffer)) and (Buffer[i] = ^M) then begin // CR, alone or before LF
Inc(tailCRLF);
end;
i := High(Buffer) - Max(OldRead, tailCRLF);
if i - Low(Buffer) < 0 then exit; // no new data to read - results would be like before
if OldRead > 0 then Inc(i); // the CR/LF pair could be sliced between new and previous buffer - so need to start a bit earlier
for i := i downto Low(Buffer) do begin
c := Buffer[i];
if (c=^J) or (c=^M) then begin // found EOL
SetString( Line, #Buffer[i+1], High(Buffer) - tailCRLF - i);
exit(True);
end;
end;
// we did not find non-terminating EOL in the buffer (except maybe trailing),
// now we should ask for more file content, if there is still left any
// or take the entire file (without trailing EOL if any)
if LastChunk then begin
SetString( Line, #Buffer[ Low(Buffer) ], Length(Buffer) - tailCRLF);
Result := true;
end;
end;
function ReadLastLine(const FileName: String): FileString; overload;
var Buffer, tmp: TArray<FileChar>;
// dynamic arrays - eases memory management and protect from stack corruption
FS: TFileStream; FSize, NewPos: Int64;
OldRead, NewLen : Integer; EndOfFile: boolean;
begin
Result := '';
FS := TFile.OpenRead(FileName);
try
FSize := FS.Size;
if FSize <= PageSize then begin // small file, we can be lazy!
FreeAndNil(FS); // free the handle and avoid double-free in finally
Result := ReadLastLine( TFile.ReadAllLines( FileName, TEncoding.ANSI ));
// or TEncoding.UTF16
// warning - TFIle is not share-aware, if the file is being written to by another app
exit;
end;
SetLength( Buffer, PageSize div FileCharSize);
OldRead := 0;
repeat
NewPos := FSize - Length(Buffer)*FileCharSize;
EndOfFile := NewPos <= 0;
if NewPos < 0 then NewPos := 0;
FS.Position := NewPos;
FS.ReadBuffer( Buffer[Low(Buffer)], (Length(Buffer) - OldRead)*FileCharSize);
if FindLastLine(Buffer, OldRead, EndOfFile, Result) then
exit; // done !
tmp := Buffer; Buffer := nil; // flip-flop: preparing to broaden our mouth
OldRead := Length(tmp); // need not to re-scan the tail again and again when expanding our scanning range
NewLen := Min( 2*Length(tmp), FSize div FileCharSize );
SetLength(Buffer, NewLen); // this may trigger EOutOfMemory...
Move( tmp[Low(tmp)], Buffer[High(Buffer)-OldRead+1], OldRead*FileCharSize);
tmp := nil; // free old buffer
until EndOfFile;
finally
FS.Free;
end;
end;
PS. Note one extra special case - if you would use Unicode chars (two-bytes ones) and would give odd-length file (3 bytes, 5 bytes, etc) - you would never be ble to scan the starting single byte (half-widechar). Maybe you should add the extra guard there, like Assert( 0 = FS.Size mod FileCharSize)
PPS. As a rule of thumb you better keep those functions out of the form class, - because WHY mixing them? In general you should separate concerns into small blocks. Reading file has nothing with user interaction - so should better be offloaded to an extra UNIT. Then you would be able to use functions from that unit in one form or 10 forms, in main thread or in multi-threaded application. Like LEGO parts - they give you flexibility by being small and separate.
PPPS. Another approach here would be using memory-mapped files. Google for MMF implementations for Delphi and articles about benefits and problems with MMF approach. Personally I think rewriting the code above to use MMF would greatly simplify it, removing several "special cases" and the troublesome and memory copying flip-flop. OTOH it would demand you to be very strict with pointers arithmetic.
https://en.wikipedia.org/wiki/Memory-mapped_file
https://msdn.microsoft.com/en-us/library/ms810613.aspx
http://torry.net/quicksearchd.php?String=memory+map&Title=No

Your char type is two byte, so that buffer is 16 byte. Then with blockread you read sizeof(buffer)-1 byte into it, and check the first 2 byte char if it is equal to #13.
The sizeof(buffer)-1 is dodgy (where does that -1 come from?), and the rest is valid, but only if your input file is utf16.
Also your read 8 (or 16) characters each time, but compare only one and then do a seek again. That is not very logical either.
If your encoding is not utf16, I suggest you change the type of a buffer element to ansichar and remove the -1

In response to kopiks suggestion, I figured out how to do it with TFilestream, it works ok with the simple test file, though there may be some further tweeks when I use it on a variety of csv files. Also, I don't make any claims that this is the most efficient method.
procedure TForm1.Button6Click(Sender: TObject);
Var
StreamSize, ApproxNumRows : Integer;
TempStr : String;
begin
if OpenDialog1.Execute then begin
TempStr := ReadLastLineOfTextFile(OpenDialog1.FileName,StreamSize, ApproxNumRows);
// TempStr := ReadFileStream('c:\temp\CSVTestFile.csv');
ShowMessage ('approximately '+ IntToStr(ApproxNumRows)+' Rows');
ListBox1.Items.Add(TempStr);
end;
end;
Function TForm1.ReadLastLineOfTextFile(const FileName: String; var StreamSize, ApproxNumRows : Integer): String;
const
MAXLINELENGTH = 256;
var
Stream: TFileStream;
BlockSize,CharCount : integer;
Hash13Found : Boolean;
Buffer : array [0..MAXLINELENGTH] of AnsiChar;
begin
Hash13Found := False;
Result :='';
Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
StreamSize := Stream.size;
if StreamSize < MAXLINELENGTH then
BlockSize := StreamSize
Else
BlockSize := MAXLINELENGTH;
// for CharCount := 0 to Length(Buffer)-1 do begin
// Buffer[CharCount] := #0; // zeroing the buffer can aid diagnostics
// end;
CharCount := 0;
Repeat
Stream.Seek(-(CharCount+3), 2); //+3 misses out the #0,#10,#13 at the end of the file
Stream.Read( Buffer[CharCount], 1);
Result := String(Buffer[CharCount]) + result;
if Buffer[CharCount] =#13 then
Hash13Found := True;
Inc(CharCount);
Until Hash13Found OR (CharCount = BlockSize);
ShowMessage(Result);
ApproxNumRows := Round(StreamSize / CharCount);
end;

Just thought of a new solution.
Again, there could be better ones, but this one is the best i thought of.
function GetLastLine(textFilePath: string): string;
var
list: tstringlist;
begin
list := tstringlist.Create;
try
list.LoadFromFile(textFilePath);
result := list[list.Count-1];
finally
list.free;
end;
end;

Related

Delphi- How to remove NON ANSI (NOT PRINTABLE) characters before saving?

Can somebody guide me to extend this procedure in a way so it removes all Non Printable characters or replaces with SPACE before it saves the stream to file ? String is read from Binary and could be maximum of 1 MB size.
My Procedure :
var
i : Word;
FileName : TFileName;
SizeofFiles,posi : Integer;
fs, sStream: TFileStream;
SplitFileName: String;
begin
ProgressBar1.Position := 0;
FileName:= lblFilePath.Caption;
SizeofFiles := StrToInt(edt2.Text) ;
posi := StrToInt(edt1.text) ;
fs := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
try
fs.Position := Posi ;
begin
SplitFileName := ChangeFileExt(FileName, '.'+ FormatFloat('000', i));
sStream := TFileStream.Create(SplitFileName, fmCreate or fmShareExclusive);
try
if fs.Size - fs.Position < SizeofFiles then
SizeofFiles := fs.Size - fs.Position;
sStream.CopyFrom(fs, SizeofFiles);
ProgressBar1.Position := Round((fs.Position / fs.Size) * 100);
finally
sStream.Free;
end;
end;
finally
fs.Free;
end;
end;
You won't be able to use TStream.CopyFrom() anymore. You would have to Read(Buffer)() from the source TStream into a local byte array, strip off whatever you don't want from that array, and then Write(Buffer)() the remaining bytes to the destination TStream.
Here is a simple demo that should do what you want:
const
SrcFileName : String = 'Test.txt';
DstFileName : String = 'TestResult.txt';
StartPosition : Int64 = 50;
procedure TForm1.Button1Click(Sender: TObject);
var
FS : TFileStream;
Buf : TBytes;
I : Integer;
begin
// Read the source file from starting position
FS := TFileStream.Create(SrcFileName, fmOpenRead or fmShareDenyWrite);
try
FS.Position := StartPosition;
SetLength(Buf, FS.Size - FS.Position);
FS.Read(Buf[0], Length(Buf));
finally
FreeAndNil(FS);
end;
// Replace all non printable character by a space
// Assume file content is ASCII characters
for I := 0 to Length(Buf) - 1 do begin
// You may want to make a more complex test for printable of not
if (Ord(Buf[I]) < Ord(' ')) or (Ord(Buf[I]) > 126) then
Buf[I] := Ord(' ');
end;
// Write destination file
FS := TFileStream.Create(DstFileName, fmCreate);
try
FS.Write(Buf[0], Length(Buf));
finally
FreeAndNil(FS);
end;
end;
This code assume the file is pure ASCII text and that every character whose ASCII code is below 32 (space) or above 126 is not printable. This may not be the case for European languages. You'll easily adapt the test to fit your needs.
The source file could also be Unicode (16 bits characters). You should use a buffer made of Unicode characters or 16 bit integers (Word). And adapt the test for printable.
Could also be UTF8...

Is it possible to delete bytes from the beginning of a file?

I know that I can efficiently truncate a file and remove bytes from the end of the file.
Is there a corresponding efficient way to truncate files by deleting content from the beginning of the file to a point in the middle of the file?
As I read the question you are asking to remove content from a file starting from the beginning of the file. In other words you wish to delete content at the start of the file and shift the remaining content down.
This is not possible. You can only truncate a file from the end, not from the beginning. You will need to copy the remaining content into a new file, or copy it down yourself within the same file.
However you do it there is no shortcut efficient way to do this. You have to copy the data, for example as #kobik describes.
Raymond Chen wrote a nice article on this topic: How do I delete bytes from the beginning of a file?
Just for fun, here's a simple implementation of a stream based method to delete content from anywhere in the file. You could use this with a read/write file stream. I've not tested the code, I'll leave that to you!
procedure DeleteFromStream(Stream: TStream; Start, Length: Int64);
var
Buffer: Pointer;
BufferSize: Integer;
BytesToRead: Int64;
BytesRemaining: Int64;
SourcePos, DestPos: Int64;
begin
SourcePos := Start+Length;
DestPos := Start;
BytesRemaining := Stream.Size-SourcePos;
BufferSize := Min(BytesRemaining, 1024*1024*16);//no bigger than 16MB
GetMem(Buffer, BufferSize);
try
while BytesRemaining>0 do begin
BytesToRead := Min(BufferSize, BytesRemaining);
Stream.Position := SourcePos;
Stream.ReadBuffer(Buffer^, BytesToRead);
Stream.Position := DestPos;
Stream.WriteBuffer(Buffer^, BytesToRead);
inc(SourcePos, BytesToRead);
inc(DestPos, BytesToRead);
dec(BytesRemaining, BytesToRead);
end;
Stream.Size := DestPos;
finally
FreeMem(Buffer);
end;
end;
A very simple solution would be to shift (move) blocks of data from the "target position offset"
towards BOF, and then trim (truncate) the leftovers:
--------------------------
|******|xxxxxx|yyyyyy|zzz|
--------------------------
BOF <-^ (target position offset)
--------------------------
|xxxxxx|yyyyyy|zzz|******|
--------------------------
^ EOF
Since #David posted a code based on TStream, here is some code based on "low level" I/O pascal style:
function FileDeleteFromBOF(const FileName: string; const Offset: Cardinal): Boolean;
var
Buf: Pointer;
BufSize, FSize,
NumRead, NumWrite,
OffsetFrom, OffsetTo: Cardinal;
F: file;
begin
{$IOCHECKS OFF}
Result := False;
AssignFile(F, FileName);
try
FileMode := 2; // Read/Write
Reset(F, 1); // Record size = 1
FSize := FileSize(F);
if (IOResult <> 0) or (Offset >= FSize) then Exit;
BufSize := Min(Offset, 1024 * 64); // Max 64k - This value could be optimized
GetMem(Buf, BufSize);
try
OffsetFrom := Offset;
OffsetTo := 0;
repeat
Seek(F, OffsetFrom);
BlockRead(F, Buf^, BufSize, NumRead);
if NumRead = 0 then Break;
Seek(F, OffsetTo);
BlockWrite(F, Buf^, NumRead, NumWrite);
Inc(OffsetFrom, NumWrite);
Inc(OffsetTo, NumWrite);
until (NumRead = 0) or (NumWrite <> NumRead) or (OffsetFrom >= FSize);
// Truncate and set to EOF
Seek(F, FSize - Offset);
Truncate(F);
Result := IOResult = 0;
finally
FreeMem(Buf);
end;
finally
CloseFile(F);
end;
end;

TStringList load memorystream? <Delphi>

edited :
My file has several lines. I encrypt the file onto a new file. I want to store each line of decrypted file (=a stream) into StringList.
First, I have a file contain :
aa
bb
cc
I encrypt the file with this function :
procedure EnDecryptFile(pathin, pathout: string; Chave: Word) ;
var
InMS, OutMS: TMemoryStream;
cnt: Integer;
C: byte;
begin
InMS := TMemoryStream.Create;
OutMS := TMemoryStream.Create;
try
InMS.LoadFromFile(pathin) ;
InMS.Position := 0;
for cnt := 0 to InMS.Size - 1 do
begin
InMS.Read(C, 1) ;
C := (C xor not (ord(chave shr cnt))) ;
OutMS.Write(C, 1) ;
end;
OutMS.SaveToFile(pathout) ;
finally
InMS.Free;
OutMS.Free;
end;
end;
My purpose now is to store original value of each line into StringList. I don't want to store decrypted file into harddisk, so I use stream.
This is the function to decrypt the file into stream :
procedure DecryptFile(pathin: string; buff: TMemoryStream; Chave: Word);
var
InMS: TMemoryStream;
cnt: Integer;
C: byte;
begin
InMS := TMemoryStream.Create;
try
InMS.LoadFromFile(pathin);
InMS.Position := 0;
for cnt := 0 to InMS.Size - 1 do
begin
InMS.Read(C, 1);
C := (C xor not(ord(Chave shr cnt)));
buff.Write(C, 1);
end;
// buff.SaveToFile('c:\temp\dump.txt') ;
finally
InMS.free;
end;
end;
--
bbuffer := TMemoryStream.Create;
try
DecryptFile(path, bbuffer, 10); //
//ShowMessage(IntToStr(bbuffer.size)); // output : 1000
bbuffer.Position := 0;
SL := TStringList.Create;
try
SL.LoadFromStream(bbuffer);
for I := 0 to SL.Count - 1 do // SL.Count = 1
begin;
//add each line of orginal file into SL??
end;
finally
SL.free;
end;
finally
bbuffer.free;
end;
Load from stream takes a TStream so you can give it a TFileStream as well as an TMemoryStream. The code you posted should work without any problems. What exactly does not work?
You might have to use
bbuffer.Position := 0;
to reset the position to the start of the stream before loading it into the string list.
EDIT: You write single bytes to a stream and then try to load a string list from it. That won't work. The stream is just a collection of bytes. How should the string list know where one string ends and the next one starts? TStringList.SaveToStream writes separator bytes to the stream so that it can parse the string list back. So, you could do your encryption on the string list and then write the whole string list to the stream, then read the stringlist and do the decryption on the string list.

How can I remotely read binary registry data using Delphi 2010?

I am trying to remotely read a binary (REG_BINARY) registry value, but I get nothing but junk back. Any ideas what is wrong with this code? I'm using Delphi 2010:
function GetBinaryRegistryData(ARootKey: HKEY; AKey, AValue, sMachine: string; var sResult: string): boolean;
var
MyReg: TRegistry;
RegDataType: TRegDataType;
DataSize, Len: integer;
sBinData: string;
bResult: Boolean;
begin
bResult := False;
MyReg := TRegistry.Create(KEY_QUERY_VALUE);
try
MyReg.RootKey := ARootKey;
if MyReg.RegistryConnect('\\' + sMachine) then
begin
if MyReg.KeyExists(AKey) then
begin
if MyReg.OpenKeyReadOnly(AKey) then
begin
try
RegDataType := MyReg.GetDataType(AValue);
if RegDataType = rdBinary then
begin
DataSize := MyReg.GetDataSize(AValue);
if DataSize > 0 then
begin
SetLength(sBinData, DataSize);
Len := MyReg.ReadBinaryData(AValue, PChar(sBinData)^, DataSize);
if Len <> DataSize then
raise Exception.Create(SysErrorMessage(ERROR_CANTREAD))
else
begin
sResult := sBinData;
bResult := True;
end;
end;
end;
except
MyReg.CloseKey;
end;
MyReg.CloseKey;
end;
end;
end;
finally
MyReg.Free;
end;
Result := bResult;
end;
And I call it like this:
GetBinaryRegistryData(
HKEY_LOCAL_MACHINE,
'\SOFTWARE\Microsoft\Windows NT\CurrentVersion',
'DigitalProductId', '192.168.100.105',
sProductId
);
WriteLn(sProductId);
The result I receive from the WriteLn on the console is:
ñ ♥ ???????????6Z ????1 ???????☺ ???♦ ??3 ? ??? ?
??
Assuming that you are already connected remotely, try using the GetDataAsString function
to read binary data from the registry.
sResult := MyReg.GetDataAsString(AValue);
You're using Delphi 2010, so all your characters are two bytes wide. When you set the length of your result string, you're allocating twice the amount of space you need. Then you call ReadBinaryData, and it fills half your buffer. There are two bytes of data in each character. Look at each byte separately, and you'll probably find that your data looks less garbage-like.
Don't use strings for storing arbitrary data. Use strings for storing text. To store arbitrary blobs of data, use TBytes, which is an array of bytes.

replace characters in a file (faster method)

We often replace non-desirable characters in a file with another "good" character.
The interface is:
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
To replace all non-desirables with a space we might call,
cleanfileASCII2(original.txt, 32 , cleaned.txt)
The problem is that this takes a rather long time. Is there
a better way to do it than shown?
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename:
string);
var
F1, F2: file of char;
Ch: Char;
tempfilename: string;
i,n,dex: integer;
begin
//original
AssignFile(F1, vfilename);
Reset(F1);
//outputfile
AssignFile(F2,voutfilename);
Rewrite(F2);
while not Eof(F1) do
begin
Read(F1, Ch);
//
n:=ord(ch);
if ((n<32)or(n>127))and (not(n in [10,13])) then
begin // bad char
if vgood<> -1 then
begin
ch:=chr(vgood);
Write(F2, Ch);
end
end
else //good char
Write(F2, Ch);
end;
CloseFile(F2);
CloseFile(F1);
end;
The problem has to do with how you're treating the buffer. Memory transfers are the most expensive part of any operation. In this case, you're looking at the file byte by byte. By changing to a blockread or buffered read, you will realize an enormous increase in speed. Note that the correct buffer size varies based on where you are reading from. For a networked file, you will find extremely large buffers may be less efficient due to the packet size TCP/IP imposes. Even this has become a bit murky with large packets from gigE but, as always, the best result is to benchmark it.
I converted from standard reads to a file stream just for convenience. You could easily do the same thing with a blockread. In this case, I took a 15MB file and ran it through your routine. It took 131,478ms to perform the operation on a local file. With the 1024 buffer, it took 258ms.
procedure cleanfileASCII3(vfilename: string; vgood: integer; voutfilename:string);
const bufsize=1023;
var
inFS, outFS:TFileStream;
buffer: array[0..bufsize] of byte;
readSize:integer;
tempfilename: string;
i: integer;
begin
if not FileExists(vFileName) then exit;
inFS:=TFileStream.Create(vFileName,fmOpenRead);
inFS.Position:=0;
outFS:=TFileStream.Create(vOutFileName,fmCreate);
while not (inFS.Position>=inFS.Size) do
begin
readSize:=inFS.Read(buffer,sizeof(buffer));
for I := 0 to readSize-1 do
begin
n:=buffer[i];
if ((n<32)or(n>127)) and (not(n in [10,13])) and (vgood<>-1) then
buffer[i]:=vgood;
end;
outFS.Write(buffer,readSize);
end;
inFS.Free;
outFS.Free;
end;
Several improvements:
Buffer the data, read 2k or 16k or similar sized blocks
Use a lookup table
here's a stab, that is untested (no compiler in front of me right now):
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename: string);
var
f1, f2: File;
table: array[Char] of Char;
index, inBuffer: Integer;
buffer: array[0..2047] of Char;
c: Char;
begin
for c := #0 to #31 do
table[c] := ' ';
for c := #32 to #127 do
table[c] := c;
for c := #128 to #255 do
table[c] := ' ';
table[#10] := #10; // exception to spaces <32
table[#13] := #13; // exception to spaces <32
AssignFile(F1, vfilename);
Reset(F1, 1);
AssignFile(F2,voutfilename);
Rewrite(F2, 1);
while not Eof(F1) do
begin
BlockRead(f1, buffer, SizeOf(buffer), inBuffer);
for index := 0 to inBuffer - 1 do
buffer[index] := table[buffer[index]];
BlockWrite(f2, buffer, inBuffer);
end;
Close(f2);
Close(f1);
end;
You could buffer your input and output so you read a chunk of characters (even the whole file, if it's not too big) into an array, then process the array, then write the entire array to the output file.
In most of these cases, the disk IO is the bottleneck, and if you can do fewer large reads instead of many small reads, it will be faster.
Buffering is the correct way to do that. I modified your code to see the difference:
procedure cleanfileASCII2(vfilename: string; vgood: integer; voutfilename:
string);
var
F1, F2: file;
NumRead, NumWritten: Integer;
Buf: array[1..2048] of Char;
Ch: Char;
i, n: integer;
begin
AssignFile(F1, vfilename);
Reset(F1, 1); // Record size = 1
AssignFile(F2, voutfilename);
Rewrite(F2, 1); // Record size = 1
repeat
BlockRead(F1, Buf, SizeOf(Buf), NumRead);
for i := 1 to NumRead do
begin
Ch := Buf[i];
//
n := ord(ch);
if ((n<32)or(n>127))and (not(n in [10,13])) then
begin // bad char
if vgood <> -1 then
begin
ch := chr(vgood);
Buf[i] := Ch;
end
//else //good char
//Write(F2, Ch);
end;
end;
BlockWrite(F2, Buf, NumRead, NumWritten);
until (NumRead = 0) or (NumWritten <> NumRead);
CloseFile(F1);
CloseFile(F2);
end;
I did it this way, ensuring that the file I/O is done all in one go before the processing. The code could do with updating for unicode but it copes with nasty text characters such as nulls and gives you a TStrings capability.
Bri
procedure TextStringToStringsAA( AStrings : TStrings; const AStr: Ansistring);
// A better routine than the stream 'SetTextStr'.
// Nulls (#0) which might be in the file e.g. from corruption in log files
// do not terminate the reading process.
var
P, Start, VeryEnd: PansiChar;
S: ansistring;
begin
AStrings.BeginUpdate;
try
AStrings.Clear;
P := Pansichar( AStr );
VeryEnd := P + Length( AStr );
if P <> nil then
while P < VeryEnd do
begin
Start := P;
while (P < VeryEnd) and not CharInSet(P^, [#10, #13]) do
Inc(P);
SetString(S, Start, P - Start);
AStrings.Add(string(S));
if P^ = #13 then Inc(P);
if P^ = #10 then Inc(P);
end;
finally
AStrings.EndUpdate;
end;
end;
procedure TextStreamToStrings( AStream : TStream; AStrings : TStrings );
// An alternative to AStream.LoadFromStream
// Nulls (#0) which might be in the file e.g. from corruption in log files
// do not terminate the reading process.
var
Size : Integer;
S : Ansistring;
begin
AStrings.BeginUpdate;
try
// Make a big string with all of the text
Size := AStream.Size - AStream.Position;
SetString( S, nil, Size );
AStream.Read(Pointer(S)^, Size);
// Parse it
TextStringToStringsAA( AStrings, S );
finally
AStrings.EndUpdate;
end;
end;
procedure LoadStringsFromFile( AStrings : TStrings; const AFileName : string );
// Loads this strings from a text file
// Nulls (#0) which might be in the file e.g. from corruption in log files
// do not terminate the reading process.
var
ST : TFileStream;
begin
ST := TFileStream.Create( AFileName, fmOpenRead + fmShareDenyNone);
// No attempt is made to prevent other applications from reading from or writing to the file.
try
ST.Position := 0;
AStrings.BeginUpdate;
try
TextStreamToStrings( ST, AStrings );
finally
AStrings.EndUpdate;
end;
finally
ST.Free;
end;
end;
Don't try to optimize without know where.
You shoud use the Sampling Profiler (delphitools.info) to know where is the bottleneck. It's easy to use.
Precompute the vgood chr conversion, before the loop.
Also, You don't need some conversions: Ord() and Chr(). Use always the 'Ch' variable.
if not (ch in [#10, #13, #32..#127]) then
Probably the easiest method would be:
make another file (temporary)
copy all content of basic file to the temp. file (line after line)
detect when it reads chars or words you want to replace and stop copying
enter your edit (to the temp. file)
continue and finish copying basic to temp file
rewrite (delete content of) basic file
copy lines from temp file to basic file
DONE!
vote this post +1 if it helped please

Resources