Have some old code (written by someone else) that I need to fix to work with Unicode strings in Delphi 10.1. EDIT: I've narrowed my question down to the following: code below fails with unicode strings. Suggestions?
//global variable:
var
UpCaseLookup : array[ 1..255 ] of char;
// ---- Knuth, Morris, Pratt:
type
failure = array[1..255] of word;
procedure PrepareUpcaseLookup;
var
S : string; //was shortstring;
i : integer;
begin
for i := 1 to 255 do
begin
S := ToUpper( chr(i) ); //was AnsiUpperCase
UpCaseLookup[i] := S[1]
end
end;
function PosKnuthMorrisPratt(Pattern, Text: string): Integer;
var
Prefix: array of Integer;
i, k: Integer;
begin
Result := 0;
if (Pattern = '') or (Text = '') then
Exit;
Pattern := UpperCase(Pattern); // case-insensitive
Text := UpperCase(Text);
// Buld prefix function array
SetLength(Prefix, Length(Pattern) + 1);
Prefix[1] := 0;
k := 0;
for i := 2 to Length(Pattern) do begin
while (k > 0) and (Pattern[k + 1] <> Pattern[i]) do
k := Prefix[k];
if Pattern[k + 1] = Pattern[i] then
Inc(k);
Prefix[i] := k;
end;
k := 0;
for i := 1 to Length(Text) do begin
while (k > 0) and (Pattern[k + 1] <> Text[i]) do
k := Prefix[k];
if Pattern[k + 1] = Text[i] then
Inc(k);
if k = Length(Pattern) then
Exit(i + 1 - Length(Pattern));
end;
end;
begin
Memo1.Lines.Add(IntToStr(PosKnuthMorrisPratt('abaBc', 'ggabagabAbccsab')));
Memo1.Lines.Add(IntToStr(PosKnuthMorrisPratt('ab', 'ggagbc')));
Related
I was assigned a task for university where I have to write a program which deletes all words with more than 4 letters. I really have no clue at all. I would be very thankful for any kind of help.
VAR
UserString: string; //должна быть строка на 40 символов и точку в конце
i, n: byte;
BEGIN
Writeln('Enter the string:');
Readln(UserString);
i:=0;
n:=1;
repeat //MAIN LOOP:
inc(i);
if (UserString[i] = ' ') or (UserString[i] = '.') then
begin
if (i-n<3)then
begin
delete(UserString, n, i-n+1);
i:=n-1;
end;
n:=i+1
end
until (UserString[i] = '.') or (i>length(UserString));
Writeln('Result String: ', UserString);
END.
I tried this. and its working on onlinegdb but not on Delphi... and I don't know why...
You should break up the logic into smaller utility functions for each task you need (finding a word, getting the word's length, deleting the word and any subsequent whitespace, etc). It will make the code easier to read and maintain.
For example:
function FindNextWordStart(const S: string; var Index: Integer): Boolean;
var
Len: Integer;
begin
Len := Length(S);
while (Index <= Len) and (Ord(S[Index]) <= 32) do Inc(Index);
Result := (Index <= Len);
end;
function GetWordLength(const S: string; Index: Integer): Integer;
var
StartIdx, Len: Integer;
begin
Len := Length(S);
StartIdx := Index;
while (Index <= Len) and (Ord(S[Index]) > 32) do Inc(Index);
Result := (Index - StartIdx);
end;
procedure DeleteWord(var S: String; Index, WordLen: Integer);
var
StartIdx, Len: Integer;
begin
Len := Length(S);
StartIdx := Index;
Inc(Index, WordLen);
while (Index <= Len) and (Ord(S[Index]) <= 32) do Inc(Index);
Delete(S, StartIdx, Index - StartIdx);
end;
var
UserString: string;
StartIdx, WordLen: Integer;
begin
Writeln('Enter the string:');
Readln(UserString);
StartIdx := 1;
while FindNextWordStart(UserString, StartIdx) do
begin
WordLen := GetWordLength(UserString, StartIdx);
if WordLen > 4 then
DeleteWord(UserString, StartIdx, WordLen)
else
Inc(StartIdx, WordLen);
end;
Writeln('Result String: ', UserString);
end.
Online Demo
I guess you can solve your task with TStringlist class:
uses Classes;
......
var AStrLst : TStringlist ;
i : Integer ,
begin
AStrLst := TStringlist.Create ;
try
// use this char for separation of words
AStrLst.Delimiter :=' ';
AStrLst.DelimitedText := ' here comes my sample string ';
for I := AStrLst.Count-1 to 0 do
begin
// delete item from list if ...
if length( trim(AStrLst[i])) <= 4 then AStrLst.Delete(i);
end;
finally
// get the complete
writeln ( AStrLst.Text ) ;
AStrLst.Free;
end;
end;
I did not test this code - but hope it helps - to get this code running, your home work
How can I effectively check if a string contains one of a few sub strings?
Suppose I have a string:
`Hi there, <B>my</B> name is Joe <DIV>.</DIV> Hello world. `
How can I check if the string contains either <B> OR <DIV> OR ?
I could do a simple:
Result := (Pos('<B>', S) > 0) or
(Pos('<DIV>', S) > 0) or
(Pos(' ', S) > 0);
But this seems to be very inefficient since it make N (at worst) passes and my strings are considerably large.
Slightly better version:
function StringContainsAny(const S: string; const AnyOf: array of string): Boolean;
var
CurrChr, C: PChar;
i, j, Ln: Integer;
begin
for i := 1 to Length(S) do
begin
CurrChr := #S[i];
for j := 0 to High(AnyOf) do
begin
C := #AnyOf[j][1]; // assume that no empty strings
if C^ <> CurrChr^ then
Continue;
Ln := Length(AnyOf[j]);
if (Length(S) + 1 - i) < Ln then // check bounds
Continue;
if CompareMem(C, CurrChr, Ln * SizeOf(C^)) then
Exit(True);
end;
end;
Exit(False);
end;
You can also build some table of stop-symbols and improve speed. It's kinda complex topic, so I can just suggest you to read, for example, book Bill Smyth "Computing Patterns in Strings".
Here is my solution, thanks to David Heffernan comment:
function StringContainsAny(const S: string; const AnyOf: array of string): Boolean;
var
CurrChr, C: PChar;
I, L, H: Integer;
begin
Result := False;
CurrChr := PChar(S);
while CurrChr^ <> #0 do
begin
H := High(AnyOf);
for I := 0 to H do
begin
L := 0;
C := PChar(AnyOf[I]);
while C^ <> #0 do
begin
if C^ = CurrChr^ then
Inc(L)
else
Break;
Inc(C);
Inc(CurrChr);
if CurrChr^ = #0 then // end of S string
begin
Result := (C^ = #0);
if Result or (not Result and (I = H)) then // match or last AnyOf
Exit;
end;
end;
if C^ = #0 then // match
begin
Result := True;
Exit;
end
else
Dec(CurrChr, L);
end;
Inc(CurrChr);
end;
end;
I'm not sure it is perfect.
EDIT:
What can I say? You know what they say about assumptions...
after actually testing, it seems like using Pos():
function StringContainsAny(const S: string; const AnyOf: array of string): Boolean;
var
I: Integer;
begin
for I := 0 to High(AnyOf) do
begin
if Pos(AnyOf[I], S) <> 0 then
begin
Result := True;
Exit;
end;
end;
Result := False;
end;
Is faster than my solution and #Green_Wizard solution! they did a good job with the Pos function!
source array(4 bytes)
[$80,$80,$80,$80] =integer 0
[$80,$80,$80,$81] = 1
[$80,$80,$80,$FF] = 127
[$80,$80,$81,$01] = 128
need to convert this to integer.
below is my code and its working at the moment.
function convert(b: array of Byte): Integer;
var
i, st, p: Integer;
Negative: Boolean;
begin
result := 0;
st := -1;
for i := 0 to High(b) do
begin
if b[i] = $80 then Continue // skip leading 80
else
begin
st := i;
Negative := b[i] < $80;
b[i] := abs(b[i] - $80);
Break;
end;
end;
if st = -1 then exit;
for i := st to High(b) do
begin
p := round(Power(254, High(b) - i));
result := result + b[i] * p;
result := result - (p div 2);
end;
if Negative then result := -1 * result
end;
i'm looking for a better function?
Update:
file link
https://drive.google.com/file/d/0ByBA4QF-YOggZUdzcXpmOS1aam8/view?usp=sharing
in uploaded file ID field offset is from 5 to 9
NEW:
Now i got into new problem which is decoding date field
Date field hex [$80,$8F,$21,$C1] -> possible date 1995-12-15
* in uploaded file date field offset is from 199 to 203
Just an example of some improvements as outlined by David.
The array is passed by reference as a const.
The array is fixed in size.
The use of floating point calculations are converted directly into a constant array.
Const
MaxRange = 3;
Type
TMySpecial = array[0..MaxRange] of Byte;
function Convert(const b: TMySpecial): Integer;
var
i, j: Integer;
Negative: Boolean;
Const
// Pwr[i] = Round(Power(254,MaxRange-i));
Pwr: array[0..MaxRange] of Cardinal = (16387064,64516,254,1);
begin
for i := 0 to MaxRange do begin
if (b[i] <> $80) then begin
Negative := b[i] < $80;
Result := Abs(b[i] - $80)*Pwr[i] - (Pwr[i] shr 1);
for j := i+1 to MaxRange do
Result := Result + b[j]*Pwr[j] - (Pwr[j] shr 1);
if Negative then
Result := -Result;
Exit;
end;
end;
Result := 0;
end;
Note that less code lines is not always a sign of good performance.
Always measure performance before optimizing the code in order to find real bottlenecks.
Often code readability is better than optimizing over the top.
And for future references, please tell us what the algorithm is supposed to do.
Code for testing:
const
X : array[0..3] of TMySpecial =
(($80,$80,$80,$80), // =integer 0
($80,$80,$80,$81), // = 1
($80,$80,$80,$FF), // = 127
($80,$80,$81,$01)); // = 128
var
i,j: Integer;
sw: TStopWatch;
begin
sw := TStopWatch.StartNew;
for i := 1 to 100000000 do
for j := 0 to 3 do
Convert(X[j]);
WriteLn(sw.ElapsedMilliseconds);
ReadLn;
end.
I want to insert a char into every possible position of s string except start and end.
e.g.
abc
I want to have
a-bc
ab-c
a-b-c
Below is my test, but not correct:
procedure TForm1.Button2Click(Sender: TObject);
var
start, i,j,k,position,loop: integer;
laststart,lastend:integer;
c,item,stem:string;
str, prefix:string;
begin
str:='abcd';
memo1.clear;
memo1.Lines.Add(str);
laststart:=0;
lastend:=memo1.lines.count-1;
position:=0;
prefix:='';
loop:=0;
while loop<=length(str)-1 do
begin
for j:= laststart to lastend do
begin
item:=memo1.lines[j];
for k:=length(item) downto 1 do
begin
if item[k]='-' then
begin
position:=j;
break;
end;
end; //for k
prefix:=copy(item,1,position);
stem:=copy(item,position+1, length(item));
for start:=1 to length(stem)-1 do
begin
c:=prefix+copy(stem,1,start)+'-'+
copy(stem, start+1,length(stem));
memo1.lines.add(c);
end;
end; //for j
laststart:=lastend+1;
lastend:=memo1.Lines.Count-1;
inc(loop);
end; //end while
end;
it outputs:
abcd
a-bcd
ab-cd
abc-d
a--bcd // not correct
a-b-cd
a-bc-d
ab--cd //incorrect
ab-c-d
abc--d //incorrect
a--bc-d //incorrect
I feel the maximum possible breaks is lenth(str)-1, abc->most possible is insert 2 '-' (twice). Is this correct?
And are there other faster ways to do it?
Thanks a lot.
Recursive version.
procedure InsertSymbols(s: string; c: Char; Position: Integer = 1);
var
i: Integer;
begin
Memo1.Lines.Add(s);
for i := Position to Length(s) - 1 do
InsertSymbols(Copy(s, 1, i) + c + Copy(s, i + 1, Length(s) - i), c, i + 2);
end;
begin
InsertSymbols('Test', '-');
end;
This works:
procedure TForm4.Button1Click(Sender: TObject);
var
S: string;
N: integer;
Marker: cardinal;
MaxMarker: cardinal;
Str: string;
i: Integer;
begin
S := Edit1.Text;
N := length(S);
Marker := 0;
MaxMarker := 1 shl (N - 1) - 1;
Memo1.Clear;
Memo1.Lines.BeginUpdate;
for Marker := 0 to MaxMarker do
begin
Str := S[1];
for i := 2 to N do
begin
if (Marker shr (N-i)) and 1 <> 0 then
Str := Str + '-';
Str := Str + S[i];
end;
Memo1.Lines.Add(Str);
end;
Memo1.Lines.EndUpdate;
end;
As you can see, it works by using binary representation of numbers:
t e s t
0 0 0
0 0 1
0 1 0
0 1 1
1 0 0
1 0 1
1 1 0
1 1 1
Why all the difficult solutions?
Just copy the string to a new one char by char, add hyphens in between, except for the last one.
I needed to separate a string to use as a serial number and here is the code:
Function GetDashedKey(Key: string): string
const
PartSize = 7;
var
Indx: Integer;
dashedKey : string;
begin
repeat
if Trim(dashedKey)<>'' then
dashedKey := dashedKey + ' - ';
if Length(Key) < PartSize then
begin
dashedKey := dashedKey + Key;
Key := '';
end
else
begin
dashedKey := dashedKey + Copy(Key, 1, PartSize);
Key := Copy(Key, PartSize + 1, Length(Key)-1);
end;
until Trim(Key) = '';
Result := dashedKey;
end;
I used to use this function to convert hex string to string in Delphi 6 :
const
testSign = '207F8060287F585054505357FFD55861';
function Hex2Dec(const data: string): byte;
var
nH1, nH2: byte;
begin
if data[1] in ['0' .. '9'] then
nH1 := strtoint(data[1])
else
nH1 := 9 + ord(data[1]) - 64;
if data[2] in ['0' .. '9'] then
nH2 := strtoint(data[2])
else
nH2 := 9 + ord(data[2]) - 64;
Result := nH1 * 16 + nH2;
end;
function HexStrToStr(const HexStr: string): string;
var
BufStr: string;
LenHex: Integer;
x, y: Integer;
begin
LenHex := Length(HexStr) div 2;
x := 1;
y := 0;
while y <> LenHex do
begin
Inc(y);
BufStr := BufStr + Chr(Hex2Dec(HexStr[x] + HexStr[x + 1]));
Inc(x, 2);
end;
Result := BufStr;
end;
Now I want to use the function with Delphi 2010.
const
testSign: AnsiString = '207F8060287F585054505357FFD55861';
function Hex2Dec(const data: ansistring): byte;
var
nH1, nH2: byte;
begin
if data[1] in ['0' .. '9'] then
nH1 := strtoint(data[1])
else
nH1 := 9 + ord(data[1]) - 64;
if data[2] in ['0' .. '9'] then
nH2 := strtoint(data[2])
else
nH2 := 9 + ord(data[2]) - 64;
Result := nH1 * 16 + nH2;
end;
function HexStrToStr(const HexStr: ansistring): ansistring;
var
BufStr: ansistring;
LenHex: Integer;
x, y: Integer;
begin
LenHex := Length(HexStr) div 2;
x := 1;
y := 0;
while y <> LenHex do
begin
Inc(y);
BufStr := BufStr + Chr(Hex2Dec(HexStr[x] + HexStr[x + 1]));
Inc(x, 2);
end;
Result := BufStr;
end;
Output from first code in D6 :
' '#$7F'€`('#$7F'XPTPSWÿÕXa'
Output from second code in D2010 :
' '#$7F#$0080'`('#$7F'XPTPSWÿÕXa'
How do I fix the code in D2010 so it can produces same result like D6?
Besides the solutions others provided, you can also make use of the built-in function:
function HexStrToStr(const HexStr: string): string;
var
tmp: AnsiString;
begin
Assert(not Odd(Length(HexStr)), 'HexToStr input length must be an even number');
SetLength(tmp, Length(HexStr) div 2);
HexToBin(PWideChar(HexStr), #tmp[1], Length(tmp));
result := tmp;
end;
This implementation assumes that the hex-encoded string has been an Ansistring in the first place. For flexibility I suggest to use TBytes instead.