Only allow certain characters in a string - delphi

I am trying to validate a string, where by it can contain all alphebetical and numerical characters, aswell as the underline ( _ ) symbol.
This is what I tried so far:
var
S: string;
const
Allowed = ['A'..'Z', 'a'..'z', '0'..'9', '_'];
begin
S := 'This_is_my_string_0123456789';
if Length(S) > 0 then
begin
if (Pos(Allowed, S) > 0 then
ShowMessage('Ok')
else
ShowMessage('string contains invalid symbols');
end;
end;
In Lazarus this errors with:
Error: Incompatible type for arg no. 1: Got "Set Of Char", expected
"Variant"
Clearly my use of Pos is all wrong and I am not sure if my approach is even the correct way of going about it or not?
Thanks.

You will have to check every single character of the string, if it's contained in Allowed
e.g.:
var
S: string;
const
Allowed = ['A' .. 'Z', 'a' .. 'z', '0' .. '9', '_'];
Function Valid: Boolean;
var
i: Integer;
begin
Result := Length(s) > 0;
i := 1;
while Result and (i <= Length(S)) do
begin
Result := Result AND (S[i] in Allowed);
inc(i);
end;
if Length(s) = 0 then Result := true;
end;
begin
S := 'This_is_my_string_0123456789';
if Valid then
ShowMessage('Ok')
else
ShowMessage('string contains invalid symbols');
end;

TYPE TCharSet = SET OF CHAR;
FUNCTION ValidString(CONST S : STRING ; CONST ValidChars : TCharSet) : BOOLEAN;
VAR
I : Cardinal;
BEGIN
Result:=FALSE;
FOR I:=1 TO LENGTH(S) DO IF NOT (S[I] IN ValidChars) THEN EXIT;
Result:=TRUE
END;
If you are using a Unicode version of Delphi (as you seem to be), beware that a SET OF CHAR cannot contain all valid characters in the Unicode character set. Then perhaps this function will be useful instead:
FUNCTION ValidString(CONST S,ValidChars : STRING) : BOOLEAN;
VAR
I : Cardinal;
BEGIN
Result:=FALSE;
FOR I:=1 TO LENGTH(S) DO IF POS(S[I],ValidChars)=0 THEN EXIT;
Result:=TRUE
END;
but then again, not all characters (actually Codepoints) in Unicode can be expressed by a single character, and some characters can be expressed in more than one way (both as a single character and as a multi-character).
But as long as you constrain yourself within these limitations, one of the above functions should be useful. You can even include both, if you add an "OVERLOAD;" directive to the end of each function declaration, as in:
FUNCTION ValidString(CONST S : STRING ; CONST ValidChars : TCharSet) : BOOLEAN; OVERLOAD;
FUNCTION ValidString(CONST S,ValidChars : STRING) : BOOLEAN; OVERLOAD;

Lazarus/Free Pascal doesn't overload pos for that but has "posset" variants in unit strutils for that;
http://www.freepascal.org/docs-html/rtl/strutils/posset.html
Regarding Andreas' (IMHO correct ) remark, you can use isemptystr for that. It was meant to check for strings that only contain whitespace, but it basically checks if a string only contains characters in a set.
http://www.freepascal.org/docs-html/rtl/strutils/isemptystr.html

You can use Regular Expressions:
uses System.RegularExpressions;
if not TRegEx.IsMatch(S, '^[_a-zA-Z0-9]+$') then
ShowMessage('string contains invalid symbols');

Related

Why does this test to check if there are any illegal characters in a string keep coming up as false even though it should be true?

var
btof: boolean;
const
allowed = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
begin
btof:= false;
for i := 1 to length(sfilename) do
begin
if (sfilename[i] in [allowed[1] .. allowed[length(allowed)]])
then
btof := true;
end;
end;
I've been staring at this for the past hour trying to see the problem, but I just can't find it...
btof is always false, no matter the input.
The set notation [allowed[1] .. allowed[length(allowed)] boils down to a set of char ['a'..'9']. As the literal '9' has a lesser value than the literal 'a', the resulting set is just empty.
That is not how you check if a character is in an string. Actually you check if the character is between 'a' and '9'. That's not really what you intent to do.
Here is an implementation that works:
// Check if a filename contain any illegal character
function CheckIllegal(const SFilename : String) : Boolean;
var
C : Char;
const
Allowed = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
begin
Result := FALSE;
for C in SFilename do begin
if Pos(C, Allowed) < 1 then begin
Result := TRUE;
break;
end;
end;
end;
By the way, your list of allowed characters in a filename is incomplete, at least if you think about Windows OS.
var
bAllowed: boolean;
sfilename: string;
i, x: Integer;
C: Integer;
const
allowed = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
begin
bAllowed := true;
sfilename := edtInput.Text;
i := Length(sfilename);
x := 1;
while ((x <= i) and (bAllowed)) do
begin
bAllowed := Pos(sfilename[x], allowed) > 0;
Inc(x);
end;
if bAllowed then
ShowMessage('The string is valid')
else
ShowMessage('The string isn''t valid');
This is a more optimized version of the code. I take it that the purpose of the code that you gave was to determine if the string is valid and return a bool. Thus it only has to check up and till the point that it finds an illegal char in the string.
Note that I have changed it that if the string is valid it returns a true and invalid a false.

Dephi: faster way to convert a “dirty” string to a number

How can I make this code faster? the string can contain characters such as ", .?#" and possibly others.
Const Nums = ['0'..'9'];
function CleanNumber(s: String): Int64;
Var z: Cardinal;
begin
for z := length(s) downto 1 do
if not (s[z] in Nums) then Delete(s,z,1);
if s = '' then
Result := 0 else
Result := StrToInt64(s);
end;
Results (long loop):
CL2,CL3 = HeartWare's
32-bit, "dirty number" / "clean number"
Mine: 270ms, 165ms
CL2: 220ms, 210ms
CL3: 100ms, 110ms
DirtyStrToNum: 215ms, 90ms
64-bit, "dirty number" / "clean number"
Mine: 2280ms, 75ms
CL2: 1320ms, 130ms
CL3: 280ms, 25ms
DirtyStrToNum: 1390ms, 125ms
Here are two examples that for sure are faster than the one you have (deleting a character from a string is relatively slow):
This one works by pre-allocating a string of the maximum possible length and then filling it out with the digits as I come across them in the source string. No delete for every unsupported character, and no expansion of the target string for every supported character.
FUNCTION CleanNumber(CONST S : STRING) : Int64;
VAR
I,J : Cardinal;
C : CHAR;
T : STRING;
BEGIN
SetLength(T,LENGTH(S));
J:=LOW(T);
FOR I:=LOW(S) TO HIGH(S) DO BEGIN
C:=S[I];
IF (C>='0') AND (C<='9') THEN BEGIN
T[J]:=C;
INC(J)
END
END;
IF J=LOW(T) THEN
Result:=0
ELSE BEGIN
SetLength(T,J-LOW(T)); // or T[J]:=#0 [implementation-specific]
Result:=StrToInt64(T)
END
END;
This one works by simple multiplication of the end result by 10 and adding the corresponding digit value.
{$IFOPT Q+}
{$DEFINE OverflowEnabled }
{$ELSE }
{$Q+ If you want overflow checking }
{$ENDIF }
FUNCTION CleanNumber(CONST S : STRING) : Int64;
VAR
I : Cardinal;
C : CHAR;
BEGIN
Result:=0;
FOR I:=LOW(S) TO HIGH(S) DO BEGIN
C:=S[I];
IF (C>='0') AND (C<='9') THEN Result:=Result*10+(ORD(C)-ORD('0'))
END
END;
{$IFNDEF OverflowEnabled } {$Q-} {$ENDIF }
{$UNDEF OverflowEnabled }
Also note that I don't use IN or CharInSet as these are much slower than a simple inline >= and <= comparison.
Another comment I could make is the use of LOW and HIGH on the string variable. This makes it compatible with both 0-based strings (mobile compilers) and 1-based strings (desktop compilers).
Your function is slow mainly because of the Delete approach. Each call to Delete needs to move a lot of characters around.
A faster approach would be like this:
function DirtyStrToNum(const S: string): Int64;
var
tmp: string;
i, j: Integer;
const
DIGITS = ['0'..'9'];
begin
SetLength(tmp, S.Length);
j := 0;
for i := 1 to S.Length do
if CharInSet(S[i], DIGITS) then
begin
Inc(j);
tmp[j] := S[i];
end;
SetLength(tmp, j);
if tmp.IsEmpty then
Result := 0
else
Result := StrToInt64(tmp);
// Or, but not equivalent: Result := StrToInt64Def(tmp, 0);
end;
Notice I make a single allocation for a new string, and then only copy the minimum number of characters to it.

How can I test my WideReplace function?

OS: Hungarian Windows (Windows 1250)
Under Delphi 6 Prof there is no WideStringPos, WideStringCopy, WideStringReplace...
But in an XML based project I need to use them.
Because that I tried to write "something like" these functions.
But I'm not sure they are working as I want...
Because Delphi converts the Wide to Ansi and reverse in the background, I cannot be sure that my code is safe from these side effects... :-)
The code is very primitive - I need the solution quickly...
function WideStringCopy(WWhat : WideString; From, HowMany : integer) : WideString;
var
i : integer;
l : integer;
wc : WideChar;
begin
Result := '';
if WWhat = ''
then Exit;
if (HowMany <= 0)
then Exit;
if (From < 1)
then From := 1;
l := From + HowMany - 1;
if l > Length(WWhat)
then l := Length(WWhat);
for i := From to l do begin
wc := WWhat[i];
Result := Result + wc;
end;
end;
function WideStringPos(WWhere, WWhat : WideString) : integer;
var
wscomp : WideString;
i : integer;
begin
Result := 0;
for i := 1 to Length(WWhere) do begin
wscomp := WideStringCopy(WWhere, i, LengtH(WWhat));
if WideSameStr(wscomp, WWhat)
then begin
Result := i;
Exit;
end;
end;
end;
function WideStringReplace(WWhere, WFrom, WTo : WideString) : WideString;
var
actpos : integer;
wcomp : WideString;
wc : WideChar;
begin
Result := '';
actpos := 1;
while actpos <= Length(WWhere) do begin
wcomp := WideStringCopy(WWhere, actpos, Length(WFrom));
if WideSameStr(wcomp, WFrom) then begin
Result := Result + WTo;
inc(actpos, Length(WFrom));
end else begin
wc := WWhere[actpos];
Result := Result + wc;
inc(actpos);
end;
end;
end;
I have two questions about it:
Do you see any piece of code that surely making bad result (converting the Wide to Ansi silently, and causing character loosing)?
Do you know some character with I can test this code?
For example, chr(XXX) what is remaining when my converters are keeping the Wide rules, but loosing if I make wrong code...
Thanks for every info you will write...
Do you know some character with I can test this code?
Any codepage beyond Win1250 - for example Cyrillic Win1251, Greek, Hebrew - almost all letters there would be missed from 1250/1252
You can take Jedi CodeLibrary and use its locale conversion routines: make a string consisting of #128 till #255 in some encoding like aforementioned, convert it to Unicode from that codepage and then convert back from Unicode to Hungarian codepage.
function StringToWideStringEx(const S: AnsiString; CodePage: Word): WideString;
function WideStringToStringEx(const WS: WideString; CodePage: Word): AnsiString;
Or in one call
function TranslateString(const S: AnsiString; CP1, CP2: Word): AnsiString;
Then look which chars failed to translate and turned into ReplacementCharacter.
However in JCL you'd have your Pos function and such ready to use. And XML parser. So why bother ?

How to detect if a string is Base64Encoded or not?

Which is the best method to detect if a string is Base64Encoded or not (using Delphi)?
Best you can do is try to decode it. If the decode fails then the input was not base64 encoded. It the string successfully decodes then the input might have been base64 encoded.
You can check if the string only contains Base64 valids chars
function StringIsBase64(const InputString : String ) : Boolean;
const
Base64Chars: Set of AnsiChar = ['A'..'Z','a'..'z','0'..'9','+','/','='];
var
i : integer;
begin
Result:=True;
for i:=1 to Length(InputString) do
{$IFDEF UNICODE}
if not CharInSet(InputString[i],Base64Chars) then
{$ELSE}
if not (InputString[i] in Base64Chars) then
{$ENDIF}
begin
Result:=False;
break;
end;
end;
The = char is used for padding so you can add an aditional valiation to the function for padded base64 strings checking if the length of the string is mod 4
In addition to RRUZ answer you can also check the length of the string (is it a multiple of 4).
function IsValidBase64(const aValue: string): Boolean;
var
i: Integer;
lValidChars: set of Char;
begin
Result := aValue <> '';
lValidChars := ['a'..'z', 'A'..'Z', '0'..'9', '/', '+'];
//length of string should be multiple of 4
if Length(aValue) mod 4 > 0 then
Result := False
else
for i := 1 to Length(aValue) do
begin
if aValue[i] = '=' then
begin
if i < Length(aValue) - 1 then
begin
Result := False;
Exit;
end
else
lValidChars := ['='];
end
else if not (aValue[i] in lValidChars) then
begin
Result := False;
Break;
end;
end;
end;
Please note that this code is Delphi 7 code and not adjusted for Unicode use.
As was already told here, there is no reliable verification if a certain string is Base64 encoded or not, so even when you consider the input as a valid Base64 encoded string, it doesn't mean the string is actually encoded that way. I'm posting here just another version of a validation function, which according to RFC 4648 verifies:
if the input string is not empty and its length is multiple of 4
if the input string contains at most two padding characters and only at the end of the string
if the input string contains only characters from the Base64 alphabet (see the Page 5, Table 1)
function IsValidBase64EncodedString(const AValue: string): Boolean;
const
Base64Alphabet = ['A'..'Z', 'a'..'z', '0'..'9', '+', '/'];
var
I: Integer;
ValLen: Integer;
begin
ValLen := Length(AValue);
Result := (ValLen > 0) and (ValLen mod 4 = 0);
if Result then
begin
while (AValue[ValLen] = '=') and (ValLen > Length(AValue) - 2) do
Dec(ValLen);
for I := ValLen downto 1 do
if not (AValue[I] in Base64Alphabet) then
begin
Result := False;
Break;
end;
end;
end;

Delphi isNumber

Is there a method in Delphi to check if a string is a number without raising an exception?
its for int parsing.
and an exception will raise if one use the
try
StrToInt(s);
except
//exception handling
end;
function TryStrToInt(const S: string; out Value: Integer): Boolean;
TryStrToInt converts the string S, which represents an integer-type number in either decimal or hexadecimal notation, into a number, which is assigned to Value. If S does not represent a valid number, TryStrToInt returns false; otherwise TryStrToInt returns true.
To accept decimal but not hexadecimal values in the input string, you may use code like this:
function TryDecimalStrToInt( const S: string; out Value: Integer): Boolean;
begin
result := ( pos( '$', S ) = 0 ) and TryStrToInt( S, Value );
end;
var
s: String;
iValue, iCode: Integer;
...
val(s, iValue, iCode);
if iCode = 0 then
ShowMessage('s has a number')
else
ShowMessage('s has not a number');
Try this function StrToIntDef()
From help
Converts a string that represents an integer (decimal or hex notation) to a number with error default.
Pascal
function StrToIntDef(const S: string; Default: Integer): Integer;
Edit
Just now checked the source of TryStrToInt() function in Delphi 2007. If Delphi 7 dont have this function you can write like this. Its just a polished code to da-soft answer
function TryStrToInt(const S: string; out Value: Integer): Boolean;
var
E: Integer;
begin
Val(S, Value, E);
Result := E = 0;
end;
XE4 and newer:
for ch in s do
TCharacter.IsNumber(ch);
Don't forget:
uses System.Character
In delphi 7 you can use the Val procedure. From the help:
Unit: System
Delphi syntax: procedure Val(S; var V; var Code: Integer);
S is a string-type expression; it must be a sequence of characters that form a signed real number.
V is an integer-type or real-type variable. If V is an integer-type variable, S must form a whole number.
Code is a variable of type Integer.
If the string is invalid, the index of the offending character is stored in Code; otherwise, Code is set to zero. For a null-terminated string, the error position returned in Code is one larger than the actual zero-based index of the character in error.
use this function
function IsNumber(N : String) : Boolean;
var
I : Integer;
begin
Result := True;
if Trim(N) = '' then
Exit(False);
if (Length(Trim(N)) > 1) and (Trim(N)[1] = '0') then
Exit(False);
for I := 1 to Length(N) do
begin
if not (N[I] in ['0'..'9']) then
begin
Result := False;
Break;
end;
end;
end;
For older Delphi versions from delphi 5 help example:
uses Dialogs;
var
I, Code: Integer;
begin
{ Get text from TEdit control }
Val(Edit1.Text, I, Code);
{ Error during conversion to integer? }
if Code <> 0 then
MessageDlg('Error at position: ' + IntToStr(Code), mtWarning, [mbOk], 0);
else
Canvas.TextOut(10, 10, 'Value = ' + IntToStr(I));
end;
In some languages decimal separators are different (for example, '.' is used in English and ',' is used in Russian). For these cases to convert string to real number the following procedure is proposed:
function TryStrToFloatMultiLang(const S : String; out Value : Extended) : Boolean;
var
dc : char;
begin
Result := false;
dc := DecimalSeparator;
DecimalSeparator := '.';
try
Result := TryStrToFloat(S, Value);
except
DecimalSeparator := ',';
Result := TryStrToFloat(S, Value);
end;
DecimalSeparator := dc;
end;
Update
As #Pep mentioned TryStrToFloat catch exceptions, but it returns boolean value. So the correct code is:
function TryStrToFloatMultiLang(const S : String; out Value : Extended) : Boolean;
var
dc : char;
begin
Result := false;
dc := DecimalSeparator;
DecimalSeparator := '.';
Result := TryStrToFloat(S, Value);
if not Result then begin
DecimalSeparator := ',';
Result := TryStrToFloat(S, Value);
end;
DecimalSeparator := dc;
end;
When you using procedure
val(s, i, iCode);
and set value xd ....
val('xd', i, iCode)
as a result we obtain: 13
standard unit Variants
function VarIsNumeric(v:Variant):Boolean

Resources