Delete non alphabetic characters at the starting of String - ios

I want to replace all the non-alphanumeric characters (including white spaces) of a string up to the first alphabet
"\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text."
I used
let string = txtView.text
let newString = string.replacingOccurrences(of: "\n", with: "")
but it replaces all the "\n" of string. how could we replace the starting characters only?

I am able to remove leading white spaces using regular expression. This is the code :
extension String {
func trimLeadingWhitespaces() -> String {
return self.replacingOccurrences(of: "^\\s+", with: "", options: .regularExpression)
}
}
let string = " Some string abc"
let trimmed = string.trimLeadingWhitespaces()
print(trimmed)

There is a function drop that removes characters while a condition is filled.
Here are two variants of using it
let newString = string.drop { $0.isWhitespace || $0.isPunctuation }
let newString = string.drop { !($0.isLetter || $0.isNumber) }

You can use trimmingCharacters method of string class. It will remove all the leading and trailing whitespaces and \n from string.
let str = "\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text."
let result = str.trimmingCharacters(in: .whitespacesAndNewlines)
print(result)

Only leading spaces:
extension String {
func removingLeadingSpaces() -> String {
guard let index = firstIndex(where: { !CharacterSet(charactersIn: String($0)).isSubset(of: .whitespaces) }) else {
return self
}
return String(self[index...])
}
}

you can use two func from extension (thx #amer):
extension String {
func replacingFirstOccurrence(of string: String, with replacement: String) -> String {
guard let range = self.range(of: string) else { return self }
return replacingCharacters(in: range, with: replacement)
}
func removingLeadingSpaces() -> String {
guard let index = firstIndex(where: { !CharacterSet(charactersIn: String($0)).isSubset(of: .whitespaces) }) else {
return self
}
return String(self[index...])
}
}
and use it:
let myString = "\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text."
let charactersRemoved = myString.replacingFirstOccurrence(of: "\r\n", with: "")
let whiteSpacesRemoved = charactersRemoved.removingLeadingSpaces()
print(whiteSpacesRemoved) // "A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text."

[ |\t|\n|\r] matchs for space, newline, tabulation or carriage return
{2,} "2 times or more"
each match is replaced with a newline
let str = "\r\n A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text."
let newString = str.replacingOccurrences(
of: "[ |\t|\n|\r]{2,}",
with: "\n",
options: .regularExpression
)
print(newString)
// A Simple PDF File
// This is a small demonstration .pdf file -
// just for use in the Virtual Mechanics tutorials. More text. And more
// text.

As per your problem statement you need to delete non-alphabetic char which includes spaces, line,'#','$','%','^', etc. There is much non-alphabetic char. Instead of checking non-alphabetic char , you chan validate alphabetic char alone. Please check the following solution to your problem.
extension String{
func findFirstAlphabetic() -> String.Index?{
for index in self.indices{
if String(self[index]).isAlphanumeric == true{
return index
}
}
return nil
}
var isAlphanumeric: Bool {
return !isEmpty && range(of: "[^a-zA-Z0-9]", options: .regularExpression) == nil
}
func alphabetic_Leading_SubString() -> String?{
if let startIndex = self.findFirstAlphabetic(){
let newSubString = self[startIndex..<self.endIndex]
return String(newSubString)
}
return nil
}
}
usage:-
print("Output" + string.alphabetic_Leading_SubString())
Input :-
let string = "\r\n# # $ % & ^ * () -+ ######## A Simple PDF File \r\n This is a small demonstration .pdf file - \r\n just for use in the Virtual Mechanics tutorials. More text. And more \r\n text."
Output:-

Related

backspace not work in outside of regex in swift

I use this method for patterning the phone number in UITextField at the .editingChange event
But the delete key only removes the numbers
extension String{
func applyPatternOnNumbers(pattern: String) -> String {
let replacmentCharacter: Character = "#"
let pureNumber = self.replacingOccurrences( of: "[^۰-۹0-9]", with: "", options: .regularExpression)
var result = ""
var pureNumberIndex = pureNumber.startIndex
for patternCharacter in pattern {
if patternCharacter == replacmentCharacter {
guard pureNumberIndex < pureNumber.endIndex else { return result }
result.append(pureNumber[pureNumberIndex])
pureNumber.formIndex(after: &pureNumberIndex)
} else {
result.append(patternCharacter)
}
}
return result
}
}
use at the editingChange event
let pattern = "+# (###) ###-####"
let mobile = textField.text.substring(to: pattern.count-1)
textfield.text = mobile.applyPatternOnNumbers(pattern: pattern)
// print(textfield.text) +1 (800) 666-8888
the problem is space & - , ( , ) chars can not to be removed
The RegEx you are trying is to not consider digits only:
[^۰-۹0-9]
I'm not sure, but you may change it to:
[^۰-۹0-9\s-\(\)]
and it may work. You might just add a \ before your special chars inside [] and you can any other chars into it that you do not need to be replaced.
Or you may simplify it to
[^\d\s-\(\)]
and it might work.
Method 2
You may use this RegEx which is an exact match to the phone number format you are having:
\+\d+\s\(\d{3}\)\s\d{3}-\d{4}
You may remove the first +, if it is unnecessary
\d+\s\(\d{3}\)\s\d{3}-\d{4}

Check if string latin or cyrillic

Is it some way to check if some string latin or cyrillic? I've tried localizedCompare String method, but it don't gave me needed result.
What about something like this?
extension String {
var isLatin: Bool {
let upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
let lower = "abcdefghijklmnopqrstuvwxyz"
for c in self.characters.map({ String($0) }) {
if !upper.containsString(c) && !lower.containsString(c) {
return false
}
}
return true
}
var isCyrillic: Bool {
let upper = "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ"
let lower = "абвгдежзийклмнопрстуфхцчшщьюя"
for c in self.characters.map({ String($0) }) {
if !upper.containsString(c) && !lower.containsString(c) {
return false
}
}
return true
}
var isBothLatinAndCyrillic: Bool {
return self.isLatin && self.isCyrillic
}
}
Usage:
let s = "Hello"
if s.isLatin && !s.isBothLatinAndCyrillic {
// String is latin
} else if s.isCyrillic && !s.isBothLatinAndCyrillic {
// String is cyrillic
} else if s.isBothLatinAndCyrillic {
// String can be either latin or cyrillic
} else {
// String is not latin nor cyrillic
}
Considere there are cases where the given string could be both, for example the string:
let s = "A"
Can be both latin or cyrillic. So that's why there's the function "is both".
And it can also be none of them:
let s = "*"
You should get all unicode characters and detect if contains cyrillic chars or Latin char based on the unicode value. This code is not complet, you can complete it.
let a : String = "ӿ" //unicode value = 04FF
let scalars = a.unicodeScalars
//get unicode value of first char:
let unicodeValue = scalars[scalars.startIndex].value //print 1279, correspondant to 04FF.
Check here for all unicode value (in hexa).
http://jrgraphix.net/r/Unicode/0400-04FF
According to this site, cyrillic value are from 0400 -> 04FF (1024 -> 1279)
this is the code for cyrillic check:
var isCyrillic = true
for (index, unicode) in scalars.enumerate() {
if (unicode.value < 1024 || unicode.value > 1279) {
print("not a cyrillic text")
print(unicode.value)
isCyrillic = false
break
}
}
Surprisingly, there's no easy answer to your question. The Latin alphabet contains more than just A - Z. There are accented characters in French and archaic forms in German, etc. I don't know the Cyrillic alphabet so I'll leave it alone. On top of that, you have to deal with: punctuation (.,?"(), etc.) and white space, emojis, arrows, dingbats... which are language neutral. The complexity can escalate very quickly depending on your requirements.
The answer you accepted is inadequate to say the least: "hello world".isLatin == false since it doesn't deal with white spaces.
Visit a site like this one to learn what ranges contain characters for which language and play with the code below. It's not a complete answer but meant to get you started:
let neutralRanges = [0x20...0x40]
let latinRanges = [0x41...0x5A, 0x61...0x7A, 0xC0...0xFF, 0x100...0x17F]
let cyrillicRanges = [0x400...0x4FF, 0x500...0x52F]
func scalar(scalar: UnicodeScalar, isInRanges ranges: [Range<Int>]) -> Bool {
for r in ranges {
if r ~= Int(scalar.value) {
return true
}
}
return false
}
let str = "Hello world"
var isLatin = true
var isCyrillic = true
for s in "Hello world".unicodeScalars {
if scalar(s, isInRanges: neutralRanges) {
continue
}
else if !scalar(s, isInRanges: latinRanges) {
isLatin = false
}
else if !scalar(s, isInRanges: cyrillicRanges) {
isCyrillic = false
}
}
print(isLatin)
print(isCyrillic)
A couple of comments refer to another post that shows a fairly clean way to determine the language of a String using NSLinguisticTagger (How to detect text (string) language in iOS? ).
NSLinguisticTagger is definitely the best approach here and is intended exactly for this purpose, but it sounds to me like you're actually asking how to identify the script of the String rather than the language. English, French, German (for example) all use Latin script so the language example above doesn't show the ideal way to discern between Latin and Cyrillic (or other scripts).
Instead I wrote the following extension to String that shows how to identify the script for the first sentence in the String you supply - you can then easily adapt/build on this to get the exact thing you want for your use case:
import Foundation // Needed for NSLinguisticTagger
extension String {
func scriptCode() -> NSLinguisticTag? {
let linguisticTagger = NSLinguisticTagger(tagSchemes: [.script], options: 0)
linguisticTagger.string = self
return iso15924ScriptCode = linguisticTagger.tag(at: 0, unit: .sentence, scheme: .script, tokenRange: nil)
}
}
Scripts are uniformly described by four-letter ISO 15924 script codes, such as "Latn", and this is what you get with the returned NSLinguisticTag object. To perform a comparison, just check the raw value of NSLinguisticTag, for example like this:
if yourTestSentence.scriptCode()? == "Latn" || "Cyrl" {
print("This sentence is in Latin or Cyrillic script")
} else {
print("Some other script")
}
Caveat: This example only checks the first sentence of whatever string you supply. I haven't tested what happens if that sentence is mixed scripts - most likely the returned tag will be nil.
Here are some useful reference links to Apple's docs, and Wikipedia for more info:
https://developer.apple.com/documentation/foundation/nslinguistictagger
https://developer.apple.com/documentation/foundation/nslinguistictagscheme
https://en.wikipedia.org/wiki/ISO_15924
I hope that this also can be useful
let cyrillicToLatinMap: [Character : String] = [
" ":" ",
"А":"A",
"Б":"B",
"В":"V",
"Г":"G",
"Д":"D",
"Е":"E",
"Ж":"Zh",
"З":"Z",
"И":"I",
"Й":"Y",
"К":"K",
"Л":"L",
"М":"M",
"Н":"N",
"О":"O",
"П":"P",
"Р":"R",
"С":"S",
"Т":"T",
"У":"U",
"Ф":"F",
"Х":"H",
"Ц":"Ts",
"Ч":"Ch",
"Ш":"Sh",
"Щ":"Sht",
"Ъ": "A",
"Ю":"Yu",
"Я":"Ya",
"а":"a",
"б":"b",
"в":"v",
"г":"g",
"д":"d",
"е":"e",
"ж":"zh",
"з":"z",
"и":"i",
"й":"y",
"к":"k",
"л":"l",
"м":"m",
"н":"n",
"о":"o",
"п":"p",
"р":"r",
"с":"s",
"т":"t",
"у":"u",
"ф":"f",
"х":"h",
"ц":"ts",
"ч":"ch",
"ш":"sh",
"щ":"sht",
"ъ": "a",
"ь":"y",
"ю":"yu",
"я":"ya",]
Bulgarian Cyrillic to Latin
class CyrilicToLatinConverter {
public static func getLatin(wordInCyrillic: String) -> String{
if(wordInCyrillic.isEmpty) {return wordInCyrillic}
else{
let characters = Array(wordInCyrillic)
var wordInLatin: String = ""
for n in 0...characters.capacity-1 {
if isCyrillic(characters: characters[n]) {
wordInLatin+=cyrillicToLatinMap[characters[n]] ?? ""
}
else{
return ""
}
}
return wordInLatin
}
}
public static func isCyrillic(characters: Character) -> Bool {
var isCyrillic: Bool = true;
for (key,_) in cyrillicToLatinMap{
isCyrillic = (key == characters)
if isCyrillic {
break
}
}
return isCyrillic
}
Swift 3:
For Persian and Arabic
extension String {
var isFarsi: Bool {
//Remove extra spaces from the first and last word
let value = self.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
if value == "" {
return false
}
let farsiLetters = "آ ا ب پ ت ث ج چ ح خ د ذ ر ز ژ س ش ص ض ط ظ ع غ ف ق ک گ ل م ی ن و ه"
let arabicLetters = " ء ا أ إ ء ؤ ئـ ئ آ اً ة ا ب ت ث ج ‌ ح خ د ذ ر ز س ‌ ش ص ض ط ظ ع غ ف ق ك ل م ن ه و ي"
for c in value.characters.map({ String($0) }) {
if !farsiLetters.contains(c) && !arabicLetters.contains(c) {
return false
}
}
return true
}
}
swift 5 solution
extension String {
var isLatin: Bool {
let upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
let lower = "abcdefghijklmnopqrstuvwxyz"
for c in self.map({String($0)}) where !upper.contains(c) && !lower.contains(c) {
return false
}
return true
}
}

Parsing & contracting Russian full names

I have several text fields used to enter full name and short name, among other data. My task is:
Check if entered full name matches the standard Russian Cyrillic full name pattern:
Иванов Иван Иванович (three capitalized Cyrillic strings separated by spaces)
If it matches, create another string by auto-contracting full name according to pattern below and enter it to the corresponding text field:
Иванов И.И. (first string, space, first character of the second string, dot, first character of the third string, dot)
If it doesn't match, do nothing.
Currently I use the following code:
let fullNameArray = fullNameField.text!.characters.split{$0 == " "}.map(String.init)
if fullNameArray.count == 3 {
if fullNameArray[0] == fullNameArray[0].capitalizedString && fullNameArray[1] == fullNameArray[1].capitalizedString && fullNameArray[2] == fullNameArray[2].capitalizedString {
shortNameField.text = "\(fullNameArray[0]) \(fullNameArray[1].characters.first!).\(fullNameArray[2].characters.first!)."
}
}
How can I improve it? Maybe regular expressions could help me? If so, could you post some sample code?
My current solution:
do {
let regex = try NSRegularExpression(pattern: "^\\p{Lu}\\p{Ll}+\\s\\p{Lu}\\p{Ll}+\\s\\p{Lu}\\p{Ll}+$", options: .AnchorsMatchLines)
if regex.firstMatchInString(fullNameField.text!, options: [], range: NSMakeRange(0, fullNameField.text!.characters.count)) != nil {
let fullNameArray = fullNameField.text!.characters.split(" ").map(String.init)
shortNameField.text = "\(fullNameArray[0]) \(fullNameArray[1].characters.first!).\(fullNameArray[2].characters.first!)."
}
else {
shortNameField.text = ""
}
} catch let error as NSError {
print(error.localizedDescription)
}
Processes my full name pattern correctly.

Unicode Character Conversion

I am attempting to use the unicode character U+00AE in a UITextView. If I use the code \u{00AE} using the below:
textView.text = "THING AND STUFF TrademarkedThing\u{00AE}"
However, if I pull some text from another location (this is technically coming from an API call, but that shouldn't matter), and assign it to the textView, I do not get the unicode character:
var apiText = "TrademarkedThing\u{00AE}" //Pulled from API call as text and saved into text variable
textView.text = "THING AND STUFF " + apiText
So the in the code below, the first unicode character does not show, but the second does.
var apiText = "TrademarkedThing\u{00AE}" //Pulled from API call as text and saved into text variable
textView.text = "THING AND STUFF " + apiText + " \u{00AE}"
Why won't the unicode from that text show?
The Unicode conversion (from \u{...}) only happens for string literals. You can see the problem if you compare these two:
let t1 = "Thing\u{00AE}"
// Thing®
let t2 = "Thing\\u{00" + "AE}"
// Thing\u{00AE}
Since your string is coming from another source, it acts like the second one. The Swift language book has a section on Unicode characters in string literals.
If you want to interpret those Unicode sequences after the fact, here's a String extension:
extension String {
func indexOfSubstring(str: String, fromIndex: String.Index? = nil) -> String.Index? {
var index = fromIndex ?? startIndex
while index < endIndex {
if self[Range(start: index, end: endIndex)].hasPrefix(str) {
return index
}
index = index.successor()
}
return nil
}
func convertedUnicodeSequences() -> String {
if let index = indexOfSubstring("\\u{") {
if let nextIndex = indexOfSubstring("}", fromIndex: index) {
let substr = self[Range(start: advance(index, 3), end: nextIndex)]
let scalar = UnicodeScalar(UInt32(strtoul(substr, nil, 16)))
return self[Range(start: startIndex, end: index)] +
String(scalar) +
self[Range(start: nextIndex.successor(), end: endIndex)].convertedUnicodeSequences()
}
}
return self
}
}

Extract a whole word from string In Swift

for example I have a String text like : "I have to go to the kitchen"
and If I searched this text using the 'av' phrase I want a way that return me the whole word 'have'
how I can do this in swift
There is very nice solution with filter in swift.You can use rangeOfString method of String with filter to get only filtered string having "av"
var s = "I have to go to the kitchen"
//will return "have"
let abc:[String] = s.componentsSeparatedByString(" ").filter({ $0.rangeOfString("av", options: NSStringCompareOptions.CaseInsensitiveSearch, range: nil, locale: nil) != nil } )
There is an API for that, enumerateSubstrings(in:options:using:)
The byWords option returns all words in the closure
let string = "I have to go to the kitchen"
var found : String?
string.enumerateSubstrings(in: string.startIndex..., options: .byWords) { substring, _, _, stop in
if let word = substring, word.contains("av") {
found = word
stop = true
}
}
print(found ?? "not found")
Split your string into array by space char (" "), and return component that contains your 'av' string.
let words = stringYouWantToSearchIn.componentsSeparatedByString(" ")
for word in words
{
var range = word.rangeOfString(lastWord)
if (range != nil)
{
//you got what do you want in 'word variable'
break
}
}

Resources