Currently, in search, I am looking for a string that contains some characters:
/// Stores `keyword` and narrows `names` down to the entries whose title
/// contains it, ignoring case and diacritics.
///
/// - Parameter keyword: The text typed by the user. `nil` or an empty string
///   restores the complete list.
func search(for keyword: String?) {
    self.keyword = keyword

    // Normalise the search text once, up front.
    let needle = keyword?
        .folding(options: .diacriticInsensitive, locale: nil)
        .lowercased() ?? ""

    if needle.isEmpty {
        filteredNames = names
        return
    }

    filteredNames = names.filter { entry in
        let normalizedTitle = entry.title
            .lowercased()
            .folding(options: .diacriticInsensitive, locale: nil)
        return normalizedTitle.contains(needle)
    }
}
The problem is that when the list is bigger, it shows more than expected.
I could use .starts(with: keyword) instead of .contains(keyword), but then it would miss some results.
let's say I have an array of:
let myArray = ["name", "my name", "some name", "else string", "string", "stringname"]
So when looking for 'ame' it now shows every entry that contains the string, but what I want is to show an entry only if one of its words starts with the keyword. So when typing 'name' it should return "name", "some name" but NOT "stringname".
An alternative is searching with Regular Expression and range(of:options:)
The benefit is no extra objects are created, it's O(n) and you can specify the case insensitive and diacritics insensitive options at the same time. The \b in the regex pattern represents word boundaries
let keyword = "name"
let myArray = ["name", "my name", "some name", "else string", "string", "stringname"]
// Escape the keyword so that regex metacharacters in it (e.g. "(" or ".")
// are matched literally instead of being interpreted as pattern syntax.
// `\b` anchors the match at a word boundary.
let pattern = "\\b" + NSRegularExpression.escapedPattern(for: keyword)
// NOTE(review): Apple's documentation lists only `.caseInsensitive` and `.anchored`
// as options that combine with `.regularExpression` — verify that
// `.diacriticInsensitive` really takes effect here.
let filtered = myArray.filter {
    $0.range(of: pattern, options: [.regularExpression, .caseInsensitive, .diacriticInsensitive]) != nil
}
print(filtered)
You can do it like this -
extension String {
    /// Returns `true` when at least one whitespace-separated word of the
    /// receiver begins with `keyword`.
    ///
    /// Diacritics in the receiver are folded away before matching.
    /// Assumption: both `keyword` and `self` are already lowercased.
    ///
    /// - Parameter keyword: The prefix to look for at the start of each word.
    /// - Returns: `true` if any word starts with `keyword`, otherwise `false`.
    func hasMatch(for keyword: String) -> Bool {
        // Split on any whitespace or newline so that words separated by tabs
        // or line breaks are recognised as well, not only those after a space.
        let words = self
            .folding(options: .diacriticInsensitive, locale: nil)
            .components(separatedBy: .whitespacesAndNewlines)
        return words.contains { $0.starts(with: keyword) }
    }
}
Now your filter logic can use it like this -
// Keep only the entries whose lowercased title has a match for `keyword`.
filteredNames = names.filter { entry in
    let lowercasedTitle = entry.title.lowercased()
    return lowercasedTitle.hasMatch(for: keyword)
}
You could split each String by their whitespace and match each component.
let searchTerm = "name"
let myArray = ["name", "my name", "some name", "else string", "string", "stringname"]
// Keep the strings in which at least one space-separated word begins with `searchTerm`.
let matches = myArray.filter { candidate in
    let words = candidate.components(separatedBy: " ")
    return words.contains(where: { word in word.hasPrefix(searchTerm) })
}
Improvements left to the reader:
Split string by any kind of whitespace
Case-insensitive, diacritic-insensitive search
Related
Let's say I have a text and I want to check if the text contains a couple of words.
let text: String = "The rain in Spain"
let wordsA: [String] = [ "rain", "Spain" ] // should return true when compared with text
let wordsB: [String] = [ "rain", "Italy" ] // should return false when compared with text
What is the shortest, quickest way to check if my text contains ALL of the words?
I know, I can do:
// Start optimistic and flip to false once a word is missing from `text`;
// after that no further word needs to be checked.
var result = true
for word in wordsA where result {
    if !text.contains(word) {
        result = false
    }
}
But I was wondering if there is a shorter way that involves using predicates. For instance, to check if the string contains ANY of the words I could do:
let result: Bool = wordsA.contains(where: text.contains)
Is there something similar that results only in true if ALL words are found?
I found a very simple solution, which is perfect for me:
let result: Bool = wordsA.allSatisfy(text.contains)
This works, but it's not fast like Leo's solution.
You should enumerateSubstrings in your string using byWords option. It will return only words not a sequence of characters that might be just part of a larger word. Once you have all the words of your string you should create a set and check if it is a superset of your words. Your code should be something like:
extension StringProtocol {
    /// The distinct words of the receiver, as reported by enumerating its
    /// substrings with the `.byWords` option.
    var setOfWords: Set<String> {
        var words: Set<String> = []
        enumerateSubstrings(in: startIndex..., options: .byWords) { word, _, _, _ in
            // The callback hands over an optional substring; bind it instead of
            // force-unwrapping so a missing value can never trap.
            if let word = word {
                words.insert(word)
            }
        }
        return words
    }
}
let text = "The rain in Spain"
let wordsA = [ "rain", "Spain" ] // should return true when compared with text
let wordsB = [ "rain", "Italy" ] // should return false when compared with text
let setOfWords = text.setOfWords
setOfWords.isSuperset(of: wordsA) // true
setOfWords.isSuperset(of: wordsB) // false
Without extending StringProtocol:
let text = "The rain in Spain"
var setOfWords: Set<String> = []
text.enumerateSubstrings(in: text.startIndex..., options: .byWords) { word, _, _, _ in
    // Bind the optional substring instead of force-unwrapping it.
    if let word = word {
        setOfWords.insert(word)
    }
}
let wordsA = [ "rain", "Spain" ]
let wordsB = [ "rain", "Italy" ]
setOfWords.isSuperset(of: wordsA) // true
setOfWords.isSuperset(of: wordsB) // false
I'm programming a search routine in Swift 5 and I want to highlight the part of a string that matches the search (e.g. if I search for "bcd" in a string like "äbcdef", the result should look like "ä**bcd**ef"). To do so, I wrote an extension for String that splits a string into the substring before the match (="before"), the match with the search string (="match") and the substring afterwards (="after").
extension String {
// Splits the receiver into the text before the first occurrence of `search`,
// the occurrence itself, and the text after it.
//
// - search: the text to look for.
// - sensitive: when false, both the receiver and `search` are lowercased before comparing.
// - returns: (before, match, after). When nothing matches, `match` and `after` are empty
//   and `before` holds the whole receiver (lowercased if `sensitive` is false).
func findSubstring(forSearchStr search: String, caseSensitive sensitive: Bool) -> (before: String, match: String, after: String) {
// `before` doubles as the comparison copy until a match is found.
var before = self
var searchStr = search
if !sensitive {
before = before.lowercased()
searchStr = searchStr.lowercased()
}
var match = ""
var after = ""
let totalStringlength = before.count
let searchStringlength = searchStr.count
var startpos = self.startIndex
var endpos = self.endIndex
// NOTE(review): when `search` is longer than the receiver the upper bound is negative
// and building the closed range traps — confirm callers never pass such input.
for id in 0 ... (totalStringlength - searchStringlength) {
startpos = self.index(self.startIndex, offsetBy: id)
endpos = self.index(startpos, offsetBy: searchStringlength)
// NOTE(review): `startpos`/`endpos` are indices of `self`, yet they subscript `before`,
// which is a different string once it has been lowercased. The two strings are not
// guaranteed to share an index layout — a likely source of shifted results; verify.
if searchStr == String(before[startpos ..< endpos]) {
// From here on `before` holds the result prefix taken from the original text.
before = String(self[self.startIndex ..< startpos])
match = String(self[startpos ..< endpos])
// NOTE(review): with this condition a match followed by exactly one character
// leaves `after` empty — looks like an off-by-one; confirm the intended bound.
if id < totalStringlength - searchStringlength - 1 {
startpos = self.index(startpos, offsetBy: searchStringlength)
after = String(self[startpos ..< self.endIndex])
}
// Only the first occurrence is reported.
break
}
}
return (before, match, after)
} // end findSubstring()
}
My problem is that this routine works well for all strings without special characters like the German umlauts "ä, ö, ü" or "ß". If a string contains one of these characters, the returned substrings "match" and "after" are shifted one character to the right. In the example above, the result for the search "bcd" is in this case "äb**cde**f".
My question is, what do I have to do to handle this characters properly as well?
By the way: is there a simpler way to split a string as described than what I have programmed (which seems rather complex to me :) )?
Thanks for your valuable support
String comparison is a complicated issue, and is something you would not want to handle yourself unless you are studying this.
Just use String.range(of:options:):
extension String {
    /// Splits the receiver around the first occurrence of `search`.
    ///
    /// - Parameters:
    ///   - search: The text to look for.
    ///   - sensitive: Pass `false` to ignore case while searching.
    /// - Returns: The text before the match, the matched text and the text
    ///   after it, or `nil` when `search` does not occur in the receiver.
    func findSubstring(forSearchStr search: String, caseSensitive sensitive: Bool) -> (before: String, match: String, after: String)? {
        let compareOptions: String.CompareOptions = sensitive ? [] : [.caseInsensitive]
        guard let hit = range(of: search, options: compareOptions, locale: nil) else {
            return nil
        }
        let head = String(self[startIndex..<hit.lowerBound])
        let found = String(self[hit])
        let tail = String(self[hit.upperBound..<endIndex])
        return (head, found, tail)
    }
}
// (before: "ä", match: "bcd", after: "e")
print("äbcde".findSubstring(forSearchStr: "bcd", caseSensitive: true)!)
Note that this is not a literal comparison. For example:
// prints (before: "", match: "ß", after: "")
print("ß".findSubstring(forSearchStr: "ss", caseSensitive: false)!)
If you want a literal comparison, use the literal option:
range(of: search, options: sensitive ? [.literal] : [.caseInsensitive, .literal])
I have array like this
var country = ["America","Germany","China"]
What I need: if this array contains "America",
I would like to replace it with "US",
hoping for a result like this:
["US","Europe","Asia"]
Please don't use country[0] = "US",
because the order of the array is different every time.
Use the condition I described above.
How should I do this?
Create a dictionary of what a string in countries needs to be replaced with.
let countriesToRegions = ["America": "US", "Germany": "Europe", "China": "Asia"]
Then when you need to convert the countries into regions, you can look them up in the dictionary.
// `countries` is never mutated, so it is declared as a constant.
let countries = ["America", "Germany", "China"]
// Look every country up in the dictionary; keep the country itself when no
// replacement is registered for it.
let regions = countries.map { country in
    countriesToRegions[country] ?? country
}
The last bit here, ?? country, is handling the possibility that the country is not in the countriesToRegions dictionary.
Here is one way to do it (comments inline):
let country = ["USA", "Germany", "China", "Fakeland"]
// lookup table from country to continent
let continentForCountry = ["USA": "North America", "Germany": "Europe", "China": "Asia"]
// translate every entry, falling back to a placeholder for countries
// that are missing from the lookup table
let result = country.map { name in
    continentForCountry[name] ?? "unknown continent"
}
print(result)
["North America", "Europe", "Asia", "unknown continent"]
Try -
// Locate "America" wherever it currently sits in the array and replace it in place;
// nothing happens when the array does not contain it.
if let requiredIndex = country.firstIndex(where: { $0 == "America" }) {
    country[requiredIndex] = "US"
}
You can either use map or, for a more explicit version, use the function below:
/// Returns a copy of `inArray` in which every element equal to
/// `originalString` is replaced by `replacementString`.
///
/// - Parameters:
///   - inArray: The strings to process; it is not modified.
///   - originalString: The value to look for.
///   - replacementString: The value that takes its place.
/// - Returns: A new array of the same length. An empty input yields an empty array.
func replaceOccuranceInArray(inArray:[String], of originalString: String, with replacementString: String) -> [String] {
    // `map` visits each element exactly once and copes with an empty array,
    // whereas the index range `0...count - 1` traps when `count` is 0.
    return inArray.map { $0 == originalString ? replacementString : $0 }
}
I have attached a Playground output for the same below
As some suggested, you can use power of .map function.
Here is simple solution how you can map country to continent:
let americanCountries = ["US", "Argentina", "Brasil"]
let asianCountries = ["China", "Japan", "Korea"]
let europeanCountries = ["Germany", "France", "Spain"]
let countries = ["US","Germany","China"]
// Resolve each country to its continent; the groups are checked in a fixed
// order and anything unlisted falls through to the placeholder.
let continents = countries.map { country -> String in
    switch country {
    case _ where americanCountries.contains(country):
        return "America"
    case _ where asianCountries.contains(country):
        return "Asia"
    case _ where europeanCountries.contains(country):
        return "Europe"
    default:
        return "Unknown continent"
    }
}
Another step would be try to use enums.
A simple solution :
Get the index of the value to be replaced and replace it if existing:
// Find where "America" currently sits and overwrite that slot, if it exists.
if let position = country.firstIndex(of: "America") {
    country[position] = "US"
}
I want to extract a value from a string that is delimited by a unique starting and ending tag. In my case it's <em>:
"Fully <em>Furni<\/em>shed |Downtown and Canal Views",
result
Furnished
I guess you want to remove the tags.
If the backslash is only virtual the pattern is pretty simple: Basically <em> with optional slash /?
let trimmedString = string.replacingOccurrences(of: "</?em>", with: "", options: .regularExpression)
Considering also the backslash it's
let trimmedString = string.replacingOccurrences(of: "<\\\\?/?em>", with: "", options: .regularExpression)
If you want to extract only Furnished you have to capture groups: The string between the tags and everything after the closing tag until the next whitespace character.
let string = "Fully <em>Furni<\\/em>shed |Downtown and Canal Views"
// Group 1: the text inside the tags. The quantifier is lazy so it stops at the
//          first closing tag instead of running on to the last one in the string.
// Group 2: the run of non-whitespace characters right after the closing tag.
let pattern = "<em>(.*?)<\\\\?/em>(\\S+)"
do {
    let regex = try NSRegularExpression(pattern: pattern)
    // Bind both converted ranges instead of force-unwrapping them.
    if let match = regex.firstMatch(in: string, range: NSRange(string.startIndex..., in: string)),
       let insideRange = Range(match.range(at: 1), in: string),
       let trailingRange = Range(match.range(at: 2), in: string) {
        print(String(string[insideRange] + string[trailingRange]))
    }
} catch { print(error) }
Given this string:
let str = "Fully <em>Furni<\\/em>shed |Downtown and Canal Views"
and the corresponding NSRange:
let range = NSRange(location: 0, length: (str as NSString).length)
Let's construct a regular expression that would match letters between <em> and </em>, or preceded by </em>
let regex = try NSRegularExpression(pattern: "(?<=<em>)\\w+(?=<\\\\/em>)|(?<=<\\\\/em>)\\w+")
What it does is :
look for 1 or more letters: \\w+,
that are preceded by <em>: (?<=<em>) (positive lookbehind),
and followed by <\/em>: (?=<\\\\/em>) (positive lookahead),
or : |
letters: \\w+,
that are preceded by <\/em>: (?<=<\\\\/em>) (positive lookbehind)
Let's get the matches:
let matches = regex.matches(in: str, range: range)
Which we can turn into substrings:
// An NSRange counts UTF-16 code units, so it is converted with `Range(_:in:)`
// rather than being used as a Character offset, which would drift as soon as
// the text contains emoji or combining marks.
let strings: [String] = matches.compactMap { match -> String? in
    guard let range = Range(match.range, in: str) else { return nil }
    return String(str[range])
}
Now we can join the strings in even indices, with the ones in odd indices:
// Positions 0, 2, 4, … of `strings`, stopping before the last element so
// that every position still has a successor to pair with.
let evenStride = stride(from: strings.startIndex, to: strings.index(before: strings.endIndex), by: 2)
let result = evenStride.map { position in
    strings[position] + strings[strings.index(after: position)]
}
print(result) //["Furnished"]
We can test it with another string:
let str2 = "<em>Furni<\\/em>shed <em>balc<\\/em>ony <em>gard<\\/em>en"
the result would be:
["Furnished", "balcony", "garden"]
Not a regex but, for obtaining all words in tags, e.g [Furni, sma]:
let text = "Fully <em>Furni<\\/em>shed <em>sma<\\/em>shed |Downtown and Canal Views"
// Every chunk that follows an opening tag and still contains a closing tag
// contributes the text in front of that closing tag.
// `compactMap` replaces the deprecated optional-unwrapping form of `flatMap`.
let emphasizedParts = text.components(separatedBy: "<em>")
    .filter { $0.contains("<\\/em>") }
    .compactMap { $0.components(separatedBy: "<\\/em>").first }
For full words, e.g [Furnished, smashed]:
let emphasizedParts = text.components(separatedBy: " ").filter { $0.contains("<em>")}.map { $0.replacingOccurrences(of: "<\\/em>", with: "").replacingOccurrences(of: "<em>", with: "") }
Regex:
If you want to achieve that by regex, you can use Valexa's answer:
public extension String {
    /// Returns the text of every capture group of the first match of `pattern`.
    ///
    /// - Parameter pattern: A regular expression in `NSRegularExpression` syntax.
    /// - Returns: One string per capture group, in group order. A group that did
    ///   not take part in the match contributes an empty string. The result is
    ///   empty when the pattern is invalid, does not match, or has no groups.
    func capturedGroups(withRegex pattern: String) -> [String] {
        guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
            return []
        }
        // NSRange is measured in UTF-16 code units; `count` (Characters) gives
        // the wrong length for text containing emoji or combining marks.
        let fullRange = NSRange(startIndex..., in: self)
        guard let match = regex.firstMatch(in: self, options: [], range: fullRange),
              match.numberOfRanges > 1 else {
            return []
        }
        return (1..<match.numberOfRanges).map { groupIndex -> String in
            // A non-participating group reports NSNotFound, which fails the
            // conversion; report it as an empty string instead of crashing.
            guard let groupRange = Range(match.range(at: groupIndex), in: self) else {
                return ""
            }
            return String(self[groupRange])
        }
    }
}
like this:
let text = "Fully <em>Furni</em>shed |Downtown and Canal Views"
// The class is `a-zA-Z`: the range `A-z` would also admit the punctuation
// characters that sit between "Z" and "a" in ASCII.
print(text.capturedGroups(withRegex: "<em>([a-zA-Z]+)</em>"))
result:
["Furni"]
NSAttributedString:
If you want to do some highlighting or you only need to get rid of tags or any other reason that you can't use the first solution, you can also do that using NSAttributedString:
extension String {
    /// The receiver interpreted as UTF-8 encoded HTML, or `nil` when the
    /// conversion throws (the error is printed).
    var attributedStringAsHTML: NSAttributedString? {
        let htmlData = Data(utf8)
        do {
            let parsed = try NSAttributedString(
                data: htmlData,
                options: [
                    .documentType: NSAttributedString.DocumentType.html,
                    .characterEncoding: String.Encoding.utf8.rawValue],
                documentAttributes: nil)
            return parsed
        } catch {
            print("error: ", error)
            return nil
        }
    }
}
/// Converts `text` from HTML to an attributed string and returns the plain
/// text of every range reported while enumerating it.
///
/// - Parameter text: The HTML source.
/// - Returns: One string per enumerated range, or an empty array when the
///   HTML conversion fails.
func getTextSections(_ text:String) -> [String] {
    guard let attributedText = text.attributedStringAsHTML else { return [] }

    // we don't need to enumerate any special attribute here,
    // but for example, if you want to just extract links you can use `NSAttributedString.Key.link` instead
    let attribute = NSAttributedString.Key(rawValue: "")
    let wholeRange = NSMakeRange(0, attributedText.length)

    var sections: [String] = []
    attributedText.enumerateAttribute(attribute,
                                      in: wholeRange,
                                      options: .longestEffectiveRangeNotRequired) { _, runRange, _ in
        sections.append(attributedText.attributedSubstring(from: runRange).string)
    }
    return sections
}
let text = "Fully <em>Furni</em>shed |Downtown and Canal Views"
print(getTextSections(text))
result:
["Fully ", "Furni", "shed |Downtown and Canal Views"]
Here is basic implementation in PHP (yes, I know you asked Swift, but it's to demonstrate the regex part):
<?php
$in = "Fully <em>Furni</em>shed |Downtown and Canal Views";
// Group 1: tag name, group 2: text inside the tag,
// group 3: text directly after the closing tag (up to the next space or the end).
$m = preg_match("/<([^>]+)>([^>]+)<\/\\1>([^ ]+|$)/i", $in, $t);
// Only build the result when the pattern matched; otherwise $t[2] and $t[3]
// do not exist and reading them raises "undefined offset" notices.
if ($m === 1) {
    $s = $t[2] . $t[3];
    echo $s;
}
Output:
ZC-MGMT-04:~ jv$ php -q regex.php
Furnished
Obviously, the most important bit is the regular expression, which matches any tag, finds the respective closing tag and captures the remainder afterward.
If you just want to extract the text between <em> and <\/em> (note this is not normal HTML tags as then it would have been <em> and </em>) tags, we can simply capture this pattern and replace it with the group 1's value captured. And we don't need to worry about what is present around the matching text and just replace it with whatever got captured between those text which could actually be empty string, because OP hasn't mentioned any constraint for that. The regex for matching this pattern would be this,
<em>(.*?)<\\\/em>
Or, to be more robust and tolerate optional spaces anywhere within the tags (as someone pointed out in the comments on other answers), we can use this regex:
<\s*em\s*>(.*?)<\s*\\\/em\s*>
And replace it with \1 or $1 depending upon where you are doing it. Now whether these tags contain empty string, or contains some actual string within it, doesn't really matter as shown in my demo on regex101.
Here is the demo
Let me know if this meets your requirements and further, if any of your requirement remains unsatisfied.
I highly recommend the use of regex capture groups.
create your regex putting the name for the desired capture group:
let capturePattern = "(?<=<em>)(?<data1>\\w+)(?=<\\\\/em>)|(?<=<\\\\/em>)(?<data2>\\w+)"
now use the Swift capture pattern to get the data:
// The pattern is a fixed literal, so a failure to compile it is a programmer error.
let captureRegex = try! NSRegularExpression(
    pattern: capturePattern,
    options: []
)
// "\\/" produces the two characters `\/`; a bare "\/" is not a valid escape
// sequence in a Swift string literal and does not compile.
let textInput = "Fully <em>Furni<\\/em>shed |Downtown and Canal Views"
let textInputRange = NSRange(
    textInput.startIndex..<textInput.endIndex,
    in: textInput
)
let matches = captureRegex.matches(
    in: textInput,
    options: [],
    range: textInputRange
)
guard let match = matches.first else {
    // Handle exception
    throw NSError(domain: "", code: 0, userInfo: nil)
}
let data1Range = match.range(withName: "data1")
// Extract the substring matching the named capture group
if let substringRange = Range(data1Range, in: textInput) {
    let capture = String(textInput[substringRange])
    print(capture)
}
The same can be done to get the data2 group name:
let data2Range = match.range(withName: "data2")
if let substringRange = Range(data2Range, in: textInput) {
let capture = String(textInput[substringRange])
print(capture)
}
This method's main advantage is its independence from group indices, which makes the calling code less tightly coupled to the regular expression.
Below is the line of code I use to replace an HTML break tag with a carriage return. However, I have other HTML symbols that I need to replace, and when I call this line of code again with different parameters, it's as if the first one is overwritten. Is there a way I can include multiple parameters? Is there a more efficient way to do this in Swift? For example: replace both <br> with "" and &nbsp; with "".
textView.text = content.stringByReplacingOccurrencesOfString("<br /><br />", withString:"\r")
Use replacingOccurrences along with the String.CompareOptions.regularExpression option.
Example (Swift 3):
// `x` is never mutated, so it is declared as a constant.
let x = "<Hello, [play^ground+]>"
// The character class lists every character to replace; each occurrence becomes "7".
let y = x.replacingOccurrences(of: "[\\[\\]^+<>]", with: "7", options: .regularExpression, range: nil)
print(y)
Input characters which are to be replaced inside the square brackets like so [\\ Characters]
Output:
7Hello, 7play7ground777
I solved it based on the idea of Rosetta Code
extension String {
    /// Returns a copy of the receiver without any of the given characters.
    ///
    /// - Parameter characters: The characters to drop.
    func stringByRemovingAll(characters: [Character]) -> String {
        // `String` is itself a collection of `Character`; the separate
        // `characters` view no longer exists.
        return String(self.filter({ !characters.contains($0) }))
    }
    /// Returns a copy of the receiver with every occurrence of each given
    /// substring removed, processing the substrings in array order.
    ///
    /// - Parameter subStrings: The substrings to delete.
    func stringByRemovingAll(subStrings: [String]) -> String {
        // Fold over the list instead of using `map` purely for its side effect.
        return subStrings.reduce(self) { partial, subString in
            partial.replacingOccurrences(of: subString, with: "")
        }
    }
}
Example:
let str = "Hello, stackoverflow"
let chars: [Character] = ["a", "e", "i"]
let myStrings = ["Hello", ", ", "overflow"]
// From Swift 3 on, the first argument label is part of the call; the labels
// also select between the two overloads.
let newString = str.stringByRemovingAll(characters: chars)
let anotherString = str.stringByRemovingAll(subStrings: myStrings)
Result, when printed:
newString: Hllo, stckovrflow
anotherString: stack
As @matt mentioned, you are starting over with the same content string each time. The stringByReplacingOccurrencesOfString method doesn't actually change anything in the original content string. It returns a new string with the replacements applied, while content remains unchanged.
Something like this should work for you
// Each call returns a new string, so the second replacement must start from
// the result of the first one rather than from `content` again.
let result1 = content.stringByReplacingOccurrencesOfString("<br /><br />", withString:"\r")
// Replace the HTML non-breaking-space entity with a plain space.
let result2 = result1.stringByReplacingOccurrencesOfString("&nbsp;", withString:" ")
textView.text = result2
extension String {
// Builds an attributed string by interpreting the receiver as UTF-8 encoded HTML.
// NOTE(review): both the encoded data and the initializer result are force-unwrapped
// and `error: nil` discards any failure details, so a failed conversion traps here.
// NOTE(review): written against the older Foundation naming (`dataUsingEncoding`,
// `NSUTF8StringEncoding`, `error:` parameter) — confirm the targeted Swift version.
var html2AttributedString:NSAttributedString {
return NSAttributedString(data: dataUsingEncoding(NSUTF8StringEncoding)!, options:[NSDocumentTypeDocumentAttribute:NSHTMLTextDocumentType, NSCharacterEncodingDocumentAttribute: NSUTF8StringEncoding], documentAttributes: nil, error: nil)!
}
}
let myHtmlCode = "<style type=\"text/css\">#red{color:#F00}#green{color:#0F0}#blue{color: #00F}</style><span id=\"red\" >Red</span> <span id=\"green\" >Green</span><span id=\"blue\">Blue</span>"
myHtmlCode.html2AttributedString