Regex function to find # mentions does not work anymore - ios

Xcode 8.3.3
I wrote a regex method to get all mentions of a string.
func getMentions() -> [String] {
let pattern = "\\B#[A-Za-z0-9]+"
do {
let regex = try NSRegularExpression(pattern: pattern, options: NSRegularExpression.Options(rawValue: 0))
let nsstr = self as NSString
let all = NSRange(location: 0, length: nsstr.length)
var matches : [String] = [String]()
regex.enumerateMatches(in: self, options: NSRegularExpression.MatchingOptions(rawValue: 0), range: all) {
(result : NSTextCheckingResult?, _, _) in
if let r = result {
let result = nsstr.substring(with: r.range) as String
matches.append(result)
}
}
Log.info(matches)
return matches
} catch {
return []
}
}
This used to work just fine.
However, when I run this, I get empty matches. Did something get updated just now?
I've also tried this:
let pattern = "\\B#[A-Za-z0-9]+\\b"

Related

How To Find The Index Of A Substring In Switf 5 [duplicate]

I'm used to do this in JavaScript:
var domains = "abcde".substring(0, "abcde".indexOf("cd")) // Returns "ab"
Swift doesn't have this function, how to do something similar?
edit/update:
Xcode 11.4 • Swift 5.2 or later
import Foundation
extension StringProtocol {
func index<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> Index? {
range(of: string, options: options)?.lowerBound
}
func endIndex<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> Index? {
range(of: string, options: options)?.upperBound
}
func indices<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Index] {
ranges(of: string, options: options).map(\.lowerBound)
}
func ranges<S: StringProtocol>(of string: S, options: String.CompareOptions = []) -> [Range<Index>] {
var result: [Range<Index>] = []
var startIndex = self.startIndex
while startIndex < endIndex,
let range = self[startIndex...]
.range(of: string, options: options) {
result.append(range)
startIndex = range.lowerBound < range.upperBound ? range.upperBound :
index(range.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
}
return result
}
}
usage:
let str = "abcde"
if let index = str.index(of: "cd") {
let substring = str[..<index] // ab
let string = String(substring)
print(string) // "ab\n"
}
let str = "Hello, playground, playground, playground"
str.index(of: "play") // 7
str.endIndex(of: "play") // 11
str.indices(of: "play") // [7, 19, 31]
str.ranges(of: "play") // [{lowerBound 7, upperBound 11}, {lowerBound 19, upperBound 23}, {lowerBound 31, upperBound 35}]
case insensitive sample
let query = "Play"
let ranges = str.ranges(of: query, options: .caseInsensitive)
let matches = ranges.map { str[$0] } //
print(matches) // ["play", "play", "play"]
regular expression sample
let query = "play"
let escapedQuery = NSRegularExpression.escapedPattern(for: query)
let pattern = "\\b\(escapedQuery)\\w+" // matches any word that starts with "play" prefix
let ranges = str.ranges(of: pattern, options: .regularExpression)
let matches = ranges.map { str[$0] }
print(matches) // ["playground", "playground", "playground"]
Using String[Range<String.Index>] subscript you can get the sub string. You need starting index and last index to create the range and you can do it as below
let str = "abcde"
if let range = str.range(of: "cd") {
let substring = str[..<range.lowerBound] // or str[str.startIndex..<range.lowerBound]
print(substring) // Prints ab
}
else {
print("String not present")
}
If you don't define the start index this operator ..< , it take the starting index. You can also use str[str.startIndex..<range.lowerBound] instead of str[..<range.lowerBound]
Swift 5
Find index of substring
let str = "abcdecd"
if let range: Range<String.Index> = str.range(of: "cd") {
let index: Int = str.distance(from: str.startIndex, to: range.lowerBound)
print("index: ", index) //index: 2
}
else {
print("substring not found")
}
Find index of Character
let str = "abcdecd"
if let firstIndex = str.firstIndex(of: "c") {
let index = str.distance(from: str.startIndex, to: firstIndex)
print("index: ", index) //index: 2
}
else {
print("symbol not found")
}
In Swift 4 :
Getting Index of a character in a string :
let str = "abcdefghabcd"
if let index = str.index(of: "b") {
print(index) // Index(_compoundOffset: 4, _cache: Swift.String.Index._Cache.character(1))
}
Creating SubString (prefix and suffix) from String using Swift 4:
let str : String = "ilike"
for i in 0...str.count {
let index = str.index(str.startIndex, offsetBy: i) // String.Index
let prefix = str[..<index] // String.SubSequence
let suffix = str[index...] // String.SubSequence
print("prefix \(prefix), suffix : \(suffix)")
}
Output
prefix , suffix : ilike
prefix i, suffix : like
prefix il, suffix : ike
prefix ili, suffix : ke
prefix ilik, suffix : e
prefix ilike, suffix :
If you want to generate a substring between 2 indices , use :
let substring1 = string[startIndex...endIndex] // including endIndex
let subString2 = string[startIndex..<endIndex] // excluding endIndex
Doing this in Swift is possible but it takes more lines, here is a function indexOf() doing what is expected:
func indexOf(source: String, substring: String) -> Int? {
let maxIndex = source.characters.count - substring.characters.count
for index in 0...maxIndex {
let rangeSubstring = source.startIndex.advancedBy(index)..<source.startIndex.advancedBy(index + substring.characters.count)
if source.substringWithRange(rangeSubstring) == substring {
return index
}
}
return nil
}
var str = "abcde"
if let indexOfCD = indexOf(str, substring: "cd") {
let distance = str.startIndex.advancedBy(indexOfCD)
print(str.substringToIndex(distance)) // Returns "ab"
}
This function is not optimized but it does the job for short strings.
There are three closely connected issues here:
All the substring-finding methods are over in the Cocoa NSString world (Foundation)
Foundation NSRange has a mismatch with Swift Range; the former uses start and length, the latter uses endpoints
In general, Swift characters are indexed using String.Index, not Int, but Foundation characters are indexed using Int, and there is no simple direct translation between them (because Foundation and Swift have different ideas of what constitutes a character)
Given all that, let's think about how to write:
func substring(of s: String, from:Int, toSubstring s2 : String) -> Substring? {
// ?
}
The substring s2 must be sought in s using a String Foundation method. The resulting range comes back to us, not as an NSRange (even though this is a Foundation method), but as a Range of String.Index (wrapped in an Optional, in case we didn't find the substring at all). However, the other number, from, is an Int. Thus we cannot form any kind of range involving them both.
But we don't have to! All we have to do is slice off the end of our original string using a method that takes a String.Index, and slice off the start of our original string using a method that takes an Int. Fortunately, such methods exist! Like this:
func substring(of s: String, from:Int, toSubstring s2 : String) -> Substring? {
guard let r = s.range(of:s2) else {return nil}
var s = s.prefix(upTo:r.lowerBound)
s = s.dropFirst(from)
return s
}
Or, if you prefer to be able to apply this method directly to a string, like this...
let output = "abcde".substring(from:0, toSubstring:"cd")
...then make it an extension on String:
extension String {
func substring(from:Int, toSubstring s2 : String) -> Substring? {
guard let r = self.range(of:s2) else {return nil}
var s = self.prefix(upTo:r.lowerBound)
s = s.dropFirst(from)
return s
}
}
Swift 5
let alphabet = "abcdefghijklmnopqrstuvwxyz"
var index: Int = 0
if let range: Range<String.Index> = alphabet.range(of: "c") {
index = alphabet.distance(from: alphabet.startIndex, to: range.lowerBound)
print("index: ", index) //index: 2
}
Swift 5
extension String {
enum SearchDirection {
case first, last
}
func characterIndex(of character: Character, direction: String.SearchDirection) -> Int? {
let fn = direction == .first ? firstIndex : lastIndex
if let stringIndex: String.Index = fn(character) {
let index: Int = distance(from: startIndex, to: stringIndex)
return index
} else {
return nil
}
}
}
tests:
func testFirstIndex() {
let res = ".".characterIndex(of: ".", direction: .first)
XCTAssert(res == 0)
}
func testFirstIndex1() {
let res = "12345678900.".characterIndex(of: "0", direction: .first)
XCTAssert(res == 9)
}
func testFirstIndex2() {
let res = ".".characterIndex(of: ".", direction: .last)
XCTAssert(res == 0)
}
func testFirstIndex3() {
let res = "12345678900.".characterIndex(of: "0", direction: .last)
XCTAssert(res == 10)
}
In the Swift version 3, String doesn't have functions like -
str.index(of: String)
If the index is required for a substring, one of the ways to is to get the range. We have the following functions in the string which returns range -
str.range(of: <String>)
str.rangeOfCharacter(from: <CharacterSet>)
str.range(of: <String>, options: <String.CompareOptions>, range: <Range<String.Index>?>, locale: <Locale?>)
For example to find the indexes of first occurrence of play in str
var str = "play play play"
var range = str.range(of: "play")
range?.lowerBound //Result : 0
range?.upperBound //Result : 4
Note : range is an optional. If it is not able to find the String it will make it nil. For example
var str = "play play play"
var range = str.range(of: "zoo") //Result : nil
range?.lowerBound //Result : nil
range?.upperBound //Result : nil
Leo Dabus's answer is great. Here is my answer based on his answer using compactMap to avoid Index out of range error.
Swift 5.1
extension StringProtocol {
func ranges(of targetString: Self, options: String.CompareOptions = [], locale: Locale? = nil) -> [Range<String.Index>] {
let result: [Range<String.Index>] = self.indices.compactMap { startIndex in
let targetStringEndIndex = index(startIndex, offsetBy: targetString.count, limitedBy: endIndex) ?? endIndex
return range(of: targetString, options: options, range: startIndex..<targetStringEndIndex, locale: locale)
}
return result
}
}
// Usage
let str = "Hello, playground, playground, playground"
let ranges = str.ranges(of: "play")
ranges.forEach {
print("[\($0.lowerBound.utf16Offset(in: str)), \($0.upperBound.utf16Offset(in: str))]")
}
// result - [7, 11], [19, 23], [31, 35]
Have you considered using NSRange?
if let range = mainString.range(of: mySubString) {
//...
}

Swift problem when adding annotations with regex to pdf page

I want to search for a regex in a pdf, and add annotations to it according, using the results from the regex. I have built a simple function that does this. As the amazing community (really amazing people who used their time helping me) posted I can I can use the decomposedStringWithCompatibilityMapping to search for the desired expression correctly in the pdf, but afterwards when I perform a pdf selection to find the bounds of it, I encounter a difference. I send you my code and some pictures.
func performRegex(regex:String, on pdfPage:PDFPage) {
guard let pdfString = pdfPage.string?.precomposedStringWithCanonicalMapping else { return }
guard let safeRegex = try? NSRegularExpression(pattern: regex, options: .caseInsensitive) else { return }
let results = safeRegex.matches(in: pdfString, options: .withoutAnchoringBounds, range: NSRange(pdfString.startIndex..., in: pdfString))
pdfPage.annotations.forEach { pdfPage.removeAnnotation($0)}
results.forEach { result in
let bbox = pdfPage.selection(for: result.range)?.bounds(for: pdfPage)
let annotation = PDFAnnotation(bounds: bbox!, forType: .highlight, withProperties: nil)
annotation.color = .yellow
annotation.contents = String(pdfString[Range(result.range, in:pdfString)!])
pdfPage.addAnnotation(annotation)
}
}
The problem is that when I do this and enter this expression [0-9] all my results are shifted:
While if I don't use precomposedStringWithCanonicalMapping, all my results are not shifted but I will encounter an error when I get a special character.
The problem (I suspect) is in this line of code.
let bbox = pdfPage.selection(for: result.range)?.bounds(for: pdfPage)
But I don't know any work arround for it.
Please if anyone can give me some help!
Thanks a lot
The only alternative I can think right now is to use the original string and fix the malformed ranges. Try like this:
var str = """
circular para poder realizar sus tareas laborales correspondientes a las actividades de comercialización de alimentos
"""
do {
let regex = try NSRegularExpression(pattern: ".", options: .caseInsensitive)
let results = regex.matches(in: str, options: .withoutAnchoringBounds, range: NSRange(location: 0, length: str.utf16.count))
var badrange: NSRange?
results.forEach { result in
guard let range = Range(result.range, in: str) else {
if badrange != nil {
badrange!.length += 1
if let range = Range(badrange!, in: str) {
let newStr = str[range]
print(newStr)
}
} else {
badrange = result.range
}
return
}
let newStr = str[range]
print(newStr)
badrange = nil
}
} catch {
print(error)
}

Swift code to produce a number of possible anagrams from a selected word

I've attempted to research ways to take a given word and calculate the number of possible anagrams a user can make from that word eg an 8 letter word such as snowbanks has 5 eight letter possibilities, 25 seven letter possibilities, etc (those are made up numbers). My initial plan would be to iterate over a dictionary list and check each of the words to see if it is an anagram of the word in question as I've seen suggested in other places.
Rearrange Letters from Array and check if arrangement is in array
seemed very promising, except that it is in objective C and when I tried to convert it to Swift using Swiftify I couldn't get it to work as shown below:
func findAnagrams() -> Set<AnyHashable>? {
let nineCharacters = [unichar](repeating: 0, count: 8)
let anagramKey = self.anagramKey()
// make sure this word is not too long/short.
if anagramKey == nil {
return nil
}
(anagramKey as NSString?)?.getCharacters(nineCharacters, range: NSRange)
let middleCharPos = Int((anagramKey as NSString?)?.range(of: (self as NSString).substring(with: NSRange)).location ?? 0)
var anagrams = Set<AnyHashable>()
// 0x1ff means first 9 bits set: one for each character
for i in 0...0x1ff {
// skip permutations that do not contain the middle letter
if (i & (1 << middleCharPos)) == 0 {
continue
}
var length: Int = 0
var permutation = [unichar](repeating: 0, count: 9)
for bit in 0...9 {
if true {
permutation[length] = nineCharacters[bit]
length += 1
}
}
if length < 4 {
continue
}
let permutationString = String(permutation)
let matchingAnagrams = String.anagramMap()[permutationString] as? [Any]
for word: String? in matchingAnagrams {
anagrams.insert(word)
}
}
return anagrams
}
class func anagramMap() -> [AnyHashable: Any]? {
var anagramMap: [AnyHashable: Any]
if anagramMap != nil {
return anagramMap
}
// this file is present on Mac OS and other unix variants
let allWords = try? String(contentsOfFile: "/usr/share/dict/words", encoding: .utf8)
var map = [AnyHashable: Any]()
autoreleasepool {
allWords?.enumerateLines(invoking: {(_ word: String?, _ stop: UnsafeMutablePointer<ObjCBool>?) -> Void in
let key = word?.anagramKey()
if key == nil {
return
}
var keyWords = map[key] as? [AnyHashable]
if keyWords == nil {
keyWords = [AnyHashable]()
map[key] = keyWords
}
if let aWord = word {
keyWords?.append(aWord)
}
})
}
anagramMap = map
return anagramMap
}
func anagramKey() -> String? {
let lowercaseWord = word.lowercased()
// make sure to take the length *after* lowercase. it might change!
let length: Int = lowercaseWord.count
// in this case we're only interested in anagrams 4 - 9 characters long
if length < 3 || length > 9 {
return nil
}
let sortedWord = [unichar](repeating: 0, count: length)
(lowercaseWord as NSString).getCharacters(sortedWord, range: NSRange)
qsort_b(sortedWord, length, MemoryLayout<unichar>.size, {(_ aPtr: UnsafeRawPointer?, _ bPtr: UnsafeRawPointer?) -> Int in
let a = Int(unichar(aPtr))
let b = Int(unichar(bPtr))
return b - a
})
return String(describing: sortedWord)
}
func isReal(word: String) -> Bool {
let checker = UITextChecker()
let range = NSMakeRange(0, word.utf16.count)
let misspelledRange = checker.rangeOfMisspelledWord(in: word, range: range, startingAt: 0, wrap: false, language: "en")
return misspelledRange.location == NSNotFound
}
}
I've also tried the following in an attempt to just produce a list of words that I could iterate over to check for anagrams (I have working code that checks guesses vs the main word to check for anagrams) but I wasn't able to get them to work, possibly because they require a file to be copied to the app, since I was under the impression that the phone has a dictionary preloaded that I could use for words (although I may be mistaken):
var allTheWords = try? String(contentsOfFile: "/usr/share/dict/words", encoding: .utf8)
for line: String? in allTheWords?.components(separatedBy: "\n") ?? [String?]() {
print("\(line ?? "")")
print("Double Fail \(allTheWords)")
}
and
if let wordsFilePath = Bundle.main.path(forResource: "dict", ofType: nil) {
do {
let wordsString = try String(contentsOfFile: wordsFilePath)
let wordLines = wordsString.components(separatedBy: NSCharacterSet.newlines)
let randomLine = wordLines[Int(arc4random_uniform(UInt32(wordLines.count)))]
print(randomLine)
} catch { // contentsOfFile throws an error
print("Error: \(error)")
}
}
}
I looked at UIReferenceLibraryViewController as well in an attempt to use it to produce a list of words instead of defining a selected word, but the following isn't a valid option.
let words = UIReferenceLibraryViewController.enumerated
Any assistance would be greatly appreciated!

How to trim a String using Swift 3

My code snippet is:
unwanted = " £€₹jetztabfromnow"
let favouritesPriceLabel = priceDropsCollectionView.cells.element(boundBy: UInt(index)).staticTexts[IPCUIAHighlightsPriceDropsCollectionViewCellPriceLabel].label
let favouritesPriceLabelTrimmed = favouritesPriceLabel.components(separatedBy: "jetzt").flatMap { String($0.trimmingCharacters(in: .whitespaces)) }.last
favouritesHighlightsDictionary[favouritesTitleLabel] = favouritesPriceLabelTrimmed
My problem is, this didn't work:
let favouritesPriceLabelTrimmed = favouritesPriceLabel.components(separatedBy: unwanted).flatMap { String($0.trimmingCharacters(in: .whitespaces)) }.last
I have a price like "from 3,95 €" - I want to cut all currencies "£€₹" and words like "from" or "ab"
Do you have a solution for me, what I can use here?
Rather than mess around with trying to replace or remove the right characters or using regular expressions, I'd go with Foundation's built-in linguistic tagging support. It will do a lexical analysis of the string and return tokens of various types. Use it on this kind of string and it should reliably find any numbers in the string.
Something like:
var str = "from 3,95 €"
let range = Range(uncheckedBounds: (lower: str.startIndex, upper: str.endIndex))
var tokenRanges = [Range<String.Index>]()
let scheme = NSLinguisticTagSchemeLexicalClass
let option = NSLinguisticTagger.Options()
let tags = str.linguisticTags(in: range, scheme: scheme, options: option, orthography: nil, tokenRanges: &tokenRanges)
let tokens = tokenRanges.map { str.substring(with:$0) }
if let numberTagIndex = tags.index(where: { $0 == "Number" }) {
let number = tokens[numberTagIndex]
print("Found number: \(number)")
}
In this example the code prints "3,95". If you change str to "from £28.50", it prints "28.50".
One way is to place the unwanted strings into an array, and use String's replacingOccurrences(of:with:) method.
let stringToScan = "£28.50"
let toBeRemoved = ["£", "€", "₹", "ab", "from"]
var result = stringToScan
toBeRemoved.forEach { result = result.replacingOccurrences(of: $0, with: "") }
print(result)
...yields "28.50".
If you just want to extract the numeric value use regular expression, it considers comma or dot decimal separators.
let string = "from 3,95 €"
let pattern = "\\d+[.,]\\d+"
do {
let regex = try NSRegularExpression(pattern: pattern, options: [])
if let match = regex.firstMatch(in: string, range: NSRange(location: 0, length: string.utf16.count)) {
let range = match.range
let start = string.index(string.startIndex, offsetBy: range.location)
let end = string.index(start, offsetBy: range.length)
print(string.substring(with: start..<end)) // 3,95
} else {
print("Not found")
}
} catch {
print("Regex Error:", error)
}
I asked if you had a fixed locale for this string, because then you can use the locale to determine what the decimal separator is: For example, try this in a storyboard.
let string = "some initial text 3,95 €" // define the string to scan
// Add a convenience extension to Scanner so you don't have to deal with pointers directly.
extension Scanner {
func scanDouble() -> Double? {
var value = Double(0)
guard scanDouble(&value) else { return nil }
return value
}
// Convenience method to advance the location of the scanner up to the first digit. Returning the scanner itself or nil, which allows for optional chaining
func scanUpToNumber() -> Scanner? {
var value: NSString?
guard scanUpToCharacters(from: CharacterSet.decimalDigits, into: &value) else { return nil }
return self
}
}
let scanner = Scanner(string: string)
scanner.locale = Locale(identifier: "fr_FR")
let double = scanner.scanUpToNumber()?.scanDouble() // -> double = 3.95 (note the type is Double?)
Scanners are a lot easier to use than NSRegularExpressions in these cases.
You can filter by special character by removing alphanumerics.
extension String {
func removeCharacters(from forbiddenChars: CharacterSet) -> String {
let passed = self.unicodeScalars.filter { !forbiddenChars.contains($0) }
return String(String.UnicodeScalarView(passed))
}
}
let str = "£€₹jetztabfromnow12"
let t1 = str.removeCharacters(from: CharacterSet.alphanumerics)
print(t1) // will print: £€₹
let t2 = str.removeCharacters(from: CharacterSet.decimalDigits.inverted)
print(t2) // will print: 12
Updated 1:
var str = "£3,95SS"
str = str.replacingOccurrences(of: ",", with: "")
let digit = str.removeCharacters(from: CharacterSet.decimalDigits.inverted)
print(digit) // will print: 395
let currency = str.removeCharacters(from: CharacterSet.alphanumerics)
print(currency) // will print: £
let amount = currency + digit
print(amount) // will print: £3,95
Update 2:
let string = "£3,95SS"
let pattern = "-?\\d+(,\\d+)*?\\.?\\d+?"
do {
let regex = try NSRegularExpression(pattern: pattern, options: [])
if let match = regex.firstMatch(in: string, range: NSRange(location: 0, length: string.utf16.count)) {
let range = match.range
let start = string.index(string.startIndex, offsetBy: range.location)
let end = string.index(start, offsetBy: range.length)
let digit = string.substring(with: start..<end)
print(digit) //3,95
let symbol = string.removeCharacters(from: CharacterSet.symbols.inverted)
print(symbol) // £
print(symbol + digit) //£3,95
} else {
print("Not found")
}
} catch {
print("Regex Error:", error)
}

Named capture groups in NSRegularExpression - get a range's group's name

Apple says that NSRegularExpression is based on the ICU Regular Expression library: https://developer.apple.com/library/ios/documentation/Foundation/Reference/NSRegularExpression_Class/
The pattern syntax currently supported is that specified by ICU. The ICU regular expressions are described at http://userguide.icu-project.org/strings/regexp.
That page (on icu-project.org) claims that Named Capture Groups are now supported, using the same syntax as .NET Regular Expressions:
(?<name>...) Named capture group. The <angle brackets> are literal - they appear in the pattern.
I have written a program which gets a single match with multiple ranges which seem correct - though each range is returned twice (for reasons unknown) - but the only information I have is the range's index and its text range.
For example, the regex: ^(?<foo>foo)\.(?<bar>bar)\.(?<bar2>baz)$ with test string foo.bar.baz
Gives me these results:
Idx Start Length Text
0 0 11 foo.bar.baz
1 0 3 foo
2 4 3 bar
3 8 3 baz
Is there any way to know that "baz" came from the capture-group bar2?
Since iOS11 named capture groups are supported. NSTextCheckingResult has the function open func range(withName name: String) -> NSRange.
Using the regex: ^(?<foo>foo)\.(?<bar>bar)\.(?<bar2>baz)$ with the test string foo.bar.baz gives 4 result matches. The function match.range(withName: "bar2") returns the range for the String baz
I have worked on the example as created by Daniele Bernardini.
There are a number of changes:
First of all the code is now compatible with Swift 3
The code of Daniele has a defect that it will not capture nested captures. I have made the regular expressions slightly less aggressive to allow for nesting of capture groups.
I prefer to actually receive the actual captures in a Set. I added a method named captureGroups() that returns the captures as a string instead of a range.
import Foundation
extension String {
func matchingStrings(regex: String) -> [[String]] {
guard let regex = try? NSRegularExpression(pattern: regex, options: []) else { return [] }
let nsString = self as NSString
let results = regex.matches(in: self, options: [], range: NSMakeRange(0, nsString.length))
return results.map { result in
(0..<result.numberOfRanges).map { result.rangeAt($0).location != NSNotFound
? nsString.substring(with: result.rangeAt($0))
: ""
}
}
}
func range(from nsRange: NSRange) -> Range<String.Index>? {
guard
let from16 = utf16.index(utf16.startIndex, offsetBy: nsRange.location, limitedBy: utf16.endIndex),
let to16 = utf16.index(utf16.startIndex, offsetBy: nsRange.location + nsRange.length, limitedBy: utf16.endIndex),
let from = from16.samePosition(in: self),
let to = to16.samePosition(in: self)
else { return nil }
return from ..< to
}
}
extension NSRegularExpression {
typealias GroupNamesSearchResult = (NSTextCheckingResult, NSTextCheckingResult, Int)
private func textCheckingResultsOfNamedCaptureGroups() -> [String:GroupNamesSearchResult] {
var groupnames = [String:GroupNamesSearchResult]()
guard let greg = try? NSRegularExpression(pattern: "^\\(\\?<([\\w\\a_-]*)>$", options: NSRegularExpression.Options.dotMatchesLineSeparators) else {
// This never happens but the alternative is to make this method throwing
return groupnames
}
guard let reg = try? NSRegularExpression(pattern: "\\(.*?>", options: NSRegularExpression.Options.dotMatchesLineSeparators) else {
// This never happens but the alternative is to make this method throwing
return groupnames
}
let m = reg.matches(in: self.pattern, options: NSRegularExpression.MatchingOptions.withTransparentBounds, range: NSRange(location: 0, length: self.pattern.utf16.count))
for (n,g) in m.enumerated() {
let r = self.pattern.range(from: g.rangeAt(0))
let gstring = self.pattern.substring(with: r!)
let gmatch = greg.matches(in: gstring, options: NSRegularExpression.MatchingOptions.anchored, range: NSRange(location: 0, length: gstring.utf16.count))
if gmatch.count > 0{
let r2 = gstring.range(from: gmatch[0].rangeAt(1))!
groupnames[gstring.substring(with: r2)] = (g, gmatch[0],n)
}
}
return groupnames
}
func indexOfNamedCaptureGroups() throws -> [String:Int] {
var groupnames = [String:Int]()
for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
groupnames[name] = n + 1
}
return groupnames
}
func rangesOfNamedCaptureGroups(match:NSTextCheckingResult) throws -> [String:Range<Int>] {
var ranges = [String:Range<Int>]()
for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
ranges[name] = match.rangeAt(n+1).toRange()
}
return ranges
}
private func nameForIndex(_ index: Int, from: [String:GroupNamesSearchResult]) -> String? {
for (name,(_,_,n)) in from {
if (n + 1) == index {
return name
}
}
return nil
}
func captureGroups(string: String, options: NSRegularExpression.MatchingOptions = []) -> [String:String] {
return captureGroups(string: string, options: options, range: NSRange(location: 0, length: string.utf16.count))
}
func captureGroups(string: String, options: NSRegularExpression.MatchingOptions = [], range: NSRange) -> [String:String] {
var dict = [String:String]()
let matchResult = matches(in: string, options: options, range: range)
let names = try self.textCheckingResultsOfNamedCaptureGroups()
for (n,m) in matchResult.enumerated() {
for i in (0..<m.numberOfRanges) {
let r2 = string.range(from: m.rangeAt(i))!
let g = string.substring(with: r2)
if let name = nameForIndex(i, from: names) {
dict[name] = g
}
}
}
return dict
}
}
An example of using the new method captureGroups() is:
let node = "'test_literal'"
let regex = try NSRegularExpression(pattern: "^(?<all>(?<delimiter>'|\")(?<value>.*)(?:\\k<delimiter>))$", options: NSRegularExpression.Options.dotMatchesLineSeparators)
let match2 = regex.captureGroups(string: node, options: NSRegularExpression.MatchingOptions.anchored)
print(match2)
And it will print:
["delimiter": "\'", "all": "\'test_literal\'", "value": "test_literal"]
I was facing the same issue and ended up backing my own solution. Feel free to comment or improve ;-)
extension NSRegularExpression {
typealias GroupNamesSearchResult = (NSTextCheckingResult, NSTextCheckingResult, Int)
private func textCheckingResultsOfNamedCaptureGroups() throws -> [String:GroupNamesSearchResult] {
var groupnames = [String:GroupNamesSearchResult]()
let greg = try NSRegularExpression(pattern: "^\\(\\?<([\\w\\a_-]*)>.*\\)$", options: NSRegularExpressionOptions.DotMatchesLineSeparators)
let reg = try NSRegularExpression(pattern: "\\([^\\(\\)]*\\)", options: NSRegularExpressionOptions.DotMatchesLineSeparators)
let m = reg.matchesInString(self.pattern, options: NSMatchingOptions.WithTransparentBounds, range: NSRange(location: 0, length: self.pattern.utf16.count))
for (n,g) in m.enumerate() {
let gstring = self.pattern.substringWithRange(g.rangeAtIndex(0).toRange()!)
print(self.pattern.substringWithRange(g.rangeAtIndex(0).toRange()!))
let gmatch = greg.matchesInString(gstring, options: NSMatchingOptions.Anchored, range: NSRange(location: 0, length: gstring.utf16.count))
if gmatch.count > 0{
groupnames[gstring.substringWithRange(gmatch[0].rangeAtIndex(1).toRange()!)] = (g,gmatch[0],n)
}
}
return groupnames
}
func indexOfNamedCaptureGroups() throws -> [String:Int] {
var groupnames = [String:Int]()
for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
groupnames[name] = n + 1
}
//print(groupnames)
return groupnames
}
func rangesOfNamedCaptureGroups(match:NSTextCheckingResult) throws -> [String:Range<Int>] {
var ranges = [String:Range<Int>]()
for (name,(_,_,n)) in try self.textCheckingResultsOfNamedCaptureGroups() {
ranges[name] = match.rangeAtIndex(n+1).toRange()
}
return ranges
}
}
Here is an usage example:
let node = "'test_literal'"
let regex = try NSRegularExpression(pattern: "^(?<delimiter>'|\")(?<value>.*)(?:\\k<delimiter>)$", options: NSRegularExpressionOptions.DotMatchesLineSeparators)
let match = regex.matchesInString(node, options: NSMatchingOptions.Anchored, range: NSRange(location: 0,length: node.utf16.count))
if match.count > 0 {
let ranges = try regex.rangesOfNamedCaptureGroups(match[0])
guard let range = ranges["value"] else {
}
}

Resources