Slow Swift String Performance - ios

I am trying to solve the Palindrome Partitioning question (https://leetcode.com/problems/palindrome-partitioning/), and I came up with this solution:
func partition(_ s: String) -> [[String]] {
var result: [[String]] = []
func dfs(string: String, partiton: [String]) {
if string.characters.count == 0 {
result.append(partiton)
return
}
for length in 1...string.characters.count {
let endIndex = string.index(string.startIndex, offsetBy: length-1)
let part = string[string.startIndex...endIndex]
if isPalindrome(part) {
let leftPart = string[string.index(after: endIndex)..<string.endIndex]
print("string: \(string) part: \(part) leftpart: \(leftPart)")
dfs(string: leftPart, partiton: partiton + [part])
}
}
}
func isPalindrome(_ s: String) -> Bool {
if String(s.characters.reversed()) == s {
return true
} else {
return false
}
}
dfs(string: s, partiton: [])
return result
}
But the performance is bad: Time Limit Exceeded. The same idea implemented in Python passes, however:
def partition(self, s):
    res = []
    self.dfs(s, [], res)
    return res

def dfs(self, s, path, res):
    if not s:
        res.append(path)
        return
    for i in range(1, len(s)+1):
        if self.isPal(s[:i]):
            self.dfs(s[i:], path+[s[:i]], res)

def isPal(self, s):
    return s == s[::-1]
This makes me wonder how the Swift implementation can be improved, and why it is slower than the Python one.

A Swift String is a collection of Characters, and a Character represents a single extended grapheme cluster, which can consist of one or more Unicode scalars. That makes index operations like "skip the first N characters" slow.
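For example (a small illustration of my own, not part of the original question):
let flag: Character = "🇩🇪"
print(String(flag).unicodeScalars.count)  // 2 Unicode scalars, but a single Character

let s = String(repeating: "Hello world", count: 10)
// index(_:offsetBy:) has to walk the string character by character,
// so this is O(n), not O(1):
let i = s.index(s.startIndex, offsetBy: 50)
print(s[i])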
But the first improvement is to "short-circuit" the isPalindrome()
function. Instead of building the reversed string completely, compare
the character sequence with its reversed sequence and stop as soon
as a difference is found:
func isPalindrome(_ s: String) -> Bool {
return !zip(s.characters, s.characters.reversed()).contains { $0 != $1 }
}
s.characters.reversed() does not create a new collection in reverse order, it just enumerates the characters from back to front. With String(s.characters.reversed()), as in your method, you force the creation of a new collection for the reversed string, which makes it slow.
For the 110-character string
let string = String(repeating: "Hello world", count: 10)
this reduces the computation time from about 6 sec to 1.2 sec in my test.
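(For reference, timings like these can be reproduced with a simple harness along the following lines; the exact numbers of course depend on the hardware and on whether the code is compiled with optimizations.)
import Foundation

let testString = String(repeating: "Hello world", count: 10)
let start = Date()
let partitions = partition(testString)
print("\(partitions.count) partitions, \(Date().timeIntervalSince(start)) seconds")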
Next, avoid index calculations like
let endIndex = string.index(string.startIndex, offsetBy: length-1)
and iterate over the character index itself instead:
func partition(_ s: String) -> [[String]] {
var result: [[String]] = []
func dfs(string: String, partiton: [String]) {
if string.isEmpty {
result.append(partiton)
return
}
var idx = string.startIndex
repeat {
string.characters.formIndex(after: &idx)
let part = string.substring(to: idx)
if isPalindrome(part) {
let leftPart = string.substring(from: idx)
dfs(string: leftPart, partiton: partiton + [part])
}
} while idx != string.endIndex
}
func isPalindrome(_ s: String) -> Bool {
return !zip(s.characters, s.characters.reversed()).contains { $0 != $1 }
}
dfs(string: s, partiton: [])
return result
}
Computation time is now 0.7 sec.
The next step is to avoid string indexing totally, and work with
an array of characters, because array indexing is fast. Even better,
use array slices which are fast to create and reference the original
array elements:
func partition(_ s: String) -> [[String]] {
var result: [[String]] = []
func dfs(chars: ArraySlice<Character>, partiton: [String]) {
if chars.isEmpty {
result.append(partiton)
return
}
for length in 1...chars.count {
let part = chars.prefix(length)
if isPalindrome(part) {
let leftPart = chars.dropFirst(length)
dfs(chars: leftPart, partiton: partiton + [String(part)])
}
}
}
func isPalindrome(_ c: ArraySlice<Character>) -> Bool {
return !zip(c, c.reversed()).contains { $0 != $1 }
}
dfs(chars: ArraySlice(s.characters), partiton: [])
return result
}
Computation time is now 0.08 sec.
If your string contains only characters in the "basic multilingual plane" (i.e. <= U+FFFF) then you can work with UTF-16 code points instead:
func partition(_ s: String) -> [[String]] {
var result: [[String]] = []
func dfs(chars: ArraySlice<UInt16>, partiton: [String]) {
if chars.isEmpty {
result.append(partiton)
return
}
for length in 1...chars.count {
let part = chars.prefix(length)
if isPalindrome(part) {
let leftPart = chars.dropFirst(length)
part.withUnsafeBufferPointer {
dfs(chars: leftPart, partiton: partiton + [String(utf16CodeUnits: $0.baseAddress!, count: length)])
}
}
}
}
func isPalindrome(_ c: ArraySlice<UInt16>) -> Bool {
return !zip(c, c.reversed()).contains { $0 != $1 }
}
dfs(chars: ArraySlice(s.utf16), partiton: [])
return result
}
Computation time is now 0.04 sec for the 110 character test string.
So, some tips that can potentially improve performance when working with Swift strings:
Iterate over the characters/indices sequentially. Avoid "jumping"
to the n'th position.
If you need "random" access to all characters, convert the string
to an array first.
Working with the UTF-16 view of a string can be faster than working
with the characters view.
Of course it depends on the actual use case. In this application, we were able to reduce the computation time from 6 sec to 0.04 sec, which is a factor of 150.

Related

All possible combinations (subsets) in swift

I'm having this issue of trying to recursively print out all subsets of the given array of String (characters) using Swift. The value is "a1b2". The output should have 4 subsets.
Currently stuck here:
func overall(string: String) {
helper(string: string, i: 0, slate: "")
}
func helper(string: String, i: Int, slate: String) {
var result = [Any]()
let word = Array(string)
var counter = i
if string.count == counter {
result.append(slate)
} else {
if word[i].isNumber {
counter += 1
helper(string: string, i: counter, slate: slate + String(word[i]))
} else if word[i].isLowercase {
counter += 1
helper(string: string, i: counter, slate: slate + String(word[i]).uppercased())
} else {
counter += 1
helper(string: string, i: counter, slate: slate + String(word[i]).lowercased())
}
}
}
overall(string: "a1b2")
I'm having issues creating a base case in the helper function. Also, I'm unsure if I'm using recursion properly. Could you please help with an explanation? It would be greatly appreciated.
I'm sure this is totally useless in general, but just for fun, here's an amusing recursion-free solution for the particular problem given, where we know the string is exactly four characters and we know that either uppercased or lowercased must be applied to each character:
let s = "a1b2"
let arr = Array(s).map(String.init)
var result : Set<String> = []
for i in 0b0000...0b1111 {
var tog = [Bool]()
for sh in 0...3 { tog.append(i & 1<<sh == 0) }
var word = ""
for ix in 0...3 {
let f = tog[ix] ? arr[ix].lowercased : arr[ix].uppercased
word = word + f()
}
result.insert(word)
}
print(result)
The OP clarified in comments that he wanted the case variants of the original string, not "subsets" as originally stated.
[Edit] I originally had a paragraph here about String.count, however, my memory must have been in error, because Apple's documentation does state that String.count is in fact the number of Characters, which is what we would all want it to be anyway. I hope my error didn't throw anyone too far off.
You don't need any counters. All you need is the first character, and recurse on the rest of the string.
The thing is, when the first character is a letter, you need to prepend both the upper and lower case variants to all of the strings returned by the recursive call.
The base case is at the end of the string, in which case you return an array containing just the empty string.
Here's my implementation:
func caseVariants(of s: String) -> [String]
{
func caseVariants(of s: Substring) -> [String]
{
guard let c = s.first else { return [""] } // base case
let remainder = s[s.index(after: s.startIndex)...]
let remainderVariants = caseVariants(of: remainder)
var results: [String] = []
if c.isLetter
{
results.append(
contentsOf: remainderVariants.map {
"\(c.uppercased())" + $0
}
)
results.append(
contentsOf: remainderVariants.map {
"\(c.lowercased())" + $0
}
)
}
else
{
results.append(
contentsOf: remainderVariants.map { "\(c)" + $0 }
)
}
return results
}
return caseVariants(of: s[...]).sorted()
}
print("Case variants:")
for s in caseVariants(of: "a1b2") { print("\"\(s)\"") }
The output is:
Case variants:
"A1B2"
"A1b2"
"a1B2"
"a1b2"
[EDIT] in comments, OP asked what if .startIndex were disallowed (such as in an interview). While I think such a restriction is insane, there is an easy solution, and it's a one-line, quite reasonable change to my previous code. Change this line:
let remainder = s[s.index(after: s.startIndex)...]
to use .dropFirst()
let remainder = s.dropFirst()
If we look at the implementation of dropFirst in the Collection protocol in the standard library:
#inlinable
public __consuming func dropFirst(_ k: Int = 1) -> SubSequence {
_precondition(k >= 0, "Can't drop a negative number of elements from a collection")
let start = index(startIndex, offsetBy: k, limitedBy: endIndex) ?? endIndex
return self[start..<endIndex]
}
We see that the use of dropFirst will use the default value of 1 for k. In that case, when we've already checked that we're not at the end of the string, the line
let start = index(startIndex, offsetBy: k, limitedBy: endIndex) ?? endIndex
is equivalent to
let start = index(after: startIndex)
which means that the returned substring is
return self[index(after: startIndex)..<endIndex]
which is just the canonical way of saying:
return self[index(after: startIndex)...]
So a version using dropFirst() is identical to the original solution once inlining has done its thing.

Return multiple of string characters

I want to create a function where an input of a string is taken in and the output returns a string of how many times the number is repeated. For example if my string is "1111223444" it will return "41221334", because there are four 1's, two 2's, one 3, and three 4's. So an input of "2234467" would return "2213241617". I am not sure if a dictionary would be the best way to implement it and I am really confused. I have started the function however don't know where to go from here. Any tips or resources will be helpful.
func stringOutput(input: String) -> String {
var result = ""
var lastCharacter: Character
var count = 0
var countDict: [String: Int] = [:]
for item in input {
countDict[item] as Character
}
return result
}
Your function computes the next term of the Look-and-say sequence. Here is an implementation (essentially taken from Leetcode 38: The "count-and-say" sequence on Code Review). Instead of traversing the string character by character, we directly search for the next index of a character different from the current one. Neither the first nor the last run has to be treated specially:
extension String {
func lookAndSay() -> String {
var result = ""
var fromIndex = startIndex // Start of current run
while fromIndex != endIndex {
let char = self[fromIndex] // Current character
// Find start of next run
let toIndex = self[fromIndex...].firstIndex(where: { $0 != char }) ?? endIndex
// Compute length of run, and append it to the result
let len = distance(from: fromIndex, to: toIndex)
result += "\(len)\(char)"
// Continue with next run
fromIndex = toIndex
}
return result
}
}
print("1111223444".lookAndSay()) // 41221334
print("2234467".lookAndSay()) // 2213241617
Try the snippet below.
func stringOutput(input: String) -> String {
var result = ""
var lastKnownCharacter: Character? = nil
var lastKnownCharacterCount: Int = 0
for character in input {
if lastKnownCharacter == nil {
lastKnownCharacter = character
lastKnownCharacterCount = 1
} else if lastKnownCharacter == character {
lastKnownCharacterCount += 1
} else {
result.append("\(lastKnownCharacterCount)\(lastKnownCharacter!)")
lastKnownCharacter = character
lastKnownCharacterCount = 1
}
}
result.append("\(lastKnownCharacterCount)\(lastKnownCharacter!)")
return result
}
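Example usage, matching the expected output from the question:
print(stringOutput(input: "1111223444")) // 41221334
print(stringOutput(input: "2234467"))    // 2213241617
Note that an empty input string would crash on the force unwrap in the final append, so you may want to guard against that.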

Swift: Map Array of Objects Alphabetically by Name(String) into Separate Letter Collections within a new Array

I have created a struct called Contact which represents a human contact; there are currently a few of them in an Array. They are already sorted alphabetically by the name property (a String), but I don't just want to have them in order in a single array. I would like to split the objects into different collections keyed by the first letter of their name, e.g. "A" contains the 2 objects whose name begins with A, "B" for names like Bobby, Brad, etc., and so on and so forth.
let contactData:[Contact] = [
Contact(id: 1, available: true, name: "Adam"),
Contact(id: 2, available: true, name: "Adrian"),
Contact(id: 3, available: true, name: "Balthazar"),
Contact(id: 4, available: true, name: "Bobby")
]
I would like to create something like
let sectionTitles = ["A", "B"]
let sortedContactData = [
[
Contact(name: "Adam"),
Contact(name: "Adrian")
],
[
Contact(name:"Balthazar")
Contact(name:"Bobby")
]
]
Or something similar...
The end result is that I would like to display them into a UITableView with the letters in Sections and the Objects into indexPath.rows much like how the Contacts app native to the iPhone does it. I am actually not sure whether this is the most ideal way to achieve this result so I welcome any challenges to this question!
let sortedContacts = contactData.sorted(by: { $0.name < $1.name }) // sort the Array first.
print(sortedContacts)
let groupedContacts = sortedContacts.reduce([[Contact]]()) {
guard var last = $0.last else { return [[$1]] }
var collection = $0
if last.first!.name.characters.first == $1.name.characters.first {
last += [$1]
collection[collection.count - 1] = last
} else {
collection += [[$1]]
}
return collection
}
print(groupedContacts)
Sort the list: O(n log n), where n is the number of items in the array (contactData). Then use reduce to iterate over each contact in the sorted list and either add it to a new group or to the last one: O(n), where n is the number of items in the array (sortedContacts).
If you need better printed output, you should make Contact conform to the CustomStringConvertible protocol.
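For example, a minimal conformance might look like this (a sketch; choose whatever format you prefer):
extension Contact: CustomStringConvertible {
    var description: String {
        return "Contact(id: \(id), name: \(name), available: \(available))"
    }
}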
Chunk up a collection based on a predicate
We could let ourselves be inspired by GitHub user oisdk's chunk(n:) method on Collection, and modify it to chunk up a Collection instance based on a supplied (Element, Element) -> Bool predicate, used to decide whether a given element should be included in the same chunk as the preceding one.
extension Collection {
func chunk(by predicate: @escaping (Iterator.Element, Iterator.Element) -> Bool) -> [SubSequence] {
var res: [SubSequence] = []
var i = startIndex
var k: Index
while i != endIndex {
k = endIndex
var j = index(after: i)
while j != endIndex {
if !predicate(self[i], self[j]) {
k = j
break
}
formIndex(after: &j)
}
res.append(self[i..<k])
i = k
}
return res
}
}
Applying this to your example
Example setup (where we, as you've stated, assume that the contactData array is already sorted).
struct Contact {
let id: Int
var available: Bool
let name: String
}
let contactData: [Contact] = [
Contact(id: 1, available: true, name: "Adam"),
Contact(id: 2, available: true, name: "Adrian"),
Contact(id: 3, available: true, name: "Balthazar"),
Contact(id: 4, available: true, name: "Bobby")
]
Using the chunk(by:) method above to split the contactData array into chunks of Contact instances, based on the initial letter of their names:
let groupedContactData = contactData.chunk {
$0.name.characters.first.map { String($0) } ?? "" ==
$1.name.characters.first.map { String($0) } ?? ""
}
for group in groupedContactData {
print(group.map { $0.name })
} /* ["Adam", "Adrian"]
["Balthazar", "Bobby"] */
Improving the chunk(by:) method above
In my initial (non-compiling) version of chunk(by:) above, I wanted to make use of the index(where:) method available to Slice instances:
// does not compile!
extension Collection {
func chunk(by predicate: @escaping (Iterator.Element, Iterator.Element) -> Bool) -> [SubSequence] {
var res: [SubSequence] = []
var i = startIndex
var j = index(after: i)
while i != endIndex {
j = self[j..<endIndex]
.index(where: { !predicate(self[i], $0) } ) ?? endIndex
/* ^^^^^ error: incorrect argument label in call
(have 'where:', expected 'after:') */
res.append(self[i..<j])
i = j
}
return res
}
}
But it seems that it cannot resolve this method correctly, probably due to a missing constraint (Collection where ...) in the extension. Maybe someone can shed light on how to make the stdlib-simplified extension above compile?
We may, however, implement this somewhat briefer extension if we apply it to Array, in which case index(where:) can be successfully called on the ArraySlice instance (self[...]):
// ok
extension Array {
func chunk(by predicate: @escaping (Iterator.Element, Iterator.Element) -> Bool) -> [SubSequence] {
var res: [SubSequence] = []
var i = startIndex
var j = index(after: i)
while i != endIndex {
j = self[j..<endIndex]
.index(where: { !predicate(self[i], $0) } ) ?? endIndex
res.append(self[i..<j])
i = j
}
return res
}
}
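For instance, applied to a plain array of Ints (my own usage example, not from the original answer):
let numbers = [1, 1, 2, 3, 3, 3]
let chunks = numbers.chunk { $0 == $1 }
print(chunks.map { Array($0) }) // [[1, 1], [2], [3, 3, 3]]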
IMHO there is no single-map way to do that, so the algorithm is:
var sectionedData: [String: [Contact]] = [:]
contactData.forEach {
guard let firstLetter = $0.name.characters.first else {
sectionedData["#"] = (sectionedData["#"] ?? []) + [$0]
return
}
let firstLetterStr = String(firstLetter)
sectionedData[firstLetterStr] = (sectionedData[firstLetterStr] ?? []) + [$0]
}
let sortedContactData = sectionedData.sorted(by: { $0.key < $1.key })
A solution for filtering and splitting a collection into as many smaller collections as there are given predicates.
E.g. given the array of Ints [1, 2, 3, 4] and the predicates odd, even and >3,
the result would be [ [1, 3], [2, 4], [4] ]
Note: the resulting subsets can overlap, depending on the given predicates. (I'm curious whether this can be improved; reduce has complexity O(n).)
extension Collection {
func filterParts(_ predicates: ((Element) -> Bool)...) -> [ [Element] ] {
let empty = predicates.map { _ -> [Element] in return [] }
let enumeratedPredicates = predicates.enumerated()
return reduce(empty) { (result, element) in
var result = result
enumeratedPredicates.forEach { offset, predicate in
if predicate(element) {
result[offset].append(element)
}
}
return result
}
}
}
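Using the example from the description above:
let parts = [1, 2, 3, 4].filterParts({ $0 % 2 != 0 }, { $0 % 2 == 0 }, { $0 > 3 })
print(parts) // [[1, 3], [2, 4], [4]]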
Use Dictionary's init(grouping:by:) like so:
lazy var sectionDictionary: Dictionary<String, [Contact]> = {
return Dictionary(grouping: contactData, by: {
// assumes name is a non-empty string
let name = $0.name
let normalizedName = name.folding(options: [.diacriticInsensitive, .caseInsensitive], locale: .current)
let firstCharAsString = String(normalizedName.first!).uppercased()
return firstCharAsString
})
}()
I spelled out the different transformation steps but you can combine them in a one-liner if you like.
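For example, the combined version might look roughly like this (a sketch, still assuming non-empty names):
let sectionDictionary = Dictionary(grouping: contactData) {
    String($0.name.folding(options: [.diacriticInsensitive, .caseInsensitive], locale: .current).first!).uppercased()
}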
This yields a dictionary with the section names as keys and arrays of objects as values.
From there you can easily extract the section array of arrays and you get your section names array for free:
lazy var sectionTitles: [String] = {
return self.sectionDictionary.keys.sorted()
}()
lazy var sections: Array<[Contact]> = {
return self.sectionTitles.map { sectionDictionary[$0]! }
}()
Please mind that I use some force unwrapping, which you should guard against in production code.

method that removes all twice duplicate characters in String

I wrote a method that removes every pair of duplicate characters in a String, i.e. I need to delete only the characters that occur twice. For example:
"bndkss" -> "bndk"
"nnmmhj" -> "hj"
"aaabbaac" -> "ac"
"abba" -> ""
I wrote it in Objective-C and everything works, but in Swift it is not working. Please help: where did I go wrong?
override func viewDidLoad() {
super.viewDidLoad()
let string = "baab"
print("before: \(string)")
let stringAfter = checkString(string: string)
print("after: \(stringAfter)")
}
func checkString(string : String) -> String {
var tempString = string
for (index, element) in string.characters.enumerated() {
for (index2, element2) in string.characters.enumerated() {
if element == element2 && index != index2 {
if index > index2 {
tempString.remove(at: tempString.index(tempString.startIndex, offsetBy: index))
tempString.remove(at: tempString.index(tempString.startIndex, offsetBy: index2))
} else {
tempString.remove(at: tempString.index(tempString.startIndex, offsetBy: index2))
tempString.remove(at: tempString.index(tempString.startIndex, offsetBy: index))
}
if tempString.characters.count < 1 {
return ""
} else {
checkString(string: tempString)
}
} else {
if index == tempString.characters.count - 1 && index2 == tempString.characters.count - 1 {
return tempString
}
}
}
}
return ""
}
Update:
I just needed
return checkString(string: tempString)
instead of
checkString(string: tempString)
There are two problems in your code:
After removing characters from tempString, the indices index and index2 no longer refer to the original characters in tempString.
Wrong characters are removed as a consequence.
You call checkString() recursively but discard the result.
Update: As you already noticed in the meantime, return checkString(string: tempString) solves these problems.
Here is an alternative implementation. The idea is to use a dictionary
to remember where a character has been seen last, and an index set
which keeps track of the positions of the characters which are to
be preserved. Instead of two nested loops and recursion, two "simple"
loops are used here, plus the cost of the dictionary and set operations.
func removeDuplicateCharacters(string: String) -> String {
var seen = [Character: Int]()
var keep = IndexSet(integersIn: 0..<string.characters.count)
for (idx, c) in string.characters.enumerated() {
if let prevIndex = seen[c] {
keep.remove(prevIndex)
keep.remove(idx)
seen.removeValue(forKey: c)
} else {
seen[c] = idx
}
}
return String(keep.map { string[string.index(string.startIndex, offsetBy: $0)] })
}
Examples:
print(removeDuplicateCharacters(string: "bndkss")) // ""bndk"
print(removeDuplicateCharacters(string: "nnmmhj")) // "jh"
print(removeDuplicateCharacters(string: "abba")) // ""
print(removeDuplicateCharacters(string: "aaabbaac")) // "ac"
Martin wrote a much cleaner version than mine, but I worked on this for a little while, so I figured I'd post it to show another way it could have been accomplished.
func removeDuplicates(from original: String) -> String {
var originalString = original
var newString = ""
for character in originalString.characters {
if !newString.contains("\(character)") {
newString.append(character)
originalString = originalString.characters.filter { $0.description != "\(character)" }.map { "\($0)" }.joined(separator: "")
} else {
newString = newString.characters.filter { $0.description != "\(character)" }.map { "\($0)" }.joined(separator: "")
}
}
return newString
}
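Example usage, with the test cases from the question:
print(removeDuplicates(from: "bndkss"))   // "bndk"
print(removeDuplicates(from: "aaabbaac")) // "ac"
print(removeDuplicates(from: "abba"))     // ""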

How to read and slice a file chunk by chunk, then display as String? [duplicate]

I am trying to read a file given in an NSURL and load it into an array, with items separated by a newline character \n.
Here is the way I've done it so far:
var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString
if var list = possList {
list = list.componentsSeparatedByString("\n") as NSString[]
return list
}
else {
//return empty list
}
I'm not very happy with this for a couple of reasons. One, I'm working with files that range from a few kilobytes to hundreds of MB in size. As you can imagine, working with strings this large is slow and unwieldy. Secondly, this freezes up the UI when it's executing--again, not good.
I've looked into running this code in a separate thread, but I've been having trouble with that, and besides, it still doesn't solve the problem of dealing with huge strings.
What I'd like to do is something along the lines of the following pseudocode:
var aStreamReader = new StreamReader(from_file_or_url)
while aStreamReader.hasNextLine == true {
currentline = aStreamReader.nextLine()
list.addItem(currentline)
}
How would I accomplish this in Swift?
A few notes about the files I'm reading from: All files consist of short (<255 chars) strings separated by either \n or \r\n. The length of the files range from ~100 lines to over 50 million lines. They may contain European characters, and/or characters with accents.
(The code is for Swift 2.2/Xcode 7.3 now. Older versions can be found in the edit history if somebody needs it. An updated version for Swift 3 is provided at the end.)
The following Swift code is heavily inspired by the various answers to
How to read data from NSFileHandle line by line?. It reads from the file in chunks, and converts complete lines to strings.
The default line delimiter (\n), string encoding (UTF-8) and chunk size (4096)
can be set with optional parameters.
class StreamReader {
let encoding : UInt
let chunkSize : Int
var fileHandle : NSFileHandle!
let buffer : NSMutableData!
let delimData : NSData!
var atEof : Bool = false
init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
self.chunkSize = chunkSize
self.encoding = encoding
if let fileHandle = NSFileHandle(forReadingAtPath: path),
delimData = delimiter.dataUsingEncoding(encoding),
buffer = NSMutableData(capacity: chunkSize)
{
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = buffer
} else {
self.fileHandle = nil
self.delimData = nil
self.buffer = nil
return nil
}
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
if atEof {
return nil
}
// Read data chunks from file until a line delimiter is found:
var range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
while range.location == NSNotFound {
let tmpData = fileHandle.readDataOfLength(chunkSize)
if tmpData.length == 0 {
// EOF or read error.
atEof = true
if buffer.length > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = NSString(data: buffer, encoding: encoding)
buffer.length = 0
return line as String?
}
// No more lines.
return nil
}
buffer.appendData(tmpData)
range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
}
// Convert complete line (excluding the delimiter) to a string:
let line = NSString(data: buffer.subdataWithRange(NSMakeRange(0, range.location)),
encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.replaceBytesInRange(NSMakeRange(0, range.location + range.length), withBytes: nil, length: 0)
return line as String?
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seekToFileOffset(0)
buffer.length = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
Usage:
if let aStreamReader = StreamReader(path: "/path/to/file") {
defer {
aStreamReader.close()
}
while let line = aStreamReader.nextLine() {
print(line)
}
}
You can even use the reader with a for-in loop
for line in aStreamReader {
print(line)
}
by implementing the SequenceType protocol (compare http://robots.thoughtbot.com/swift-sequences):
extension StreamReader : SequenceType {
func generate() -> AnyGenerator<String> {
return AnyGenerator {
return self.nextLine()
}
}
}
Update for Swift 3/Xcode 8 beta 6: Also "modernized" to
use guard and the new Data value type:
class StreamReader {
let encoding : String.Encoding
let chunkSize : Int
var fileHandle : FileHandle!
let delimData : Data
var buffer : Data
var atEof : Bool
init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
chunkSize: Int = 4096) {
guard let fileHandle = FileHandle(forReadingAtPath: path),
let delimData = delimiter.data(using: encoding) else {
return nil
}
self.encoding = encoding
self.chunkSize = chunkSize
self.fileHandle = fileHandle
self.delimData = delimData
self.buffer = Data(capacity: chunkSize)
self.atEof = false
}
deinit {
self.close()
}
/// Return next line, or nil on EOF.
func nextLine() -> String? {
precondition(fileHandle != nil, "Attempt to read from closed file")
// Read data chunks from file until a line delimiter is found:
while !atEof {
if let range = buffer.range(of: delimData) {
// Convert complete line (excluding the delimiter) to a string:
let line = String(data: buffer.subdata(in: 0..<range.lowerBound), encoding: encoding)
// Remove line (and the delimiter) from the buffer:
buffer.removeSubrange(0..<range.upperBound)
return line
}
let tmpData = fileHandle.readData(ofLength: chunkSize)
if tmpData.count > 0 {
buffer.append(tmpData)
} else {
// EOF or read error.
atEof = true
if buffer.count > 0 {
// Buffer contains last line in file (not terminated by delimiter).
let line = String(data: buffer as Data, encoding: encoding)
buffer.count = 0
return line
}
}
}
return nil
}
/// Start reading from the beginning of file.
func rewind() -> Void {
fileHandle.seek(toFileOffset: 0)
buffer.count = 0
atEof = false
}
/// Close the underlying file. No reading must be done after calling this method.
func close() -> Void {
fileHandle?.closeFile()
fileHandle = nil
}
}
extension StreamReader : Sequence {
func makeIterator() -> AnyIterator<String> {
return AnyIterator {
return self.nextLine()
}
}
}
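Usage is the same as with the Swift 2 version, for example:
if let aStreamReader = StreamReader(path: "/path/to/file") {
    defer { aStreamReader.close() }
    for line in aStreamReader {
        print(line)
    }
}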
Efficient and convenient class for reading a text file line by line (Swift 4, Swift 5)
Note: This code is platform independent (macOS, iOS, Ubuntu)
import Foundation
/// Read text file line by line in efficient way
public class LineReader {
public let path: String
fileprivate let file: UnsafeMutablePointer<FILE>!
init?(path: String) {
self.path = path
file = fopen(path, "r")
guard file != nil else { return nil }
}
public var nextLine: String? {
var line:UnsafeMutablePointer<CChar>? = nil
var linecap:Int = 0
defer { free(line) }
return getline(&line, &linecap, file) > 0 ? String(cString: line!) : nil
}
deinit {
fclose(file)
}
}
extension LineReader: Sequence {
public func makeIterator() -> AnyIterator<String> {
return AnyIterator<String> {
return self.nextLine
}
}
}
Usage:
guard let reader = LineReader(path: "/Path/to/file.txt") else {
return; // cannot open file
}
for line in reader {
print(">" + line.trimmingCharacters(in: .whitespacesAndNewlines))
}
Repository on github
Swift 4.2 Safe syntax
class LineReader {
let path: String
init?(path: String) {
self.path = path
guard let file = fopen(path, "r") else {
return nil
}
self.file = file
}
deinit {
fclose(file)
}
var nextLine: String? {
var line: UnsafeMutablePointer<CChar>?
var linecap = 0
defer {
free(line)
}
let status = getline(&line, &linecap, file)
guard status > 0, let unwrappedLine = line else {
return nil
}
return String(cString: unwrappedLine)
}
private let file: UnsafeMutablePointer<FILE>
}
extension LineReader: Sequence {
func makeIterator() -> AnyIterator<String> {
return AnyIterator<String> {
return self.nextLine
}
}
}
Usage:
guard let reader = LineReader(path: "/Path/to/file.txt") else {
return
}
reader.forEach { line in
print(line.trimmingCharacters(in: .whitespacesAndNewlines))
}
I'm late to the game, but here's a small class I wrote for that purpose. After some different attempts (trying to subclass NSInputStream) I found this to be a reasonable and simple approach.
Remember to #import <stdio.h> in your bridging header.
// Use it like this:
let reader = ReadLine(path: somePath)
while let line = reader.readline() {
// do something...
}
class ReadLine {
private var buf = UnsafeMutablePointer<Int8>.alloc(1024)
private var n: Int = 1024
let path: String
let mode: String = "r"
private lazy var filepointer: UnsafeMutablePointer<FILE> = {
let csmode = self.mode.withCString { cs in return cs }
let cspath = self.path.withCString { cs in return cs }
return fopen(cspath, csmode)
}()
init(path: String) {
self.path = path
}
func readline() -> String? {
// unsafe for unknown input
if getline(&buf, &n, filepointer) > 0 {
return String.fromCString(UnsafePointer<CChar>(buf))
}
return nil
}
deinit {
buf.dealloc(n)
fclose(filepointer)
}
}
This function takes a file URL and returns a sequence which will return every line of the file, reading them lazily. It works with Swift 5. It relies on the underlying getline:
typealias LineState = (
// pointer to a C string representing a line
linePtr:UnsafeMutablePointer<CChar>?,
linecap:Int,
filePtr:UnsafeMutablePointer<FILE>?
)
/// Returns a sequence which iterates through all lines of the the file at the URL.
///
/// - Parameter url: file URL of a file to read
/// - Returns: a Sequence which lazily iterates through lines of the file
///
/// - warning: the caller of this function **must** iterate through all lines of the file, since aborting iteration midway will leak memory and a file pointer
/// - precondition: the file must be UTF8-encoded (which includes, ASCII-encoded)
func lines(ofFile url:URL) -> UnfoldSequence<String,LineState>
{
let initialState:LineState = (linePtr:nil, linecap:0, filePtr:fopen(url.path,"r"))
return sequence(state: initialState, next: { (state) -> String? in
if getline(&state.linePtr, &state.linecap, state.filePtr) > 0,
let theLine = state.linePtr {
return String.init(cString:theLine)
}
else {
if let actualLine = state.linePtr { free(actualLine) }
fclose(state.filePtr)
return nil
}
})
}
So for instance, here's how you would use it to print every line of a file named "foo" in your app bundle:
let url = Bundle.main.url(forResource: "foo", withExtension: nil)!
for line in lines(ofFile:url) {
// suppress print's automatically inserted line ending, since
// lineGenerator captures each line's own new line character.
print(line, separator: "", terminator: "")
}
I developed this answer by modifying Alex Brown's answer to remove a memory leak mentioned in Martin R's comment, and by updating it for Swift 5.
Try this answer, or read the Mac OS Stream Programming Guide.
You may find that performance will actually be better using the stringWithContentsOfURL, though, as it will be quicker to work with memory-based (or memory-mapped) data than disc-based data.
Executing it on another thread is well documented, also, for example here.
Update
If you don't want to read it all at once, and you don't want to use NSStreams, then you'll probably have to use C-level file I/O. There are many reasons not to do this - blocking, character encoding, handling I/O errors, and speed, to name but a few - this is what the Foundation libraries are for. I've sketched a simple answer below that just deals with ASCII data:
class StreamReader {
var eofReached = false
let fileHandle: UnsafePointer<FILE>
init (path: String) {
self.fileHandle = fopen(path.bridgeToObjectiveC().UTF8String, "rb".bridgeToObjectiveC().UTF8String)
}
deinit {
fclose(self.fileHandle)
}
func nextLine() -> String {
var nextChar: UInt8 = 0
var stringSoFar = ""
var eolReached = false
while (self.eofReached == false) && (eolReached == false) {
if fread(&nextChar, 1, 1, self.fileHandle) == 1 {
switch nextChar & 0xFF {
case 13, 10 : // CR, LF
eolReached = true
case 0...127 : // Keep it in ASCII
stringSoFar += NSString(bytes:&nextChar, length:1, encoding: NSASCIIStringEncoding)
default :
stringSoFar += "<\(nextChar)>"
}
} else { // EOF or error
self.eofReached = true
}
}
return stringSoFar
}
}
// OP's original request follows:
var aStreamReader = StreamReader(path: "~/Desktop/Test.text".stringByStandardizingPath)
while aStreamReader.eofReached == false { // Changed property name for more accurate meaning
let currentline = aStreamReader.nextLine()
//list.addItem(currentline)
println(currentline)
}
Or you could simply use a Generator:
let stdinByLine = GeneratorOf({ () -> String? in
var input = UnsafeMutablePointer<Int8>(), lim = 0
return getline(&input, &lim, stdin) > 0 ? String.fromCString(input) : nil
})
Let's try it out
for line in stdinByLine {
println(">>> \(line)")
}
It's simple, lazy, and easy to chain with other swift things like enumerators and functors such as map, reduce, filter; using the lazy() wrapper.
It generalises to all FILE as:
let byLine = { (file:UnsafeMutablePointer<FILE>) in
GeneratorOf({ () -> String? in
var input = UnsafeMutablePointer<Int8>(), lim = 0
return getline(&input, &lim, file) > 0 ? String.fromCString(input) : nil
})
}
called like
for line in byLine(stdin) { ... }
Following up on @dankogai's answer, I made a few modifications for Swift 4+:
let bufsize = 4096
let fp = fopen(jsonURL.path, "r");
var buf = UnsafeMutablePointer<Int8>.allocate(capacity: bufsize)
while (fgets(buf, Int32(bufsize-1), fp) != nil) {
print( String(cString: buf) )
}
buf.deallocate()
This worked for me.
Thanks
Swift 5.5: use url.lines
ADC Docs are here
Example usage:
guard let url = URL(string: "https://www.example.com") else {
return
}
// Manipulating an `Array` in memory seems to be a requirement.
// This will balloon in size as lines of data get added.
var myHugeArray = [String]()
do {
// This should keep the inbound data memory usage low
for try await line in url.lines {
myHugeArray.append(line)
}
} catch {
debugPrint(error)
}
You can use this in a SwiftUI .task { } modifier or wrap this in a Task return type to get its work off the main thread.
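For example, a detached task keeps the work off the main thread (a sketch, assuming the url from above and an iOS 15/macOS 12 deployment target):
Task.detached(priority: .utility) {
    do {
        for try await line in url.lines {
            print(line) // process each line off the main thread
        }
    } catch {
        debugPrint(error)
    }
}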
It turns out the good old-fashioned C API is pretty comfortable in Swift once you grok UnsafePointer. Here is a simple cat that reads from stdin and prints to stdout line by line. You don't even need Foundation; Darwin suffices:
import Darwin
let bufsize = 4096
// let stdin = fdopen(STDIN_FILENO, "r") it is now predefined in Darwin
var buf = UnsafePointer<Int8>.alloc(bufsize)
while fgets(buf, Int32(bufsize-1), stdin) {
print(String.fromCString(CString(buf)))
}
buf.destroy()
(Note: I'm using Swift 3.0.1 on Xcode 8.2.1 with macOS Sierra 10.12.3)
All of the answers I've seen here missed that he could be looking for LF or CRLF. If everything goes well, s/he could just match on LF and check the returned string for an extra CR at the end. But the general query involves multiple search strings. In other words, the delimiter needs to be a Set<String>, where the set is neither empty nor contains the empty string, instead of a single string.
On my first try at this last year, I tried to do the "right thing" and search for a general set of strings. It was too hard; you need a full blown parser and state machines and such. I gave up on it and the project it was part of.
Now I'm doing the project again, and facing the same challenge again. Now I'm going to hard-code searching on CR and LF. I don't think anyone would need to search on two semi-independent and semi-dependent characters like this outside of CR/LF parsing.
I'm using the search methods provided by Data, so I'm not doing string encodings and stuff here. Just raw binary processing. Just assume I got an ASCII superset, like ISO Latin-1 or UTF-8, here. You can handle string encoding at the next-higher layer, and you punt on whether a CR/LF with secondary code-points attached still counts as a CR or LF.
The algorithm: just keep searching for the next CR and the next LF from your current byte offset.
If neither is found, then consider the next data string to be from the current offset to the end-of-data. Note that the terminator length is 0. Mark this as the end of your reading loop.
If a LF is found first, or only a LF is found, consider the next data string to be from the current offset to the LF. Note that the terminator length is 1. Move the offset to after the LF.
If only a CR is found, do like the LF case (just with a different byte value).
Otherwise, we got a CR followed by a LF.
If the two are adjacent, then handle like the LF case, except the terminator length will be 2.
If there is one byte between them, and said byte is also CR, then we got the "Windows developer wrote a binary \r\n while in text mode, giving a \r\r\n" problem. Also handle it like the LF case, except the terminator length will be 3.
Otherwise the CR and LF aren't connected, and handle like the just-CR case.
Here's some code for that:
struct DataInternetLineIterator: IteratorProtocol {
/// Descriptor of the location of a line
typealias LineLocation = (offset: Int, length: Int, terminatorLength: Int)
/// Carriage return.
static let cr: UInt8 = 13
/// Carriage return as data.
static let crData = Data(repeating: cr, count: 1)
/// Line feed.
static let lf: UInt8 = 10
/// Line feed as data.
static let lfData = Data(repeating: lf, count: 1)
/// The data to traverse.
let data: Data
/// The byte offset to search from for the next line.
private var lineStartOffset: Int = 0
/// Initialize with the data to read over.
init(data: Data) {
self.data = data
}
mutating func next() -> LineLocation? {
guard self.data.count - self.lineStartOffset > 0 else { return nil }
let nextCR = self.data.range(of: DataInternetLineIterator.crData, options: [], in: lineStartOffset..<self.data.count)?.lowerBound
let nextLF = self.data.range(of: DataInternetLineIterator.lfData, options: [], in: lineStartOffset..<self.data.count)?.lowerBound
var location: LineLocation = (self.lineStartOffset, -self.lineStartOffset, 0)
let lineEndOffset: Int
switch (nextCR, nextLF) {
case (nil, nil):
lineEndOffset = self.data.count
case (nil, let offsetLf):
lineEndOffset = offsetLf!
location.terminatorLength = 1
case (let offsetCr, nil):
lineEndOffset = offsetCr!
location.terminatorLength = 1
default:
lineEndOffset = min(nextLF!, nextCR!)
if nextLF! < nextCR! {
location.terminatorLength = 1
} else {
switch nextLF! - nextCR! {
case 2 where self.data[nextCR! + 1] == DataInternetLineIterator.cr:
location.terminatorLength += 1 // CR-CRLF
fallthrough
case 1:
location.terminatorLength += 1 // CRLF
fallthrough
default:
location.terminatorLength += 1 // CR-only
}
}
}
self.lineStartOffset = lineEndOffset + location.terminatorLength
location.length += self.lineStartOffset
return location
}
}
Of course, if you have a Data block of a length that's at least a significant fraction of a gigabyte, you'll take a hit whenever no more CR or LF exist from the current byte offset; always fruitlessly searching until the end during every iteration. Reading the data in chunks would help:
struct DataBlockIterator: IteratorProtocol {
/// The data to traverse.
let data: Data
/// The offset into the data to read the next block from.
private(set) var blockOffset = 0
/// The number of bytes remaining. Kept so the last block is the right size if it's short.
private(set) var bytesRemaining: Int
/// The size of each block (except possibly the last).
let blockSize: Int
/// Initialize with the data to read over and the chunk size.
init(data: Data, blockSize: Int) {
precondition(blockSize > 0)
self.data = data
self.bytesRemaining = data.count
self.blockSize = blockSize
}
mutating func next() -> Data? {
guard bytesRemaining > 0 else { return nil }
defer { blockOffset += blockSize ; bytesRemaining -= blockSize }
return data.subdata(in: blockOffset..<(blockOffset + min(bytesRemaining, blockSize)))
}
}
You have to mix these ideas together yourself, since I haven't done it yet. Consider:
Of course, you have to consider lines completely contained in a chunk.
But you have to handle when the ends of a line are in adjacent chunks.
Or when the endpoints have at least one chunk between them.
The big complication is when the line ends with a multi-byte sequence, but said sequence straddles two chunks! (A line ending in just CR that's also the last byte in the chunk is an equivalent case, since you need to read the next chunk to see if your just-CR is actually a CRLF or CR-CRLF. There are similar shenanigans when the chunk ends with CR-CR.)
And you need to handle when there are no more terminators from your current offset, but the end-of-data is in a later chunk.
Good luck!
I wanted a version that did not continually modify the buffer or duplicate code, as both are inefficient, and that would allow for any size buffer (including 1 byte) and any delimiter. It has one public method: readLine(). Calling this method will return the String value of the next line, or nil at EOF.
import Foundation
// LineStream(): path: String, [buffSize: Int], [delim: String] -> nil | String
// ============= --------------------------------------------------------------
// path: the path to a text file to be parsed
// buffSize: an optional buffer size, (1...); default is 4096
// delim: an optional delimiter String; default is "\n"
// ***************************************************************************
class LineStream {
let path: String
let handle: NSFileHandle!
let delim: NSData!
let encoding: NSStringEncoding
var buffer = NSData()
var buffSize: Int
var buffIndex = 0
var buffEndIndex = 0
init?(path: String,
buffSize: Int = 4096,
delim: String = "\n",
encoding: NSStringEncoding = NSUTF8StringEncoding)
{
self.handle = NSFileHandle(forReadingAtPath: path)
self.path = path
self.buffSize = buffSize < 1 ? 1 : buffSize
self.encoding = encoding
self.delim = delim.dataUsingEncoding(encoding)
if handle == nil || self.delim == nil {
print("ERROR initializing LineStream") /* TODO use STDERR */
return nil
}
}
// PRIVATE
// fillBuffer(): _ -> Int [0...buffSize]
// ============= -------- ..............
// Fill the buffer with new data; return with the buffer size, or zero
// upon reaching end-of-file
// *********************************************************************
private func fillBuffer() -> Int {
buffer = handle.readDataOfLength(buffSize)
buffIndex = 0
buffEndIndex = buffer.length
return buffEndIndex
}
// PRIVATE
// delimLocation(): _ -> Int? nil | [1...buffSize]
// ================ --------- ....................
// Search the remaining buffer for a delimiter; return with the location
// of a delimiter in the buffer, or nil if one is not found.
// ***********************************************************************
private func delimLocation() -> Int? {
let searchRange = NSMakeRange(buffIndex, buffEndIndex - buffIndex)
let rangeToDelim = buffer.rangeOfData(delim,
options: [], range: searchRange)
return rangeToDelim.location == NSNotFound
? nil
: rangeToDelim.location
}
// PRIVATE
// dataStrValue(): NSData -> String ("" | String)
// =============== ---------------- .............
// Attempt to convert data into a String value using the supplied encoding;
// return the String value or empty string if the conversion fails.
// ***********************************************************************
private func dataStrValue(data: NSData) -> String? {
if let strVal = NSString(data: data, encoding: encoding) as? String {
return strVal
} else { return "" }
}
// PUBLIC
// readLine(): _ -> String? nil | String
// =========== ____________ ............
// Read the next line of the file, i.e., up to the next delimiter or end-of-
// file, whichever occurs first; return the String value of the data found,
// or nil upon reaching end-of-file.
// *************************************************************************
func readLine() -> String? {
guard let line = NSMutableData(capacity: buffSize) else {
print("ERROR setting line")
exit(EXIT_FAILURE)
}
// Loop until a delimiter is found, or end-of-file is reached
var delimFound = false
while !delimFound {
// buffIndex will equal buffEndIndex in three situations, resulting
// in a (re)filling of the buffer:
// 1. Upon the initial call;
// 2. If a search for a delimiter has failed
// 3. If a delimiter is found at the end of the buffer
if buffIndex == buffEndIndex {
if fillBuffer() == 0 {
return nil
}
}
var lengthToDelim: Int
let startIndex = buffIndex
// Find a length of data to place into the line buffer to be
// returned; reset buffIndex
if let delim = delimLocation() {
// SOME VALUE when a delimiter is found; append that amount of
// data onto the line buffer,and then return the line buffer
delimFound = true
lengthToDelim = delim - buffIndex
buffIndex = delim + 1 // will trigger a refill if at the end
// of the buffer on the next call, but
// first the line will be returned
} else {
// NIL if no delimiter left in the buffer; append the rest of
// the buffer onto the line buffer, refill the buffer, and
// continue looking
lengthToDelim = buffEndIndex - buffIndex
buffIndex = buffEndIndex // will trigger a refill of buffer
// on the next loop
}
line.appendData(buffer.subdataWithRange(
NSMakeRange(startIndex, lengthToDelim)))
}
return dataStrValue(line)
}
}
It is called as follows:
guard let myStream = LineStream(path: "/path/to/file.txt")
else { exit(EXIT_FAILURE) }
while let s = myStream.readLine() {
print(s)
}
