I am trying to parse a CSV but i am getting some issues. Below is the code i used for parsing CSV:
let fileURL = Bundle.main.url(forResource: "test_application_data - Sheet 1", withExtension: "csv")
let content = try String(contentsOf: fileURL!, encoding: String.Encoding.utf8)
let parsedCSV: [[String]] = content.components(separatedBy: "\n").map{ $0.components(separatedBy: ",")}
And this is the data in the CSV i am parsing :
Item 9,Description 9,image url
"Item 10 Extra line 1 Extra line 2 Extra line 3",Description 10,image url
So by using above code i get correct response for first row i.e Item 9 but i am getting malformed response for Item 10
How can i correctly parse both rows?
The RFC for CSV: Common Format and MIME Type for Comma-Separated Values (CSV) Files(RFC-4180)
Not all CSV data or CSV processors conform to all descriptions of this RFC, but generally, fields enclosed within double-quotes can contain:
newlines
commas
escaped double-quotes ("" represents a single double-quote)
This code is a little bit simplified than RFC-4180, but handles all three cases above:
UPDATE This old code does not handle CRLF well. (Which is a valid newline in RFC-4180.) I added a new code at the bottom, please check it.
Thanks to Jay.
import Foundation
let csvText = """
Item 9,Description 9,image url
"Item 10
Extra line 1
Extra line 2
Extra line 3",Description 10,image url
"Item 11
Csv item can contain ""double quote"" and comma(,)", Description 11 ,image url
"""
let pattern = "[ \r\t]*(?:\"((?:[^\"]|\"\")*)\"|([^,\"\\n]*))[ \t]*([,\\n]|$)"
let regex = try! NSRegularExpression(pattern: pattern)
var result: [[String]] = []
var record: [String] = []
let offset: Int = 0
regex.enumerateMatches(in: csvText, options: .anchored, range: NSRange(0..<csvText.utf16.count)) {match, flags, stop in
guard let match = match else {fatalError()}
if match.range(at: 1).location != NSNotFound {
let field = csvText[Range(match.range(at: 1), in: csvText)!].replacingOccurrences(of: "\"\"", with: "\"")
record.append(field)
} else if match.range(at: 2).location != NSNotFound {
let field = csvText[Range(match.range(at: 2), in: csvText)!].trimmingCharacters(in: .whitespaces)
record.append(field)
}
let separator = csvText[Range(match.range(at: 3), in: csvText)!]
switch separator {
case "\n": //newline
result.append(record)
record = []
case "": //end of text
//Ignoring empty last line...
if record.count > 1 || (record.count == 1 && !record[0].isEmpty) {
result.append(record)
}
stop.pointee = true
default: //comma
break
}
}
print(result)
(Intended to test in a Playground.)
New code, CRLF ready.
import Foundation
let csvText = "Field0,Field1\r\n"
let pattern = "[ \t]*(?:\"((?:[^\"]|\"\")*)\"|([^,\"\r\\n]*))[ \t]*(,|\r\\n?|\\n|$)"
let regex = try! NSRegularExpression(pattern: pattern)
var result: [[String]] = []
var record: [String] = []
let offset: Int = 0
regex.enumerateMatches(in: csvText, options: .anchored, range: NSRange(0..<csvText.utf16.count)) {match, flags, stop in
guard let match = match else {fatalError()}
if let quotedRange = Range(match.range(at: 1), in: csvText) {
let field = csvText[quotedRange].replacingOccurrences(of: "\"\"", with: "\"")
record.append(field)
} else if let range = Range(match.range(at: 2), in: csvText) {
let field = csvText[range].trimmingCharacters(in: .whitespaces)
record.append(field)
}
let separator = csvText[Range(match.range(at: 3), in: csvText)!]
switch separator {
case "": //end of text
//Ignoring empty last line...
if record.count > 1 || (record.count == 1 && !record[0].isEmpty) {
result.append(record)
}
stop.pointee = true
case ",": //comma
break
default: //newline
result.append(record)
record = []
}
}
print(result) //->[["Field0", "Field1"]]
The problem is with this line of code:
content.components(separatedBy: "\n")
It separates your csv file into rows based on the newline character. There are newline characters in your "Item 10 Extra line 1 Extra line 2 Extra line 3" String so each extra line is getting treated as a different row, so in the end you get the wrong result.
I'd suggest escaping the newline characters in your multiline text column or getting rid of them altogether. You can also modyfy the input file so the newline delimeter isn't \n at the end of each row but something custom (a string that won't appear elsewhere in the csv file).
Related
I know how to remove first character from a word in swift like this:
var data = "CITY Singapore"
data.removeFirst()
print(data)//ITY Singapore
what i want is to remove the first word and space so the result is "Singapore".
How can i remove the first word and leading space in swift?
You can try
let data = "CITY Singapore"
let res = data.components(separatedBy: " ").dropFirst().joined(separator: " ")
print(res)
Or
let res = data[data.range(of: " ")!.upperBound...] // may crash for no " " inside the string
Or you can go with this too
let strData = "CITY Singapore"
if let data = strData.components(separatedBy: " ").dropFirst().first {
// do with data
}
else {
// fallback
}
This is a Regular Expression solution, the benefit is to modify the string in place.
The pattern searches from the beginning of the string to the first space character
var data = "CITY Singapore"
if let range = data.range(of: "^\\S+\\s", options: .regularExpression) {
data.removeSubrange(range)
}
You can use String's enumerateSubstrings in range method (Foundation) using byWords option and remove the first enclosing range. You need also to stop enumeration after removing the range at the first occurrence:
var string = "CITY Singapore"
string.enumerateSubstrings(in: string.startIndex..., options: .byWords) { (_, _, enclosingRange, stop) in
string.removeSubrange(enclosingRange)
stop = true
}
string // "Singapore"
I'm new to Swift and after going through the Apple documentation and other sources is not clear for me how can I extract a substring using more than one character as delimiter. For example: I have a string which looks like:
A.1 value1
B.2 value2
E value3
C value4
and need to assign the values 1 - 4 to different variables.
• Possible solution:
1. Separate all the elements (separator: white space)
2. Iterate 2 by 2 and use a key/value system, like a Dictionary.
3. Read each values from the keys afterward
Step 1:
let string = "A.1 value1 B.2 value2 E value3 C value4"
let components = string.components(separatedBy: CharacterSet.whitespaces)
Step 2:
var dictionary: [String: String] = [:]
stride(from: 0, to: components.count - 1, by: 2).forEach({
dictionary[components[$0]] = components[$0+1]
})
or
let dictionary = stride(from: 0, to: components.count - 1, by: 2).reduce(into: [String: String]()) { (result, currentInt) in
result[components[currentInt]] = components[currentInt+1]
}
dictionary is ["A.1": "value1", "C": "value4", "E": "value3", "B.2": "value2"]
Inspiration for the stride(from:to:) that I rarely use.
Step 3:
let name = dictionary["A.1"]
let surname = dictionary["C"]
• Potential issues:
If you have:
let string = "A.1 value One B.2 value2 E value3 C value4"
You want "value One", and since there is a space, you'll get some issue because if will give a false result (since there is the separator).
You'll get: ["A.1": "value", "One": "B.2", "value2": "E", "value3": "C"] for dictionary.
So you could use instead a regex: A.1(.*)B.2(.*)E(.*)C(.*) (for instance).
let string = "A.1 value One B.2 value2 E value3 C value4"
let regex = try! NSRegularExpression(pattern: "A.1(.*)B.2(.*)E(.*)C(.*)", options: [])
regex.enumerateMatches(in: string, options: [], range: NSRange(location: 0, length: string.utf16.count)) { (result, flags, stop) in
guard let result = result,
let aValueRange = Range(result.range(at: 1), in: string),
let bValueRange = Range(result.range(at: 2), in: string),
let cValueRange = Range(result.range(at: 4), in: string),
let eValueRange = Range(result.range(at: 3), in: string) else { return }
let aValue = string[aValueRange].trimmingCharacters(in: CharacterSet.whitespaces)
print("aValue: \(aValue)")
let bValue = string[bValueRange].trimmingCharacters(in: CharacterSet.whitespaces)
print("bValue: \(bValue)")
let cValue = string[cValueRange].trimmingCharacters(in: CharacterSet.whitespaces)
print("cValue: \(cValue)")
let eValue = string[eValueRange].trimmingCharacters(in: CharacterSet.whitespaces)
print("eValue: \(eValue)")
}
Output:
$>aValue: value One
$>bValue: value2
$>cValue: value4
$>eValue: value3
Note that the trim could be inside the regex, but I don't especially like having too complex regexes.
I like regular expressions for this sort of thing.
I'm going to take you very literally and assume that the substrings to be found are preceded by "A.1", "B.2", "E", and "C", and are all preceded and followed by a space except for the last substring which is followed by the end of the original string. Moreover I'm going to assume very simple-mindedly that the delimiters such as "E" cannot appear in our string in any other way. Then we can capture each substring with an appropriate pattern:
let s = "A.1 harpo B.2 chico E zeppo C groucho"
let p1 = "^A\\.1 (.*) B\\.2 "
let p2 = " B\\.2 (.*) E "
let p3 = " E (.*) C "
let p4 = " C (.*)$"
let patts = [p1,p2,p3,p4]
var result = [String]()
for patt in patts {
let regex = try! NSRegularExpression(pattern: patt, options: [])
if let match = regex.firstMatch(in: s, options: [],
range: NSRange(s.startIndex..<s.endIndex, in: s)) {
let r = match.range(at: 1)
result.append((s as NSString).substring(with: r))
}
}
// result is now ["harpo", "chico", "zeppo", "groucho"]
We now have the four desired substrings extracted into an array, and dealing with them from there is trivial.
Observe that we make no assumptions about spaces. The above works perfectly well even if the target substrings contain spaces, because we are appealing only to the delimiters. For example, if the original string is
let s = "A.1 the rain B.2 in spain E stays mainly C in the plain"
then result is the array
["the rain", "in spain", "stays mainly", "in the plain"]
I should point out, however, that another way to do this sort of thing is to walk the original string with a Scanner. You might prefer this because regular expressions are not really needed here, and if you don't know regular expressions you'll find this kind of walk much clearer. So here it is rewritten to use a scanner. Note that we end up with four Optional NSString objects, because Scanner is actually an Objective-C Cocoa Foundation thing, but it isn't difficult to turn those into String objects as needed:
let s = "A.1 the rain B.2 in spain E stays mainly C in the plain"
let scan = Scanner(string: s)
scan.scanString("A.1 ", into: nil)
var r1 : NSString? = nil
scan.scanUpTo(" B.2 ", into: &r1)
scan.scanString("B.2 ", into: nil)
var r2 : NSString? = nil
scan.scanUpTo(" E ", into: &r2)
scan.scanString("E ", into: nil)
var r3 : NSString? = nil
scan.scanUpTo(" C ", into: &r3)
scan.scanString("C ", into: nil)
var r4 : NSString? =
(scan.string as NSString).substring(from: scan.scanLocation) as NSString
r1 // the rain
r2 // in spain
r3 // stays mainly
r4 // in the plain
I have a text file with 8000+ lines of information. I want to parse it to JSON for using it in iOS application. Because parsing that kind of txt file takes about 35 seconds, which is not good for user experience.
Text file goes like this:
TURKIYE;ADANA;ADANA;36,9914194;35,3308285;03:00:00
TURKIYE;ADANA;ALADAĞ;37,545348;35,394608;03:00:00
TURKIYE;ADANA;CEYHAN;37,028765;35,8124309;03:00:00
TURKIYE;ADANA;FEKE;37,814467;35,910391;03:00:00
TURKIYE;ADANA;İMAMOĞLU;37,2984443;35,6095474;03:00:00
TURKIYE;ADANA;KARAISALI;37,2758825;35,1268781;03:00:00
TURKIYE;ADANA;KARATAŞ;36,6649776;35,2587964;03:00:00
I just want the "TURKIYE", "ADANA", "ALADAĞ", "Latitude" and "Longitude" parts and don't need the last "03:00:00" part.
Edit:
Forget the mention about something. Text files some lines doesn't includes third column. For example;
AVUSTURYA;HORBRANZ;47,5557073;9,7525947;01:00:00
AVUSTURYA;HORN;48,66607;15,65716;01:00:00
AVUSTURYA;IMST;47,24013;10,73954;01:00:00
AVUSTURYA;IMSTERAU;47,21018;10,70901;01:00:00
I want to parse them like "if third column doesn't exist print nil value there."
My txt parsing code is:
let textFilePath = Bundle.main.path(forResource: "LatLongData", ofType: "txt")
let fileManager = FileManager.default
if fileManager.fileExists(atPath: textFilePath!) {
do {
let fullText = try String(contentsOfFile: textFilePath!, encoding: String.Encoding.utf8)
let lines = fullText.components(separatedBy: "\n") as [String]
for line in lines {
let data = line.components(separatedBy: ";")
let locationData = LocationData()
if data.first == "TURKIYE" || data.first == "ABD" {
locationData.country = data[0]
locationData.city = data[1]
locationData.district = data[2]
locationData.latitude = data[3]
locationData.longitude = data[4]
locationData.compoundKey = "\(data[0])-\(data[1])-\(data[3])"
} else {
locationData.country = data[0]
locationData.city = data[1]
locationData.latitude = data[2]
locationData.longitude = data[3]
locationData.compoundKey = "\(data[0])-\(data[1])-\(data[3])"
}
locationData.writeToRealmDB()
}
} catch let error as NSError {
print(error.localizedDescription)
}
How can I convert this text to JSON for using in iOS app?
Thanks in advance.
import pandas as pd
with open('filename.txt', 'r') as f:
lines = f.readlines()
for line in lines:
if line.count(';') == 4:
index = line.index(';', 2)
line = line[:index] + ';' + line[index:]
with open('filename2.txt', 'a') as f2:
f2.write(line)
df = pd.read_csv('filename2.txt', header=None, sep=';')
df = df.iloc[:, :-1]
df.columns = ['col1', 'col2', 'col3', 'Latitude', 'Longitude']
df.to_json('filename.json')
http://pandas.pydata.org/pandas-docs/stable/io.html
I have Strings with the form string \ string example
"some sting with random length\233"
I want to deletes the last \ and get the value after it, so the result will be
"some sting with random length"
"233"
I tried this code but it's not working
let regex = try! NSRegularExpression(pattern: "\\\s*(\\S[^,]*)$")
if let match = regex.firstMatch(in: string, range: string.nsRange) {
let result = string.substring(with: match.rangeAt(1))
}
You did not correctly adapt the pattern from How to get substring after last occurrence of character in string: Swift IOS to your case. Both instances of the comma must be replaced by a backslash,
and that must be "double-escaped":
let regex = try! NSRegularExpression(pattern: "\\\\\\s*(\\S[^\\\\]*)$")
(once be interpreted as a literal backslash in the regex pattern, and
once more in the definition of a Swift string literal).
However, a simpler solution is to find the last occurrence of the
backslash and extract the suffix from that position:
let string = "some sting with random length\\233"
let separator = "\\" // A single(!) backslash
if let r = string.range(of: separator, options: .backwards) {
let prefix = string.substring(to: r.lowerBound)
let suffix = string.substring(from: r.upperBound)
print(prefix) // some sting with random length
print(suffix) // 233
}
Update for Swift 4:
if let r = string.range(of: separator, options: .backwards) {
let prefix = string[..<r.lowerBound]
let suffix = string[r.upperBound...]
print(prefix) // some sting with random length
print(suffix) // 233
}
prefix and suffix are a String.SubSequence, which can be used
in many places instead of a String. If necessary, create a real
string:
let prefix = String(string[..<r.lowerBound])
let suffix = String(string[r.upperBound...])
You could do this with regex, but I think this solution is better:
yourString.components(separatedBy: "\\").last!
It splits the string with \ as the separator and gets the last split.
I'm looking for a fast, optimized way to trim log files on iOS. I want to specify that my log files have a maximum number of lines (e.g., 10,000). Appending new lines to the end of a text file seems relatively simple. However, I haven't yet found a fast way to trim lines at the beginning of the file. Here's the (slow) code I came up with.
guard let fileURL = self.fileURL else {
return
}
guard let path = fileURL.path else {
return
}
guard let fileHandle = NSFileHandle(forUpdatingAtPath: path) else {
return
}
fileHandle.seekToEndOfFile()
fileHandle.writeData(message.dataUsingEncoding(NSUTF8StringEncoding)!)
fileHandle.writeData("\n".dataUsingEncoding(NSUTF8StringEncoding)!)
currentLineCount += 1
// TODO: This could probably use some major optimization
if currentLineCount >= maxLineCount {
if let fileString = try? NSString(contentsOfURL: fileURL, encoding: NSUTF8StringEncoding) {
var lines = fileString.componentsSeparatedByCharactersInSet(NSCharacterSet.newlineCharacterSet())
lines.removeFirst()
let newData = lines.joinWithSeparator("\n")
fileHandle.seekToFileOffset(0)
fileHandle.writeData(newData.dataUsingEncoding(NSUTF8StringEncoding)!)
}
}
fileHandle.closeFile()
There are two aspects of your question. First, your code removes a single
line from the log file. Therefore, once the limit is reached, every new
log message causes the entire file to be read, shortened, and be re-written.
It would be more effective to use a "high-water mark" and a "low-water mark". For example, if you want the last 10.000 lines to be preserved,
let the log file grow until it has 15.000 lines, and only then truncate
it to 10.000 lines. This reduces the number of "trim actions"
considerably.
The second part is about the truncating itself. Your code loads the
file into an NSString, which requires the conversion of UTF-8 data
to Unicode characters
(and fails if there is a single invalid byte in the log file).
Then the string is split into an array, one array element removed,
the array concatenated to a string again, and then written back
to the file, which converts the Unicode characters to UTF-8.
I haven't done performance tests, but I can imagine that it could be
faster to operate on binary data only, without the conversions to
NSString, Array and back. Here is a possible implementation
which removes a given number of lines from the start of a file:
func removeLinesFromFile(fileURL: NSURL, numLines: Int) {
do {
let data = try NSData(contentsOfURL: fileURL, options: .DataReadingMappedIfSafe)
let nl = "\n".dataUsingEncoding(NSUTF8StringEncoding)!
var lineNo = 0
var pos = 0
while lineNo < numLines {
// Find next newline character:
let range = data.rangeOfData(nl, options: [], range: NSMakeRange(pos, data.length - pos))
if range.location == NSNotFound {
return // File has less than `numLines` lines.
}
lineNo++
pos = range.location + range.length
}
// Now `pos` is the position where line number `numLines` begins.
let trimmedData = data.subdataWithRange(NSMakeRange(pos, data.length - pos))
trimmedData.writeToURL(fileURL, atomically: true)
} catch let error as NSError {
print(error.localizedDescription)
}
}
I have updated Martin R answer to Swift 3, and I also changed it so that we can pass the number of lines to keep instead of the number of lines to remove:
func removeLinesFromFile(fileURL: URL, linesToKeep numLines: Int) {
do {
let data = try Data(contentsOf: fileURL, options: .dataReadingMapped)
let nl = "\n".data(using: String.Encoding.utf8)!
var lineNo = 0
var pos = data.count-1
while lineNo <= numLines {
// Find next newline character:
guard let range = data.range(of: nl, options: [ .backwards ], in: 0..<pos) else {
return // File has less than `numLines` lines.
}
lineNo += 1
pos = range.lowerBound
}
let trimmedData = data.subdata(in: pos..<data.count)
try trimmedData.write(to: fileURL)
} catch let error as NSError {
print(error.localizedDescription)
}
}
Instead of writing the new line to the log, and processing the appended content afterwards, do both write and trimming in one step:
let fileString = (try? NSString(contentsOfURL: fileURL, encoding: NSUTF8StringEncoding)) as NSString? ?? ""
var lines = fileString.characters.split("\n").map{String($0)}
lines.append(message)
// this also more generic as it will remove any number of extra lines
lines.removeFirst(max(currentLineCount - maxLineCount), 0))
let newLogContents = lines.joinWithSeparator("\n")
(newLogContents as NSString).writeToURL(fileURL, atomically: true, encoding: NSUTF8StringEncoding)