Parsing CSV file in Swift - ios

I need to preload data into my tableView when the app launches, so I am using Core Data and populating it by parsing a .csv file. I am following this tutorial for this purpose.
Here is my parseCSV function
/// Loads the CSV file at `contentsOfURL` and returns one five-field tuple per non-empty line.
/// - Parameters:
///   - contentsOfURL: location of the CSV file to read.
///   - encoding: string encoding handed to `String(contentsOfURL:encoding:error:)`.
///   - error: error pointer handed to the same initializer.
/// - Returns: the parsed rows; each tuple is filled from the first five values found on a line.
func parseCSV (contentsOfURL: NSURL, encoding: NSStringEncoding, error: NSErrorPointer) -> [(stationName:String, stationType:String, stationLineType: String, stationLatitude: String, stationLongitude: String)]? {
// Load the CSV file and parse it
let delimiter = ","
var stations:[(stationName:String, stationType:String, stationLineType: String, stationLatitude: String, stationLongitude: String)]?
let content = String(contentsOfURL: contentsOfURL, encoding: encoding, error: error)
stations = []
// One array element per line of the file; empty lines are skipped below
let lines:[String] = content.componentsSeparatedByCharactersInSet(NSCharacterSet.newlineCharacterSet()) as [String]
for line in lines {
var values:[String] = []
if line != "" {
// For a line with double quotes
// we use NSScanner to perform the parsing
if line.rangeOfString("\"") != nil {
var textToScan:String = line
var value:NSString?
var textScanner:NSScanner = NSScanner(string: textToScan)
// Consume one field per iteration; the scanner is rebuilt over the unscanned remainder each time
while textScanner.string != "" {
if (textScanner.string as NSString).substringToIndex(1) == "\"" {
// Quoted field: step over the opening quote, read up to the closing quote, then step past it
textScanner.scanLocation += 1
textScanner.scanUpToString("\"", intoString: &value)
textScanner.scanLocation += 1
} else {
// Unquoted field: read up to the next delimiter
textScanner.scanUpToString(delimiter, intoString: &value)
}
// Store the value into the values array
// NOTE(review): this force-cast traps if `value` is nil at this point
values.append(value as! String)
// Retrieve the unscanned remainder of the string
if textScanner.scanLocation < textScanner.string.characters.count {
// The + 1 skips the character at the scan location (presumably the delimiter)
textToScan = (textScanner.string as NSString).substringFromIndex(textScanner.scanLocation + 1)
} else {
textToScan = ""
}
textScanner = NSScanner(string: textToScan)
}
// For a line without double quotes, we can simply separate the string
// by using the delimiter (e.g. comma)
} else {
values = line.componentsSeparatedByString(delimiter)
}
// Put the values into the tuple and add it to the items array
// NOTE(review): indexes 0...4 are read unconditionally, so a line that yields fewer
// than five values traps here with "Array index out of range"
let station = (stationName: values[0], stationType: values[1], stationLineType: values[2], stationLatitude: values[3], stationLongitude: values[4])
stations?.append(station)
}
}
return stations
}
this is my sample .csv file
Rithala,Underground,Yellow Line,28.7209,77.1070
But i am getting an error on this line
let station = (stationName: values[0], stationType: values[1], stationLineType: values[2], stationLatitude: values[3], stationLongitude: values[4])
stations?.append(station)
Fatal error : Array index out of range
What am i doing wrong ? Please help me.

Here's a foolproof way of parsing a CSV file into your swift code (I'm using Swift 5 here). I'll talk through each step for any beginners in the room.
Let's assume your CSV file looks like this:
Firstname,Last name,Age,Registered
Duncan,Campbell,40,True
Tobi,Dorner,36,False
Saskia,Boogarts,29,True
1). We need a struct (or Object) to hold each row of data. Let's use this:
struct Person {
var firstName: String
var lastName: String
var age: Int
var isRegistered: Bool
}
2) We also need a variable which holds an array of each Person.
var people = [Person]()
3) Now - add the CSV file to your XCode project (you can drag and drop this into your project). Make sure it has a sensible name (e.g. data.csv).
4) You now need to "locate" the data that you want to use. Create a filepath which tells the code where to find your csv file:
guard let filepath = Bundle.main.path(forResource: "data", ofType: "csv") else {
return
}
5) Now we want to read the contents of this file. First let's convert the whole file into one long String.
var data = ""
do {
data = try String(contentsOfFile: filepath)
} catch {
print(error)
return
}
6) We now have one long string containing all our data. We want to split this up into an array of strings, one string for each row in the data. (BTW, the \n means "new line")
// Declared with `var` because the header row is removed from this array in the next step.
var rows = data.components(separatedBy: "\n")
7) We now have an array with 4 rows - one for the header titles, and 3 rows for each person in the data. We're not interested in the first header row, so we can remove that one. (Ignore this step if you don't have a header row in your data)
rows.removeFirst()
8) Now loop around each of rows. Each row is currently a string (e.g. Duncan,Campbell,40,True) but we want to split it into an array of each of its 4 columns.
for row in rows {
let columns = row.components(separatedBy: ",")
9) We now have an array, columns, which has 4 strings in it. Let's convert each column to the correct data type.
let firstName = columns[0]
let lastName = columns[1]
let age = Int(columns[2]) ?? 0
let isRegistered = columns[3] == "True"
10) And now we can create the Person object, and append it to our array.
let person = Person(firstName: firstName, lastName: lastName, age: age, isRegistered: isRegistered)
people.append(person)
Here's the full code for anyone who is interested:
/// One row of the CSV file.
struct Person {
    var firstName: String
    var lastName: String
    var age: Int
    var isRegistered: Bool
}

/// Every person parsed so far; `convertCSVIntoArray` appends to this array.
var people = [Person]()

/// Parses CSV text and appends one `Person` per valid data row to `people`.
///
/// The first row is treated as a header and skipped. Rows that do not have
/// exactly four comma-separated columns (including blank lines) are ignored.
///
/// - Parameter csvText: CSV content to parse. When `nil` (the default), the
///   contents of `data.csv` in the main bundle are read instead.
func convertCSVIntoArray(from csvText: String? = nil) {
    let data: String
    if let csvText = csvText {
        data = csvText
    } else {
        //locate the file you want to use
        guard let filepath = Bundle.main.path(forResource: "data", ofType: "csv") else {
            return
        }

        //convert that file into one long string
        do {
            data = try String(contentsOfFile: filepath)
        } catch {
            print(error)
            return
        }
    }

    //now split that string into an array of "rows" of data. Each row is a string.
    //Splitting on every newline character (not only "\n") keeps a stray "\r" from
    //Windows-style line endings out of the last column, where it would silently
    //break the `== "True"` comparison below.
    let rows = data.components(separatedBy: .newlines)

    //if you have a header row, it is skipped here
    for row in rows.dropFirst() {
        //split the row into each of its columns
        let columns = row.components(separatedBy: ",")

        //check that we have enough columns
        guard columns.count == 4 else { continue }

        let person = Person(
            firstName: columns[0],
            lastName: columns[1],
            age: Int(columns[2]) ?? 0,
            isRegistered: columns[3] == "True"
        )
        people.append(person)
    }
}

You are attempting to parse the file path rather than the contents of the file.
If you replace
let content = String(contentsOfURL: contentsOfURL, encoding: encoding, error: error)
with:
if let data = NSData(contentsOfURL: contentsOfURL) {
if let content = NSString(data: data, encoding: NSUTF8StringEncoding) {
//existing code
}
}
then the code will work for your example file.

Based on the error and where the error is taking place I would guess that your values array does not have 5 elements like you think it does. I would put a breakpoint on the line that is giving you an error and inspect your values variable and see how many are in it. Since your .csv file is obviously 5 elements long, then I would guess something is going wrong in your parsing.

Related

How to parse data effectively

I've a data coming from api which is in the form
"A,B,C,D,E\n
17945,10091,10088,3907,10132\n
2,12,13,48,11"
in the above form.
The meaning of data is A is mapped to 17945 and 2 (A->17945->2) similarly for others. I want to save this data on my model array
struct DataModel {
var name : String
var id1 : String
var id2 : String
}
The question is how do I do this effectively.
What I'm thinking is splitting the data from the API, creating arrays respectively and initialising the data model accordingly. But is there another way to do this using dictionaries? It is not necessary to use a model here; I just need all the respective data in one go.
Well if you want to create data from this raw data, you can do as like below
/// One column of the raw data: a name plus the two identifiers mapped to it.
struct DataModel {
    var name: String
    var id1: String
    var id2: String
}

let names = ["A", "B", "C", "D", "E"]
let element2 = [17945, 10091, 10088, 3907, 10132]
let element3 = [2, 12, 13, 48, 11]

// Walk the three arrays in lockstep and build one model per position.
var allElements: [DataModel] = []
for ((name, firstID), secondID) in zip(zip(names, element2), element3) {
    allElements.append(DataModel(name: name, id1: String(firstID), id2: String(secondID)))
}
I don't know if this is what you need, but here is one way to do it:
struct DataModel {
var name: String
var id1: String
var id2: String
public init(n: String, i1: String, i2: String) {
name = n
id1 = i1
id2 = i2
}
}
let numberOfArrays = string.components(separatedBy: "\n").count
let aString = string.replacingOccurrences(of: "\n", with: ",").components(separatedBy: ",")
let step = aString.count / numberOfArrays
for i in 0..<step {
allElements.append(DataModel(n: aString[i], i1: aString[i + step], i2: aString[i + step * 2]))
}
UPDATE:
Here is one more variant, which produces a dictionary:
/// Splits `inputString` into rows and columns and accumulates, for every
/// column index, the concatenation of that column's value from each row.
private func parse() {
    // Key: column index. Value: that column's elements from all rows, joined in row order.
    var dictionary: [Int: String] = [:]
    // Plain loops instead of `compactMap`: the work here is purely a side effect
    // on `dictionary`, so there is no mapped result worth building and discarding.
    for row in inputString.components(separatedBy: "\n") {
        for (index, element) in row.components(separatedBy: ",").enumerated() {
            dictionary[index, default: ""] += element
        }
    }
}
where dictionary.values will be the desired elements

Swift Realm. How to put 3 millions objects in database

I make offline dictionary app. And now I convert dictionary file to realm database.
Convert function:
// Reads the bundled "dictionary.dsl" file, splits it into lines, and stores one
// CNDict object in Realm for every line that contains Chinese characters.
if let path = Bundle.main.path(forResource: "dictionary", ofType: "dsl") {
do {
// The whole file is read into a single string ...
let data = try String(contentsOfFile: path, encoding: .utf8)
// ... and then split into an array holding every line
let myStrings = data.components(separatedBy: .newlines)
for (index, row) in myStrings.enumerated() {
if(row.containsChineseCharacters)
{
let firstWord = CNDict()
firstWord.word = row
// The two lines following the word are taken as pinyin and translation.
// NOTE(review): no bounds check — index+1 / index+2 go past the end of the
// array if a matching line is one of the last two lines
firstWord.pinyin = myStrings[index+1]
firstWord.translate = myStrings[index+2]
// Each object is added in its own write call; `try!` traps if the write throws
try! realm.write {
realm.add(firstWord)
}
}
}
print("The task end.")
} catch {
print(error)
}
}
When I try to convert the dictionary immediately, the database file becomes a lot of gigabytes and crashes around the middle
Splitting the dictionary in parts is not an option, because there are about 3 million lines. It will take very much... (realm plugin crashes)
I need help on how to add this many values to the database without crashes.
The issue is that your file is large and at this point, you load it into the memory:
let data = try String(contentsOfFile: path, encoding: .utf8)
And then you double the memory footprint here:
let myStrings = data.components(separatedBy: .newlines)
So my guess is that you received out of memory signal from the system.
Instead of loading all the data into memory and then doubling it, you can use a lazy collection. It will read and parse a line only when it is needed for writing, and it will not load all lines at once. One downside of lazy collections in Swift is that they do not provide all the functions that we are used to.
Here is the full code for a playground that solves your issue. You could, and maybe should, optimize some parts of it, but it just shows how this could be done with a lazy collection. (I changed some names, but it's still what you want.)
import Foundation
extension String {
var containsOneSymbol: Bool {
return contains("1")
}
}
extension Character {
var isNewLine: Bool {
let string = String(self)
let set = CharacterSet(charactersIn: string)
return !set.isDisjoint(with: CharacterSet.newlines)
}
}
/// Add Object subclass for Realm
#objcMembers
final class CNDict {
dynamic var word = ""
dynamic var pinyin = ""
dynamic var translate = ""
}
/// Collects incoming lines in a small fixed-size buffer and, once the buffer is
/// full, examines the oldest buffered line on every further call to `proccess(line:)`.
final class ModelWriterWrapper {
// Number of lines kept in the buffer (used below as word, pinyin, translation)
private let bufferCapacity = 3
private var buffer: [String] = []
init() {
buffer.reserveCapacity(bufferCapacity)
}
/// Feeds the next line into the buffer.
/// - Parameter line: the line to add.
///
/// While the buffer is not yet full the line is only appended. Once it is full,
/// the oldest line is checked with `containsOneSymbol`; on a match a `CNDict` is
/// built from the three buffered lines and printed. The oldest line is then
/// dropped and `line` is appended, so the buffer slides forward by one line.
func proccess(line: String) {
guard buffer.count == bufferCapacity else {
assert(buffer.count < bufferCapacity, "Buffer failer count: \(buffer.count)!")
buffer.append(line)
return
}
if let firstLine = buffer.first, firstLine.containsOneSymbol {
let dict = CNDict()
dict.word = firstLine
dict.pinyin = buffer[1]
dict.translate = buffer[2]
print("Ready for writing into DB \n word: \(dict.word) pinyin: \(dict.pinyin) translate: \(dict.translate)")
}
// NOTE(review): the check above only runs when a new line arrives, so the lines
// still sitting in the buffer after the last call are never examined here
buffer.removeFirst()
buffer.append(line)
}
}
let data = stride(from: 0, to: 100_000, by: 1).map { "Line number \($0)\n"}.joined()
var line: String = ""
let myLines = data.lazy.map { char -> String in
line.append(char)
guard !char.isNewLine else {
defer { line = "" }
return line
}
return ""
}.filter { !$0.isEmpty }
let databaseWritter = ModelWriterWrapper()
myLines.forEach {
databaseWritter.proccess(line: $0)
}
If you have any questions regarding the code, please ask.

Get specific values from a string and create array

From some URL I create an array of strings, and I would like to grab some data from those strings and turn them into another array of variables.
My array of strings looks like this:
#EXTINF:-1 tvg-logo="https://www.thetvdb.com/banners/posters/248741-9.jpg" group-title="Broke Girls", trailer
#EXTINF:-1 tvg-logo="https://www.thetvdb.com/banners/posters/210841-10.jpg" group-title="Alphas", Alphas trailer
#EXTINF:-1 tvg-logo="https://www.thetvdb.com/banners/posters/309053-2.jpg" group-title="American Gothic", trailer
Every line represents a new string item from my array.
I am trying to create a function to do it, but until now, I only have this:
func grabValuesFromUrl(savedUrl: String) {
var trailersArray = []()
if let url = URL(string: savedUrl) {
do {
let contents = try String(contentsOf: url)
contents.enumerateLines { (line, stop) in
// here i need to grab the values from every string inside tvg-logo="", group-title="", and the last one after "," that's the title, and put them into trailersArray[], afterwards i will make some model class to get the data like trailersArray.logo and trailersArray.group and trailersArray.title
}
} else {
print("no url added")
}
}
Thanks in advance
I'd use regex for anything related to extracting data from a string with known format. For this, lets first define helper function:
/// Runs a case-insensitive regular expression over `text` and returns the
/// captured strings of every match, flattened into a single array.
///
/// For each match the result contains the whole match followed by each capture
/// group, in order. A group that did not take part in the match contributes an
/// empty string.
///
/// - Parameters:
///   - regex: the regular-expression pattern.
///   - text: the text to search.
/// - Returns: the captured strings, or an empty array when the pattern is invalid.
func matches(for regex: String, inText text: String) -> [String] {
    let expression: NSRegularExpression
    do {
        expression = try NSRegularExpression(pattern: regex, options: [.caseInsensitive])
    } catch {
        return []
    }

    // NSRegularExpression reports ranges in UTF-16 units, so slice via NSString.
    let source = text as NSString
    let wholeText = NSMakeRange(0, source.length)

    var captured: [String] = []
    for match in expression.matches(in: text, options: [], range: wholeText) {
        for group in 0..<match.numberOfRanges {
            let groupRange = match.range(at: group)
            if groupRange.location == NSNotFound {
                captured.append("")
            } else {
                captured.append(source.substring(with: groupRange))
            }
        }
    }
    return captured
}
And then define the regular expression that will extract required data:
let regex = "^.*tvg-logo=\"(.+)\".*group-title=\"(.+)\".*, (.+)$"
Beware that this regex is sensitive to data format so you'll have to adapt it to new one in case of changes.
Finally, in your line enumeration closure you can extract the data:
// Wrap in Array: `dropFirst()` returns an ArraySlice that keeps the original
// indices (starting at 1 here), so subscripting the slice with 0 would trap.
let parts = Array(matches(for: regex, inText: line).dropFirst())
parts is now an array with three corresponding items (we drop the first one because it is the line itself) if the line matches the regex, so we can, for example, append a tuple with values to the array:
if parts.count == 3 {
trailersArray.append((logo: parts[0], group: parts[1], title: parts[2]))
}

How to populate data from NSMutableArray into struct using Swift3

I'v created a struct and I want to populate it with my data.
My struct:
struct CrimeNameSection {
var firstChar: Character
var name: [String]
var detail: [String]
var time: [String]
init(firstLetter: Character, object1: [String], object2: [String], object3: [String]) {
firstChar = firstLetter // First letter of 'name'
name = object1
detail = object2
time = object3
}
The first value of my struct ('firstChar') should hold the first letter in 'name' to create an alphabetic sections in tableView, the rest ('name','detail','time') should hold the data from my database (three columns: name, detail, time).
My code:
var marrCrimesData : NSMutableArray! // Hold the database
func getSectionsFromData() -> [CrimeNameSection] {
guard marrCrimesData != nil else {
return []
}
var sectionDictionary = [CrimeNameSection]()
for crime in marrCrimesData {
let crime = crime as! CrimesInfo
let firstChar = CrimeNameSection(firstLetter: crime.name[crime.name.startIndex], object1: [crime.name], object2: [crime.detail], object3: [crime.time])
if var names = firstChar {
names.append(crime.name)
sectionDictionary[firstChar] = names
} else {
sectionDictionary[firstChar] = [crime.name]
}
}
let sections = sectionDictionary.map { (key, value) in
return CrimeNameSection(firstLetter: key, name: value)
}
let sortedSections = sections.sorted { $0.firstLetter < $1.firstLetter }
return sortedSections
}
I get errors all over the place, I need help with storing the data inside my struct and sort it alphabetically.
Thank you all
Consider
struct Crime {
let name: String
let detail: String
let time: String
}
let crimes = [
Crime(name: "Foo", detail: "detail 1", time: "9am"),
Crime(name: "Bar", detail: "detail 2", time: "10am"),
Crime(name: "Baz", detail: "detail 3", time: "11am"),
Crime(name: "Qux", detail: "detail 4", time: "12am")
]
One approach is to just build an dictionary indexed by the first character and then sort it:
var crimeIndex = [Character: [Crime]]()
for crime in crimes {
if let firstCharacter = crime.name.characters.first {
if crimeIndex[firstCharacter] == nil {
crimeIndex[firstCharacter] = [crime]
} else {
crimeIndex[firstCharacter]?.append(crime)
}
}
}
let sortedIndex = crimeIndex.sorted { $0.0 < $1.0 }
The advantage of the above is that we can use the dictionary to efficiently find the section. If you really want to use your custom "name section" structure, I'd first make it to use an array of Crime objects (having disjointed arrays of the properties of a Crime can be fragile, e.g. if you later decide to add sorting of the crimes). So it might look like:
struct CrimeNameSection {
let firstCharacter: Character
var crimes: [Crime]
}
Because we've lost some of the Dictionary's efficiency for finding the index and have to iterate manually to look for the section, I'll go ahead and do an insertion sort at the same time, saving me from having to do a separate sort later:
var crimeSections = [CrimeNameSection]()
for crime in crimes {
if let firstCharacter = crime.name.characters.first {
var hasBeenAdded = false
for (index, crimeIndex) in crimeSections.enumerated() {
if firstCharacter == crimeIndex.firstCharacter { // if we found section, add to it
crimeSections[index].crimes.append(crime)
hasBeenAdded = true
break
}
if firstCharacter < crimeIndex.firstCharacter { // if we've passed where the section should have been, insert new section
crimeSections.insert(CrimeNameSection(firstCharacter: firstCharacter, crimes: [crime]), at: index)
hasBeenAdded = true
break
}
}
// if we've gotten to the end and still haven't found section, add new section to end
if !hasBeenAdded {
crimeSections.append(CrimeNameSection(firstCharacter: firstCharacter, crimes: [crime]))
}
}
}
First of all you could not instantiate an Array and map over it like a dictionary
var sectionDictionary = [CrimeNameSection]() // Here you are init an Array
For a dictionary you have also to specify the key, for instance if the key is a string:
var sectionDictionary = [String: CrimeNameSection]() // Dictionary init
But be aware that the key have to be unique so that the dict would work properly.
Another problem here is the constructor in your .map function because you have not created a constructor for your CrimeNameSection that only takes two parameters:
init(firstLetter: Character, object1: [String], object2: [String], object3: [String]) {
firstChar = firstLetter // First letter of 'name'
name = object1
detail = object2
time = object3
}
// Another constructor with 2 arguments
/// Two-argument initializer for sections that only carry names;
/// `detail` and `time` start out empty.
init(firstLetter: Character, object1: [String]) {
    firstChar = firstLetter // First letter of 'name'
    name = object1
    // `[]()` is not valid Swift; a plain empty array literal is all that is needed.
    detail = []
    time = []
}
If you don't want to use another constructor then you have to provide default values to object2 and object3 in your initial constructor.

SwiftCsv Parser Support for DoubleQuotes

Im using Swiftcsv library to parse CSV file.
How to ignore Comma delimiter for strings within DoubleQuotes e.g "Abcd, went to Apple" ?
here the parser takes Abcd as one Value and went to Apple as another value.
Code :
func parseRows(fromLines lines: [String]) -> [Dictionary<String, String>] {
var rows: [Dictionary<String, String>] = []
for (lineNumber, line) in enumerate(lines) {
if lineNumber == 0 {
continue
}
var row = Dictionary<String, String>()
let values = line.componentsSeparatedByCharactersInSet(self.delimiter)
for (index, header) in enumerate(self.headers) {
let value = values[index]
row[header] = value
}
rows.append(row)
}
return rows
}
How can i change line.componentsSeparatedByCharactersInSet(self.delimiter) to ignore commas within Doublequotes?
Use this instead https://github.com/Daniel1of1/CSwiftV
Had the same issue. It took me an hour or so to figure out the problem is that SwiftCSV doesn't, erm, work.
Text in CSV files is inside quotes so commas and newlines in a CSV don't screw up the parser. I looked at the SwiftCSV source and there is no support for that - meaning that any commas or newlines screw up the parsing.
You could patch up SwiftCSV, or just go with CSwiftV I linked above.
I don't know if you are still looking for a solution, but I just came up with a quick way to do this as it is a problem that just came up for me.
The code isn't foolproof because I am only using it for a side project, so if you want to handle more cases you will probably need to make some changes.
func parseRows(fromLines lines: [String]) -> [Dictionary<String, String>] {
var rows: [Dictionary<String, String>] = []
for (lineNumber, line) in enumerate(lines) {
if lineNumber == 0 {
continue
}
var row = Dictionary<String, String>()
// escape commas in the string when it is surrounded by quotes
let convertedLine = NSString(string: line) // have to convert string to NSString because string does not have all NSString API
var escapedLine = line
var searchRange = NSMakeRange(1,convertedLine.length)
var foundRange:NSRange
if NSString(string: line).containsString("\"")
{
while (searchRange.location < convertedLine.length) {
searchRange.length = convertedLine.length-searchRange.location
foundRange = convertedLine.rangeOfString("\"", options: nil, range: searchRange)
if (foundRange.location != NSNotFound) {
// found a quotation mark
searchRange.location = foundRange.location+foundRange.length
let movieTitle = convertedLine.substringToIndex(foundRange.location)
escapedLine = convertedLine.stringByReplacingOccurrencesOfString(",", withString: "&c", options: nil, range: NSMakeRange(0,foundRange.location))
} else {
// no more substring to find
break
}
}
}
var values = escapedLine.componentsSeparatedByCharactersInSet(self.delimiter)
for (index, header) in enumerate(self.headers) {
var value = values[index]
//reinsert commas if they were escaped and get rid of quotation marks
value = value.stringByReplacingOccurrencesOfString("\"", withString: "")
value = value.stringByReplacingOccurrencesOfString("&c", withString: ",")
row[header] = value
}
rows.append(row)
}
return rows
}

Resources